| 2561 3511 3501 8037 2982 246 314 10 4 7 4 8055 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 | // SPDX-License-Identifier: GPL-2.0-only /* * Netlink message type permission tables, for user generated messages. * * Author: James Morris <jmorris@redhat.com> * * Copyright (C) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com> */ #include <linux/types.h> #include <linux/kernel.h> #include <linux/netlink.h> #include <linux/rtnetlink.h> #include <linux/if.h> #include <linux/inet_diag.h> #include <linux/xfrm.h> #include <linux/audit.h> #include <linux/sock_diag.h> #include "flask.h" #include "av_permissions.h" #include "security.h" struct nlmsg_perm { u16 nlmsg_type; u32 perm; }; static const struct nlmsg_perm nlmsg_route_perms[] = { { RTM_NEWLINK, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELLINK, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETLINK, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_SETLINK, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_NEWADDR, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELADDR, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETADDR, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_NEWROUTE, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELROUTE, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETROUTE, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_NEWNEIGH, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELNEIGH, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETNEIGH, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_NEWRULE, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELRULE, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETRULE, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_NEWQDISC, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELQDISC, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETQDISC, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_NEWTCLASS, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELTCLASS, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETTCLASS, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_NEWTFILTER, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELTFILTER, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETTFILTER, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_NEWACTION, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELACTION, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETACTION, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_NEWPREFIX, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETMULTICAST, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_GETANYCAST, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_GETNEIGHTBL, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_SETNEIGHTBL, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_NEWADDRLABEL, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELADDRLABEL, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETADDRLABEL, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_GETDCB, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_SETDCB, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_NEWNETCONF, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELNETCONF, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETNETCONF, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_NEWMDB, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELMDB, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETMDB, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_NEWNSID, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELNSID, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_GETNSID, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_NEWSTATS, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_GETSTATS, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_SETSTATS, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_NEWCACHEREPORT, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_NEWCHAIN, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELCHAIN, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETCHAIN, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_NEWNEXTHOP, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELNEXTHOP, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETNEXTHOP, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_NEWLINKPROP, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELLINKPROP, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_NEWVLAN, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELVLAN, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETVLAN, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_NEWNEXTHOPBUCKET, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELNEXTHOPBUCKET, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETNEXTHOPBUCKET, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_NEWTUNNEL, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELTUNNEL, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_GETTUNNEL, NETLINK_ROUTE_SOCKET__NLMSG_READ }, }; static const struct nlmsg_perm nlmsg_tcpdiag_perms[] = { { TCPDIAG_GETSOCK, NETLINK_TCPDIAG_SOCKET__NLMSG_READ }, { DCCPDIAG_GETSOCK, NETLINK_TCPDIAG_SOCKET__NLMSG_READ }, { SOCK_DIAG_BY_FAMILY, NETLINK_TCPDIAG_SOCKET__NLMSG_READ }, { SOCK_DESTROY, NETLINK_TCPDIAG_SOCKET__NLMSG_WRITE }, }; static const struct nlmsg_perm nlmsg_xfrm_perms[] = { { XFRM_MSG_NEWSA, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, { XFRM_MSG_DELSA, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, { XFRM_MSG_GETSA, NETLINK_XFRM_SOCKET__NLMSG_READ }, { XFRM_MSG_NEWPOLICY, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, { XFRM_MSG_DELPOLICY, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, { XFRM_MSG_GETPOLICY, NETLINK_XFRM_SOCKET__NLMSG_READ }, { XFRM_MSG_ALLOCSPI, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, { XFRM_MSG_ACQUIRE, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, { XFRM_MSG_EXPIRE, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, { XFRM_MSG_UPDPOLICY, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, { XFRM_MSG_UPDSA, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, { XFRM_MSG_POLEXPIRE, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, { XFRM_MSG_FLUSHSA, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, { XFRM_MSG_FLUSHPOLICY, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, { XFRM_MSG_NEWAE, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, { XFRM_MSG_GETAE, NETLINK_XFRM_SOCKET__NLMSG_READ }, { XFRM_MSG_REPORT, NETLINK_XFRM_SOCKET__NLMSG_READ }, { XFRM_MSG_MIGRATE, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, { XFRM_MSG_NEWSADINFO, NETLINK_XFRM_SOCKET__NLMSG_READ }, { XFRM_MSG_GETSADINFO, NETLINK_XFRM_SOCKET__NLMSG_READ }, { XFRM_MSG_NEWSPDINFO, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, { XFRM_MSG_GETSPDINFO, NETLINK_XFRM_SOCKET__NLMSG_READ }, { XFRM_MSG_MAPPING, NETLINK_XFRM_SOCKET__NLMSG_READ }, { XFRM_MSG_SETDEFAULT, NETLINK_XFRM_SOCKET__NLMSG_WRITE }, { XFRM_MSG_GETDEFAULT, NETLINK_XFRM_SOCKET__NLMSG_READ }, }; static const struct nlmsg_perm nlmsg_audit_perms[] = { { AUDIT_GET, NETLINK_AUDIT_SOCKET__NLMSG_READ }, { AUDIT_SET, NETLINK_AUDIT_SOCKET__NLMSG_WRITE }, { AUDIT_LIST, NETLINK_AUDIT_SOCKET__NLMSG_READPRIV }, { AUDIT_ADD, NETLINK_AUDIT_SOCKET__NLMSG_WRITE }, { AUDIT_DEL, NETLINK_AUDIT_SOCKET__NLMSG_WRITE }, { AUDIT_LIST_RULES, NETLINK_AUDIT_SOCKET__NLMSG_READPRIV }, { AUDIT_ADD_RULE, NETLINK_AUDIT_SOCKET__NLMSG_WRITE }, { AUDIT_DEL_RULE, NETLINK_AUDIT_SOCKET__NLMSG_WRITE }, { AUDIT_USER, NETLINK_AUDIT_SOCKET__NLMSG_RELAY }, { AUDIT_SIGNAL_INFO, NETLINK_AUDIT_SOCKET__NLMSG_READ }, { AUDIT_TRIM, NETLINK_AUDIT_SOCKET__NLMSG_WRITE }, { AUDIT_MAKE_EQUIV, NETLINK_AUDIT_SOCKET__NLMSG_WRITE }, { AUDIT_TTY_GET, NETLINK_AUDIT_SOCKET__NLMSG_READ }, { AUDIT_TTY_SET, NETLINK_AUDIT_SOCKET__NLMSG_TTY_AUDIT }, { AUDIT_GET_FEATURE, NETLINK_AUDIT_SOCKET__NLMSG_READ }, { AUDIT_SET_FEATURE, NETLINK_AUDIT_SOCKET__NLMSG_WRITE }, }; static int nlmsg_perm(u16 nlmsg_type, u32 *perm, const struct nlmsg_perm *tab, size_t tabsize) { unsigned int i; int err = -EINVAL; for (i = 0; i < tabsize/sizeof(struct nlmsg_perm); i++) if (nlmsg_type == tab[i].nlmsg_type) { *perm = tab[i].perm; err = 0; break; } return err; } int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm) { int err = 0; switch (sclass) { case SECCLASS_NETLINK_ROUTE_SOCKET: /* RTM_MAX always points to RTM_SETxxxx, ie RTM_NEWxxx + 3. * If the BUILD_BUG_ON() below fails you must update the * structures at the top of this file with the new mappings * before updating the BUILD_BUG_ON() macro! */ BUILD_BUG_ON(RTM_MAX != (RTM_NEWTUNNEL + 3)); err = nlmsg_perm(nlmsg_type, perm, nlmsg_route_perms, sizeof(nlmsg_route_perms)); break; case SECCLASS_NETLINK_TCPDIAG_SOCKET: err = nlmsg_perm(nlmsg_type, perm, nlmsg_tcpdiag_perms, sizeof(nlmsg_tcpdiag_perms)); break; case SECCLASS_NETLINK_XFRM_SOCKET: /* If the BUILD_BUG_ON() below fails you must update the * structures at the top of this file with the new mappings * before updating the BUILD_BUG_ON() macro! */ BUILD_BUG_ON(XFRM_MSG_MAX != XFRM_MSG_GETDEFAULT); err = nlmsg_perm(nlmsg_type, perm, nlmsg_xfrm_perms, sizeof(nlmsg_xfrm_perms)); break; case SECCLASS_NETLINK_AUDIT_SOCKET: if ((nlmsg_type >= AUDIT_FIRST_USER_MSG && nlmsg_type <= AUDIT_LAST_USER_MSG) || (nlmsg_type >= AUDIT_FIRST_USER_MSG2 && nlmsg_type <= AUDIT_LAST_USER_MSG2)) { *perm = NETLINK_AUDIT_SOCKET__NLMSG_RELAY; } else { err = nlmsg_perm(nlmsg_type, perm, nlmsg_audit_perms, sizeof(nlmsg_audit_perms)); } break; /* No messaging from userspace, or class unknown/unhandled */ default: err = -ENOENT; break; } return err; } |
| 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008, 2009 open80211s Ltd. * Copyright (C) 2023 Intel Corporation * Author: Luis Carlos Cobo <luisca@cozybit.com> */ #include <linux/etherdevice.h> #include <linux/list.h> #include <linux/random.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/string.h> #include <net/mac80211.h> #include "wme.h" #include "ieee80211_i.h" #include "mesh.h" #include <linux/rhashtable.h> static void mesh_path_free_rcu(struct mesh_table *tbl, struct mesh_path *mpath); static u32 mesh_table_hash(const void *addr, u32 len, u32 seed) { /* Use last four bytes of hw addr as hash index */ return jhash_1word(__get_unaligned_cpu32((u8 *)addr + 2), seed); } static const struct rhashtable_params mesh_rht_params = { .nelem_hint = 2, .automatic_shrinking = true, .key_len = ETH_ALEN, .key_offset = offsetof(struct mesh_path, dst), .head_offset = offsetof(struct mesh_path, rhash), .hashfn = mesh_table_hash, }; static const struct rhashtable_params fast_tx_rht_params = { .nelem_hint = 10, .automatic_shrinking = true, .key_len = sizeof_field(struct ieee80211_mesh_fast_tx, key), .key_offset = offsetof(struct ieee80211_mesh_fast_tx, key), .head_offset = offsetof(struct ieee80211_mesh_fast_tx, rhash), .hashfn = mesh_table_hash, }; static void __mesh_fast_tx_entry_free(void *ptr, void *tblptr) { struct ieee80211_mesh_fast_tx *entry = ptr; kfree_rcu(entry, fast_tx.rcu_head); } static void mesh_fast_tx_deinit(struct ieee80211_sub_if_data *sdata) { struct mesh_tx_cache *cache; cache = &sdata->u.mesh.tx_cache; rhashtable_free_and_destroy(&cache->rht, __mesh_fast_tx_entry_free, NULL); } static void mesh_fast_tx_init(struct ieee80211_sub_if_data *sdata) { struct mesh_tx_cache *cache; cache = &sdata->u.mesh.tx_cache; rhashtable_init(&cache->rht, &fast_tx_rht_params); INIT_HLIST_HEAD(&cache->walk_head); spin_lock_init(&cache->walk_lock); } static inline bool mpath_expired(struct mesh_path *mpath) { return (mpath->flags & MESH_PATH_ACTIVE) && time_after(jiffies, mpath->exp_time) && !(mpath->flags & MESH_PATH_FIXED); } static void mesh_path_rht_free(void *ptr, void *tblptr) { struct mesh_path *mpath = ptr; struct mesh_table *tbl = tblptr; mesh_path_free_rcu(tbl, mpath); } static void mesh_table_init(struct mesh_table *tbl) { INIT_HLIST_HEAD(&tbl->known_gates); INIT_HLIST_HEAD(&tbl->walk_head); atomic_set(&tbl->entries, 0); spin_lock_init(&tbl->gates_lock); spin_lock_init(&tbl->walk_lock); /* rhashtable_init() may fail only in case of wrong * mesh_rht_params */ WARN_ON(rhashtable_init(&tbl->rhead, &mesh_rht_params)); } static void mesh_table_free(struct mesh_table *tbl) { rhashtable_free_and_destroy(&tbl->rhead, mesh_path_rht_free, tbl); } /** * mesh_path_assign_nexthop - update mesh path next hop * * @mpath: mesh path to update * @sta: next hop to assign * * Locking: mpath->state_lock must be held when calling this function */ void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta) { struct sk_buff *skb; struct ieee80211_hdr *hdr; unsigned long flags; rcu_assign_pointer(mpath->next_hop, sta); spin_lock_irqsave(&mpath->frame_queue.lock, flags); skb_queue_walk(&mpath->frame_queue, skb) { hdr = (struct ieee80211_hdr *) skb->data; memcpy(hdr->addr1, sta->sta.addr, ETH_ALEN); memcpy(hdr->addr2, mpath->sdata->vif.addr, ETH_ALEN); ieee80211_mps_set_frame_flags(sta->sdata, sta, hdr); } spin_unlock_irqrestore(&mpath->frame_queue.lock, flags); } static void prepare_for_gate(struct sk_buff *skb, char *dst_addr, struct mesh_path *gate_mpath) { struct ieee80211_hdr *hdr; struct ieee80211s_hdr *mshdr; int mesh_hdrlen, hdrlen; char *next_hop; hdr = (struct ieee80211_hdr *) skb->data; hdrlen = ieee80211_hdrlen(hdr->frame_control); mshdr = (struct ieee80211s_hdr *) (skb->data + hdrlen); if (!(mshdr->flags & MESH_FLAGS_AE)) { /* size of the fixed part of the mesh header */ mesh_hdrlen = 6; /* make room for the two extended addresses */ skb_push(skb, 2 * ETH_ALEN); memmove(skb->data, hdr, hdrlen + mesh_hdrlen); hdr = (struct ieee80211_hdr *) skb->data; /* we preserve the previous mesh header and only add * the new addresses */ mshdr = (struct ieee80211s_hdr *) (skb->data + hdrlen); mshdr->flags = MESH_FLAGS_AE_A5_A6; memcpy(mshdr->eaddr1, hdr->addr3, ETH_ALEN); memcpy(mshdr->eaddr2, hdr->addr4, ETH_ALEN); } /* update next hop */ hdr = (struct ieee80211_hdr *) skb->data; rcu_read_lock(); next_hop = rcu_dereference(gate_mpath->next_hop)->sta.addr; memcpy(hdr->addr1, next_hop, ETH_ALEN); rcu_read_unlock(); memcpy(hdr->addr2, gate_mpath->sdata->vif.addr, ETH_ALEN); memcpy(hdr->addr3, dst_addr, ETH_ALEN); } /** * mesh_path_move_to_queue - Move or copy frames from one mpath queue to another * * @gate_mpath: An active mpath the frames will be sent to (i.e. the gate) * @from_mpath: The failed mpath * @copy: When true, copy all the frames to the new mpath queue. When false, * move them. * * This function is used to transfer or copy frames from an unresolved mpath to * a gate mpath. The function also adds the Address Extension field and * updates the next hop. * * If a frame already has an Address Extension field, only the next hop and * destination addresses are updated. * * The gate mpath must be an active mpath with a valid mpath->next_hop. */ static void mesh_path_move_to_queue(struct mesh_path *gate_mpath, struct mesh_path *from_mpath, bool copy) { struct sk_buff *skb, *fskb, *tmp; struct sk_buff_head failq; unsigned long flags; if (WARN_ON(gate_mpath == from_mpath)) return; if (WARN_ON(!gate_mpath->next_hop)) return; __skb_queue_head_init(&failq); spin_lock_irqsave(&from_mpath->frame_queue.lock, flags); skb_queue_splice_init(&from_mpath->frame_queue, &failq); spin_unlock_irqrestore(&from_mpath->frame_queue.lock, flags); skb_queue_walk_safe(&failq, fskb, tmp) { if (skb_queue_len(&gate_mpath->frame_queue) >= MESH_FRAME_QUEUE_LEN) { mpath_dbg(gate_mpath->sdata, "mpath queue full!\n"); break; } skb = skb_copy(fskb, GFP_ATOMIC); if (WARN_ON(!skb)) break; prepare_for_gate(skb, gate_mpath->dst, gate_mpath); skb_queue_tail(&gate_mpath->frame_queue, skb); if (copy) continue; __skb_unlink(fskb, &failq); kfree_skb(fskb); } mpath_dbg(gate_mpath->sdata, "Mpath queue for gate %pM has %d frames\n", gate_mpath->dst, skb_queue_len(&gate_mpath->frame_queue)); if (!copy) return; spin_lock_irqsave(&from_mpath->frame_queue.lock, flags); skb_queue_splice(&failq, &from_mpath->frame_queue); spin_unlock_irqrestore(&from_mpath->frame_queue.lock, flags); } static struct mesh_path *mpath_lookup(struct mesh_table *tbl, const u8 *dst, struct ieee80211_sub_if_data *sdata) { struct mesh_path *mpath; mpath = rhashtable_lookup(&tbl->rhead, dst, mesh_rht_params); if (mpath && mpath_expired(mpath)) { spin_lock_bh(&mpath->state_lock); mpath->flags &= ~MESH_PATH_ACTIVE; spin_unlock_bh(&mpath->state_lock); } return mpath; } /** * mesh_path_lookup - look up a path in the mesh path table * @sdata: local subif * @dst: hardware address (ETH_ALEN length) of destination * * Returns: pointer to the mesh path structure, or NULL if not found * * Locking: must be called within a read rcu section. */ struct mesh_path * mesh_path_lookup(struct ieee80211_sub_if_data *sdata, const u8 *dst) { return mpath_lookup(&sdata->u.mesh.mesh_paths, dst, sdata); } struct mesh_path * mpp_path_lookup(struct ieee80211_sub_if_data *sdata, const u8 *dst) { return mpath_lookup(&sdata->u.mesh.mpp_paths, dst, sdata); } static struct mesh_path * __mesh_path_lookup_by_idx(struct mesh_table *tbl, int idx) { int i = 0; struct mesh_path *mpath; hlist_for_each_entry_rcu(mpath, &tbl->walk_head, walk_list) { if (i++ == idx) break; } if (!mpath) return NULL; if (mpath_expired(mpath)) { spin_lock_bh(&mpath->state_lock); mpath->flags &= ~MESH_PATH_ACTIVE; spin_unlock_bh(&mpath->state_lock); } return mpath; } /** * mesh_path_lookup_by_idx - look up a path in the mesh path table by its index * @idx: index * @sdata: local subif, or NULL for all entries * * Returns: pointer to the mesh path structure, or NULL if not found. * * Locking: must be called within a read rcu section. */ struct mesh_path * mesh_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx) { return __mesh_path_lookup_by_idx(&sdata->u.mesh.mesh_paths, idx); } /** * mpp_path_lookup_by_idx - look up a path in the proxy path table by its index * @idx: index * @sdata: local subif, or NULL for all entries * * Returns: pointer to the proxy path structure, or NULL if not found. * * Locking: must be called within a read rcu section. */ struct mesh_path * mpp_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx) { return __mesh_path_lookup_by_idx(&sdata->u.mesh.mpp_paths, idx); } /** * mesh_path_add_gate - add the given mpath to a mesh gate to our path table * @mpath: gate path to add to table * * Returns: 0 on success, -EEXIST */ int mesh_path_add_gate(struct mesh_path *mpath) { struct mesh_table *tbl; int err; rcu_read_lock(); tbl = &mpath->sdata->u.mesh.mesh_paths; spin_lock_bh(&mpath->state_lock); if (mpath->is_gate) { err = -EEXIST; spin_unlock_bh(&mpath->state_lock); goto err_rcu; } mpath->is_gate = true; mpath->sdata->u.mesh.num_gates++; spin_lock(&tbl->gates_lock); hlist_add_head_rcu(&mpath->gate_list, &tbl->known_gates); spin_unlock(&tbl->gates_lock); spin_unlock_bh(&mpath->state_lock); mpath_dbg(mpath->sdata, "Mesh path: Recorded new gate: %pM. %d known gates\n", mpath->dst, mpath->sdata->u.mesh.num_gates); err = 0; err_rcu: rcu_read_unlock(); return err; } /** * mesh_gate_del - remove a mesh gate from the list of known gates * @tbl: table which holds our list of known gates * @mpath: gate mpath */ static void mesh_gate_del(struct mesh_table *tbl, struct mesh_path *mpath) { lockdep_assert_held(&mpath->state_lock); if (!mpath->is_gate) return; mpath->is_gate = false; spin_lock_bh(&tbl->gates_lock); hlist_del_rcu(&mpath->gate_list); mpath->sdata->u.mesh.num_gates--; spin_unlock_bh(&tbl->gates_lock); mpath_dbg(mpath->sdata, "Mesh path: Deleted gate: %pM. %d known gates\n", mpath->dst, mpath->sdata->u.mesh.num_gates); } /** * mesh_gate_num - number of gates known to this interface * @sdata: subif data * * Returns: The number of gates */ int mesh_gate_num(struct ieee80211_sub_if_data *sdata) { return sdata->u.mesh.num_gates; } static struct mesh_path *mesh_path_new(struct ieee80211_sub_if_data *sdata, const u8 *dst, gfp_t gfp_flags) { struct mesh_path *new_mpath; new_mpath = kzalloc(sizeof(struct mesh_path), gfp_flags); if (!new_mpath) return NULL; memcpy(new_mpath->dst, dst, ETH_ALEN); eth_broadcast_addr(new_mpath->rann_snd_addr); new_mpath->is_root = false; new_mpath->sdata = sdata; new_mpath->flags = 0; skb_queue_head_init(&new_mpath->frame_queue); new_mpath->exp_time = jiffies; spin_lock_init(&new_mpath->state_lock); timer_setup(&new_mpath->timer, mesh_path_timer, 0); return new_mpath; } static void mesh_fast_tx_entry_free(struct mesh_tx_cache *cache, struct ieee80211_mesh_fast_tx *entry) { hlist_del_rcu(&entry->walk_list); rhashtable_remove_fast(&cache->rht, &entry->rhash, fast_tx_rht_params); kfree_rcu(entry, fast_tx.rcu_head); } struct ieee80211_mesh_fast_tx * mesh_fast_tx_get(struct ieee80211_sub_if_data *sdata, struct ieee80211_mesh_fast_tx_key *key) { struct ieee80211_mesh_fast_tx *entry; struct mesh_tx_cache *cache; cache = &sdata->u.mesh.tx_cache; entry = rhashtable_lookup(&cache->rht, key, fast_tx_rht_params); if (!entry) return NULL; if (!(entry->mpath->flags & MESH_PATH_ACTIVE) || mpath_expired(entry->mpath)) { spin_lock_bh(&cache->walk_lock); entry = rhashtable_lookup(&cache->rht, key, fast_tx_rht_params); if (entry) mesh_fast_tx_entry_free(cache, entry); spin_unlock_bh(&cache->walk_lock); return NULL; } mesh_path_refresh(sdata, entry->mpath, NULL); if (entry->mppath) entry->mppath->exp_time = jiffies; entry->timestamp = jiffies; return entry; } void mesh_fast_tx_cache(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, struct mesh_path *mpath) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_mesh_fast_tx *entry, *prev; struct ieee80211_mesh_fast_tx build = {}; struct ieee80211s_hdr *meshhdr; struct mesh_tx_cache *cache; struct ieee80211_key *key; struct mesh_path *mppath; struct sta_info *sta; u8 *qc; if (sdata->noack_map || !ieee80211_is_data_qos(hdr->frame_control)) return; build.fast_tx.hdr_len = ieee80211_hdrlen(hdr->frame_control); meshhdr = (struct ieee80211s_hdr *)(skb->data + build.fast_tx.hdr_len); build.hdrlen = ieee80211_get_mesh_hdrlen(meshhdr); cache = &sdata->u.mesh.tx_cache; if (atomic_read(&cache->rht.nelems) >= MESH_FAST_TX_CACHE_MAX_SIZE) return; sta = rcu_dereference(mpath->next_hop); if (!sta) return; build.key.type = MESH_FAST_TX_TYPE_LOCAL; if ((meshhdr->flags & MESH_FLAGS_AE) == MESH_FLAGS_AE_A5_A6) { /* This is required to keep the mppath alive */ mppath = mpp_path_lookup(sdata, meshhdr->eaddr1); if (!mppath) return; build.mppath = mppath; if (!ether_addr_equal(meshhdr->eaddr2, sdata->vif.addr)) build.key.type = MESH_FAST_TX_TYPE_PROXIED; } else if (ieee80211_has_a4(hdr->frame_control)) { mppath = mpath; } else { return; } if (!ether_addr_equal(hdr->addr4, sdata->vif.addr)) build.key.type = MESH_FAST_TX_TYPE_FORWARDED; /* rate limit, in case fast xmit can't be enabled */ if (mppath->fast_tx_check == jiffies) return; mppath->fast_tx_check = jiffies; /* * Same use of the sta lock as in ieee80211_check_fast_xmit, in order * to protect against concurrent sta key updates. */ spin_lock_bh(&sta->lock); key = rcu_access_pointer(sta->ptk[sta->ptk_idx]); if (!key) key = rcu_access_pointer(sdata->default_unicast_key); build.fast_tx.key = key; if (key) { bool gen_iv, iv_spc; gen_iv = key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV; iv_spc = key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE; if (!(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) || (key->flags & KEY_FLAG_TAINTED)) goto unlock_sta; switch (key->conf.cipher) { case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_CCMP_256: if (gen_iv) build.fast_tx.pn_offs = build.fast_tx.hdr_len; if (gen_iv || iv_spc) build.fast_tx.hdr_len += IEEE80211_CCMP_HDR_LEN; break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: if (gen_iv) build.fast_tx.pn_offs = build.fast_tx.hdr_len; if (gen_iv || iv_spc) build.fast_tx.hdr_len += IEEE80211_GCMP_HDR_LEN; break; default: goto unlock_sta; } } memcpy(build.key.addr, mppath->dst, ETH_ALEN); build.timestamp = jiffies; build.fast_tx.band = info->band; build.fast_tx.da_offs = offsetof(struct ieee80211_hdr, addr3); build.fast_tx.sa_offs = offsetof(struct ieee80211_hdr, addr4); build.mpath = mpath; memcpy(build.hdr, meshhdr, build.hdrlen); memcpy(build.hdr + build.hdrlen, rfc1042_header, sizeof(rfc1042_header)); build.hdrlen += sizeof(rfc1042_header); memcpy(build.fast_tx.hdr, hdr, build.fast_tx.hdr_len); hdr = (struct ieee80211_hdr *)build.fast_tx.hdr; if (build.fast_tx.key) hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); qc = ieee80211_get_qos_ctl(hdr); qc[1] |= IEEE80211_QOS_CTL_MESH_CONTROL_PRESENT >> 8; entry = kmemdup(&build, sizeof(build), GFP_ATOMIC); if (!entry) goto unlock_sta; spin_lock(&cache->walk_lock); prev = rhashtable_lookup_get_insert_fast(&cache->rht, &entry->rhash, fast_tx_rht_params); if (unlikely(IS_ERR(prev))) { kfree(entry); goto unlock_cache; } /* * replace any previous entry in the hash table, in case we're * replacing it with a different type (e.g. mpath -> mpp) */ if (unlikely(prev)) { rhashtable_replace_fast(&cache->rht, &prev->rhash, &entry->rhash, fast_tx_rht_params); hlist_del_rcu(&prev->walk_list); kfree_rcu(prev, fast_tx.rcu_head); } hlist_add_head(&entry->walk_list, &cache->walk_head); unlock_cache: spin_unlock(&cache->walk_lock); unlock_sta: spin_unlock_bh(&sta->lock); } void mesh_fast_tx_gc(struct ieee80211_sub_if_data *sdata) { unsigned long timeout = msecs_to_jiffies(MESH_FAST_TX_CACHE_TIMEOUT); struct mesh_tx_cache *cache = &sdata->u.mesh.tx_cache; struct ieee80211_mesh_fast_tx *entry; struct hlist_node *n; if (atomic_read(&cache->rht.nelems) < MESH_FAST_TX_CACHE_THRESHOLD_SIZE) return; spin_lock_bh(&cache->walk_lock); hlist_for_each_entry_safe(entry, n, &cache->walk_head, walk_list) if (!time_is_after_jiffies(entry->timestamp + timeout)) mesh_fast_tx_entry_free(cache, entry); spin_unlock_bh(&cache->walk_lock); } void mesh_fast_tx_flush_mpath(struct mesh_path *mpath) { struct ieee80211_sub_if_data *sdata = mpath->sdata; struct mesh_tx_cache *cache = &sdata->u.mesh.tx_cache; struct ieee80211_mesh_fast_tx *entry; struct hlist_node *n; spin_lock_bh(&cache->walk_lock); hlist_for_each_entry_safe(entry, n, &cache->walk_head, walk_list) if (entry->mpath == mpath) mesh_fast_tx_entry_free(cache, entry); spin_unlock_bh(&cache->walk_lock); } void mesh_fast_tx_flush_sta(struct ieee80211_sub_if_data *sdata, struct sta_info *sta) { struct mesh_tx_cache *cache = &sdata->u.mesh.tx_cache; struct ieee80211_mesh_fast_tx *entry; struct hlist_node *n; spin_lock_bh(&cache->walk_lock); hlist_for_each_entry_safe(entry, n, &cache->walk_head, walk_list) if (rcu_access_pointer(entry->mpath->next_hop) == sta) mesh_fast_tx_entry_free(cache, entry); spin_unlock_bh(&cache->walk_lock); } void mesh_fast_tx_flush_addr(struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct mesh_tx_cache *cache = &sdata->u.mesh.tx_cache; struct ieee80211_mesh_fast_tx_key key = {}; struct ieee80211_mesh_fast_tx *entry; int i; ether_addr_copy(key.addr, addr); spin_lock_bh(&cache->walk_lock); for (i = 0; i < NUM_MESH_FAST_TX_TYPE; i++) { key.type = i; entry = rhashtable_lookup_fast(&cache->rht, &key, fast_tx_rht_params); if (entry) mesh_fast_tx_entry_free(cache, entry); } spin_unlock_bh(&cache->walk_lock); } /** * mesh_path_add - allocate and add a new path to the mesh path table * @dst: destination address of the path (ETH_ALEN length) * @sdata: local subif * * Returns: 0 on success * * State: the initial state of the new path is set to 0 */ struct mesh_path *mesh_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst) { struct mesh_table *tbl; struct mesh_path *mpath, *new_mpath; if (ether_addr_equal(dst, sdata->vif.addr)) /* never add ourselves as neighbours */ return ERR_PTR(-EOPNOTSUPP); if (is_multicast_ether_addr(dst)) return ERR_PTR(-EOPNOTSUPP); if (atomic_add_unless(&sdata->u.mesh.mpaths, 1, MESH_MAX_MPATHS) == 0) return ERR_PTR(-ENOSPC); new_mpath = mesh_path_new(sdata, dst, GFP_ATOMIC); if (!new_mpath) return ERR_PTR(-ENOMEM); tbl = &sdata->u.mesh.mesh_paths; spin_lock_bh(&tbl->walk_lock); mpath = rhashtable_lookup_get_insert_fast(&tbl->rhead, &new_mpath->rhash, mesh_rht_params); if (!mpath) hlist_add_head(&new_mpath->walk_list, &tbl->walk_head); spin_unlock_bh(&tbl->walk_lock); if (mpath) { kfree(new_mpath); if (IS_ERR(mpath)) return mpath; new_mpath = mpath; } sdata->u.mesh.mesh_paths_generation++; return new_mpath; } int mpp_path_add(struct ieee80211_sub_if_data *sdata, const u8 *dst, const u8 *mpp) { struct mesh_table *tbl; struct mesh_path *new_mpath; int ret; if (ether_addr_equal(dst, sdata->vif.addr)) /* never add ourselves as neighbours */ return -EOPNOTSUPP; if (is_multicast_ether_addr(dst)) return -EOPNOTSUPP; new_mpath = mesh_path_new(sdata, dst, GFP_ATOMIC); if (!new_mpath) return -ENOMEM; memcpy(new_mpath->mpp, mpp, ETH_ALEN); tbl = &sdata->u.mesh.mpp_paths; spin_lock_bh(&tbl->walk_lock); ret = rhashtable_lookup_insert_fast(&tbl->rhead, &new_mpath->rhash, mesh_rht_params); if (!ret) hlist_add_head_rcu(&new_mpath->walk_list, &tbl->walk_head); spin_unlock_bh(&tbl->walk_lock); if (ret) kfree(new_mpath); else mesh_fast_tx_flush_addr(sdata, dst); sdata->u.mesh.mpp_paths_generation++; return ret; } /** * mesh_plink_broken - deactivates paths and sends perr when a link breaks * * @sta: broken peer link * * This function must be called from the rate control algorithm if enough * delivery errors suggest that a peer link is no longer usable. */ void mesh_plink_broken(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct mesh_table *tbl = &sdata->u.mesh.mesh_paths; static const u8 bcast[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; struct mesh_path *mpath; rcu_read_lock(); hlist_for_each_entry_rcu(mpath, &tbl->walk_head, walk_list) { if (rcu_access_pointer(mpath->next_hop) == sta && mpath->flags & MESH_PATH_ACTIVE && !(mpath->flags & MESH_PATH_FIXED)) { spin_lock_bh(&mpath->state_lock); mpath->flags &= ~MESH_PATH_ACTIVE; ++mpath->sn; spin_unlock_bh(&mpath->state_lock); mesh_path_error_tx(sdata, sdata->u.mesh.mshcfg.element_ttl, mpath->dst, mpath->sn, WLAN_REASON_MESH_PATH_DEST_UNREACHABLE, bcast); } } rcu_read_unlock(); } static void mesh_path_free_rcu(struct mesh_table *tbl, struct mesh_path *mpath) { struct ieee80211_sub_if_data *sdata = mpath->sdata; spin_lock_bh(&mpath->state_lock); mpath->flags |= MESH_PATH_RESOLVING | MESH_PATH_DELETED; mesh_gate_del(tbl, mpath); spin_unlock_bh(&mpath->state_lock); timer_shutdown_sync(&mpath->timer); atomic_dec(&sdata->u.mesh.mpaths); atomic_dec(&tbl->entries); mesh_path_flush_pending(mpath); kfree_rcu(mpath, rcu); } static void __mesh_path_del(struct mesh_table *tbl, struct mesh_path *mpath) { hlist_del_rcu(&mpath->walk_list); rhashtable_remove_fast(&tbl->rhead, &mpath->rhash, mesh_rht_params); if (tbl == &mpath->sdata->u.mesh.mpp_paths) mesh_fast_tx_flush_addr(mpath->sdata, mpath->dst); else mesh_fast_tx_flush_mpath(mpath); mesh_path_free_rcu(tbl, mpath); } /** * mesh_path_flush_by_nexthop - Deletes mesh paths if their next hop matches * * @sta: mesh peer to match * * RCU notes: this function is called when a mesh plink transitions from * PLINK_ESTAB to any other state, since PLINK_ESTAB state is the only one that * allows path creation. This will happen before the sta can be freed (because * sta_info_destroy() calls this) so any reader in a rcu read block will be * protected against the plink disappearing. */ void mesh_path_flush_by_nexthop(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct mesh_table *tbl = &sdata->u.mesh.mesh_paths; struct mesh_path *mpath; struct hlist_node *n; spin_lock_bh(&tbl->walk_lock); hlist_for_each_entry_safe(mpath, n, &tbl->walk_head, walk_list) { if (rcu_access_pointer(mpath->next_hop) == sta) __mesh_path_del(tbl, mpath); } spin_unlock_bh(&tbl->walk_lock); } static void mpp_flush_by_proxy(struct ieee80211_sub_if_data *sdata, const u8 *proxy) { struct mesh_table *tbl = &sdata->u.mesh.mpp_paths; struct mesh_path *mpath; struct hlist_node *n; spin_lock_bh(&tbl->walk_lock); hlist_for_each_entry_safe(mpath, n, &tbl->walk_head, walk_list) { if (ether_addr_equal(mpath->mpp, proxy)) __mesh_path_del(tbl, mpath); } spin_unlock_bh(&tbl->walk_lock); } static void table_flush_by_iface(struct mesh_table *tbl) { struct mesh_path *mpath; struct hlist_node *n; spin_lock_bh(&tbl->walk_lock); hlist_for_each_entry_safe(mpath, n, &tbl->walk_head, walk_list) { __mesh_path_del(tbl, mpath); } spin_unlock_bh(&tbl->walk_lock); } /** * mesh_path_flush_by_iface - Deletes all mesh paths associated with a given iface * * @sdata: interface data to match * * This function deletes both mesh paths as well as mesh portal paths. */ void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata) { table_flush_by_iface(&sdata->u.mesh.mesh_paths); table_flush_by_iface(&sdata->u.mesh.mpp_paths); } /** * table_path_del - delete a path from the mesh or mpp table * * @tbl: mesh or mpp path table * @sdata: local subif * @addr: dst address (ETH_ALEN length) * * Returns: 0 if successful */ static int table_path_del(struct mesh_table *tbl, struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct mesh_path *mpath; spin_lock_bh(&tbl->walk_lock); mpath = rhashtable_lookup_fast(&tbl->rhead, addr, mesh_rht_params); if (!mpath) { spin_unlock_bh(&tbl->walk_lock); return -ENXIO; } __mesh_path_del(tbl, mpath); spin_unlock_bh(&tbl->walk_lock); return 0; } /** * mesh_path_del - delete a mesh path from the table * * @addr: dst address (ETH_ALEN length) * @sdata: local subif * * Returns: 0 if successful */ int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr) { int err; /* flush relevant mpp entries first */ mpp_flush_by_proxy(sdata, addr); err = table_path_del(&sdata->u.mesh.mesh_paths, sdata, addr); sdata->u.mesh.mesh_paths_generation++; return err; } /** * mesh_path_tx_pending - sends pending frames in a mesh path queue * * @mpath: mesh path to activate * * Locking: the state_lock of the mpath structure must NOT be held when calling * this function. */ void mesh_path_tx_pending(struct mesh_path *mpath) { if (mpath->flags & MESH_PATH_ACTIVE) ieee80211_add_pending_skbs(mpath->sdata->local, &mpath->frame_queue); } /** * mesh_path_send_to_gates - sends pending frames to all known mesh gates * * @mpath: mesh path whose queue will be emptied * * If there is only one gate, the frames are transferred from the failed mpath * queue to that gate's queue. If there are more than one gates, the frames * are copied from each gate to the next. After frames are copied, the * mpath queues are emptied onto the transmission queue. * * Returns: 0 on success, -EHOSTUNREACH */ int mesh_path_send_to_gates(struct mesh_path *mpath) { struct ieee80211_sub_if_data *sdata = mpath->sdata; struct mesh_table *tbl; struct mesh_path *from_mpath = mpath; struct mesh_path *gate; bool copy = false; tbl = &sdata->u.mesh.mesh_paths; rcu_read_lock(); hlist_for_each_entry_rcu(gate, &tbl->known_gates, gate_list) { if (gate->flags & MESH_PATH_ACTIVE) { mpath_dbg(sdata, "Forwarding to %pM\n", gate->dst); mesh_path_move_to_queue(gate, from_mpath, copy); from_mpath = gate; copy = true; } else { mpath_dbg(sdata, "Not forwarding to %pM (flags %#x)\n", gate->dst, gate->flags); } } hlist_for_each_entry_rcu(gate, &tbl->known_gates, gate_list) { mpath_dbg(sdata, "Sending to %pM\n", gate->dst); mesh_path_tx_pending(gate); } rcu_read_unlock(); return (from_mpath == mpath) ? -EHOSTUNREACH : 0; } /** * mesh_path_discard_frame - discard a frame whose path could not be resolved * * @skb: frame to discard * @sdata: network subif the frame was to be sent through * * Locking: the function must me called within a rcu_read_lock region */ void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { ieee80211_free_txskb(&sdata->local->hw, skb); sdata->u.mesh.mshstats.dropped_frames_no_route++; } /** * mesh_path_flush_pending - free the pending queue of a mesh path * * @mpath: mesh path whose queue has to be freed * * Locking: the function must me called within a rcu_read_lock region */ void mesh_path_flush_pending(struct mesh_path *mpath) { struct ieee80211_sub_if_data *sdata = mpath->sdata; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct mesh_preq_queue *preq, *tmp; struct sk_buff *skb; while ((skb = skb_dequeue(&mpath->frame_queue)) != NULL) mesh_path_discard_frame(mpath->sdata, skb); spin_lock_bh(&ifmsh->mesh_preq_queue_lock); list_for_each_entry_safe(preq, tmp, &ifmsh->preq_queue.list, list) { if (ether_addr_equal(mpath->dst, preq->dst)) { list_del(&preq->list); kfree(preq); --ifmsh->preq_queue_len; } } spin_unlock_bh(&ifmsh->mesh_preq_queue_lock); } /** * mesh_path_fix_nexthop - force a specific next hop for a mesh path * * @mpath: the mesh path to modify * @next_hop: the next hop to force * * Locking: this function must be called holding mpath->state_lock */ void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop) { spin_lock_bh(&mpath->state_lock); mesh_path_assign_nexthop(mpath, next_hop); mpath->sn = 0xffff; mpath->metric = 0; mpath->hop_count = 0; mpath->exp_time = 0; mpath->flags = MESH_PATH_FIXED | MESH_PATH_SN_VALID; mesh_path_activate(mpath); mesh_fast_tx_flush_mpath(mpath); spin_unlock_bh(&mpath->state_lock); ewma_mesh_fail_avg_init(&next_hop->mesh->fail_avg); /* init it at a low value - 0 start is tricky */ ewma_mesh_fail_avg_add(&next_hop->mesh->fail_avg, 1); mesh_path_tx_pending(mpath); } void mesh_pathtbl_init(struct ieee80211_sub_if_data *sdata) { mesh_table_init(&sdata->u.mesh.mesh_paths); mesh_table_init(&sdata->u.mesh.mpp_paths); mesh_fast_tx_init(sdata); } static void mesh_path_tbl_expire(struct ieee80211_sub_if_data *sdata, struct mesh_table *tbl) { struct mesh_path *mpath; struct hlist_node *n; spin_lock_bh(&tbl->walk_lock); hlist_for_each_entry_safe(mpath, n, &tbl->walk_head, walk_list) { if ((!(mpath->flags & MESH_PATH_RESOLVING)) && (!(mpath->flags & MESH_PATH_FIXED)) && time_after(jiffies, mpath->exp_time + MESH_PATH_EXPIRE)) __mesh_path_del(tbl, mpath); } spin_unlock_bh(&tbl->walk_lock); } void mesh_path_expire(struct ieee80211_sub_if_data *sdata) { mesh_path_tbl_expire(sdata, &sdata->u.mesh.mesh_paths); mesh_path_tbl_expire(sdata, &sdata->u.mesh.mpp_paths); } void mesh_pathtbl_unregister(struct ieee80211_sub_if_data *sdata) { mesh_fast_tx_deinit(sdata); mesh_table_free(&sdata->u.mesh.mesh_paths); mesh_table_free(&sdata->u.mesh.mpp_paths); } |
| 5 12 8 13 5 3 2 8 5 1 1 1 1 1 3 1 1 2 1 1 1 1 1 1 1 11 13 2 13 13 13 13 13 3 2 2 12 12 5 5 5 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 | // SPDX-License-Identifier: GPL-2.0-or-later /* Linux driver for Philips webcam USB and Video4Linux interface part. (C) 1999-2004 Nemosoft Unv. (C) 2004-2006 Luc Saillard (luc@saillard.org) (C) 2011 Hans de Goede <hdegoede@redhat.com> NOTE: this version of pwc is an unofficial (modified) release of pwc & pcwx driver and thus may have bugs that are not present in the original version. Please send bug reports and support requests to <luc@saillard.org>. The decompression routines have been implemented by reverse-engineering the Nemosoft binary pwcx module. Caveat emptor. */ /* This code forms the interface between the USB layers and the Philips specific stuff. Some adanved stuff of the driver falls under an NDA, signed between me and Philips B.V., Eindhoven, the Netherlands, and is thus not distributed in source form. The binary pwcx.o module contains the code that falls under the NDA. In case you're wondering: 'pwc' stands for "Philips WebCam", but I really didn't want to type 'philips_web_cam' every time (I'm lazy as any Linux kernel hacker, but I don't like uncomprehensible abbreviations without explanation). Oh yes, convention: to disctinguish between all the various pointers to device-structures, I use these names for the pointer variables: udev: struct usb_device * vdev: struct video_device (member of pwc_dev) pdev: struct pwc_devive * */ /* Contributors: - Alvarado: adding whitebalance code - Alistar Moire: QuickCam 3000 Pro device/product ID - Tony Hoyle: Creative Labs Webcam 5 device/product ID - Mark Burazin: solving hang in VIDIOCSYNC when camera gets unplugged - Jk Fang: Sotec Afina Eye ID - Xavier Roche: QuickCam Pro 4000 ID - Jens Knudsen: QuickCam Zoom ID - J. Debert: QuickCam for Notebooks ID - Pham Thanh Nam: webcam snapshot button as an event input device */ #include <linux/errno.h> #include <linux/init.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/poll.h> #include <linux/slab.h> #ifdef CONFIG_USB_PWC_INPUT_EVDEV #include <linux/usb/input.h> #endif #include <linux/vmalloc.h> #include <asm/io.h> #include <linux/kernel.h> /* simple_strtol() */ #include "pwc.h" #include "pwc-kiara.h" #include "pwc-timon.h" #include "pwc-dec23.h" #include "pwc-dec1.h" #define CREATE_TRACE_POINTS #include <trace/events/pwc.h> /* Function prototypes and driver templates */ /* hotplug device table support */ static const struct usb_device_id pwc_device_table [] = { { USB_DEVICE(0x041E, 0x400C) }, /* Creative Webcam 5 */ { USB_DEVICE(0x041E, 0x4011) }, /* Creative Webcam Pro Ex */ { USB_DEVICE(0x046D, 0x08B0) }, /* Logitech QuickCam 3000 Pro */ { USB_DEVICE(0x046D, 0x08B1) }, /* Logitech QuickCam Notebook Pro */ { USB_DEVICE(0x046D, 0x08B2) }, /* Logitech QuickCam 4000 Pro */ { USB_DEVICE(0x046D, 0x08B3) }, /* Logitech QuickCam Zoom (old model) */ { USB_DEVICE(0x046D, 0x08B4) }, /* Logitech QuickCam Zoom (new model) */ { USB_DEVICE(0x046D, 0x08B5) }, /* Logitech QuickCam Orbit/Sphere */ { USB_DEVICE(0x046D, 0x08B6) }, /* Logitech/Cisco VT Camera */ { USB_DEVICE(0x046D, 0x08B7) }, /* Logitech ViewPort AV 100 */ { USB_DEVICE(0x046D, 0x08B8) }, /* Logitech QuickCam */ { USB_DEVICE(0x0471, 0x0302) }, /* Philips PCA645VC */ { USB_DEVICE(0x0471, 0x0303) }, /* Philips PCA646VC */ { USB_DEVICE(0x0471, 0x0304) }, /* Askey VC010 type 2 */ { USB_DEVICE(0x0471, 0x0307) }, /* Philips PCVC675K (Vesta) */ { USB_DEVICE(0x0471, 0x0308) }, /* Philips PCVC680K (Vesta Pro) */ { USB_DEVICE(0x0471, 0x030C) }, /* Philips PCVC690K (Vesta Pro Scan) */ { USB_DEVICE(0x0471, 0x0310) }, /* Philips PCVC730K (ToUCam Fun)/PCVC830 (ToUCam II) */ { USB_DEVICE(0x0471, 0x0311) }, /* Philips PCVC740K (ToUCam Pro)/PCVC840 (ToUCam II) */ { USB_DEVICE(0x0471, 0x0312) }, /* Philips PCVC750K (ToUCam Pro Scan) */ { USB_DEVICE(0x0471, 0x0313) }, /* Philips PCVC720K/40 (ToUCam XS) */ { USB_DEVICE(0x0471, 0x0329) }, /* Philips SPC 900NC webcam */ { USB_DEVICE(0x0471, 0x032C) }, /* Philips SPC 880NC webcam */ { USB_DEVICE(0x04CC, 0x8116) }, /* Sotec Afina Eye */ { USB_DEVICE(0x055D, 0x9000) }, /* Samsung MPC-C10 */ { USB_DEVICE(0x055D, 0x9001) }, /* Samsung MPC-C30 */ { USB_DEVICE(0x055D, 0x9002) }, /* Samsung SNC-35E (Ver3.0) */ { USB_DEVICE(0x069A, 0x0001) }, /* Askey VC010 type 1 */ { USB_DEVICE(0x06BE, 0x8116) }, /* AME Co. Afina Eye */ { USB_DEVICE(0x0d81, 0x1900) }, /* Visionite VCS-UC300 */ { USB_DEVICE(0x0d81, 0x1910) }, /* Visionite VCS-UM100 */ { } }; MODULE_DEVICE_TABLE(usb, pwc_device_table); static int usb_pwc_probe(struct usb_interface *intf, const struct usb_device_id *id); static void usb_pwc_disconnect(struct usb_interface *intf); static void pwc_isoc_cleanup(struct pwc_device *pdev); static struct usb_driver pwc_driver = { .name = "Philips webcam", /* name */ .id_table = pwc_device_table, .probe = usb_pwc_probe, /* probe() */ .disconnect = usb_pwc_disconnect, /* disconnect() */ }; #define MAX_DEV_HINTS 20 #define MAX_ISOC_ERRORS 20 #ifdef CONFIG_USB_PWC_DEBUG int pwc_trace = PWC_DEBUG_LEVEL; #endif static int power_save = -1; static int leds[2] = { 100, 0 }; /***/ static const struct v4l2_file_operations pwc_fops = { .owner = THIS_MODULE, .open = v4l2_fh_open, .release = vb2_fop_release, .read = vb2_fop_read, .poll = vb2_fop_poll, .mmap = vb2_fop_mmap, .unlocked_ioctl = video_ioctl2, }; static const struct video_device pwc_template = { .name = "Philips Webcam", /* Filled in later */ .release = video_device_release_empty, .fops = &pwc_fops, .ioctl_ops = &pwc_ioctl_ops, }; /***************************************************************************/ /* Private functions */ static void *pwc_alloc_urb_buffer(struct usb_device *dev, size_t size, dma_addr_t *dma_handle) { struct device *dmadev = dev->bus->sysdev; void *buffer = kmalloc(size, GFP_KERNEL); if (!buffer) return NULL; *dma_handle = dma_map_single(dmadev, buffer, size, DMA_FROM_DEVICE); if (dma_mapping_error(dmadev, *dma_handle)) { kfree(buffer); return NULL; } return buffer; } static void pwc_free_urb_buffer(struct usb_device *dev, size_t size, void *buffer, dma_addr_t dma_handle) { struct device *dmadev = dev->bus->sysdev; dma_unmap_single(dmadev, dma_handle, size, DMA_FROM_DEVICE); kfree(buffer); } static struct pwc_frame_buf *pwc_get_next_fill_buf(struct pwc_device *pdev) { unsigned long flags = 0; struct pwc_frame_buf *buf = NULL; spin_lock_irqsave(&pdev->queued_bufs_lock, flags); if (list_empty(&pdev->queued_bufs)) goto leave; buf = list_entry(pdev->queued_bufs.next, struct pwc_frame_buf, list); list_del(&buf->list); leave: spin_unlock_irqrestore(&pdev->queued_bufs_lock, flags); return buf; } static void pwc_snapshot_button(struct pwc_device *pdev, int down) { if (down) { PWC_TRACE("Snapshot button pressed.\n"); } else { PWC_TRACE("Snapshot button released.\n"); } #ifdef CONFIG_USB_PWC_INPUT_EVDEV if (pdev->button_dev) { input_report_key(pdev->button_dev, KEY_CAMERA, down); input_sync(pdev->button_dev); } #endif } static void pwc_frame_complete(struct pwc_device *pdev) { struct pwc_frame_buf *fbuf = pdev->fill_buf; /* The ToUCam Fun CMOS sensor causes the firmware to send 2 or 3 bogus frames on the USB wire after an exposure change. This conditition is however detected in the cam and a bit is set in the header. */ if (pdev->type == 730) { unsigned char *ptr = (unsigned char *)fbuf->data; if (ptr[1] == 1 && ptr[0] & 0x10) { PWC_TRACE("Hyundai CMOS sensor bug. Dropping frame.\n"); pdev->drop_frames += 2; } if ((ptr[0] ^ pdev->vmirror) & 0x01) { pwc_snapshot_button(pdev, ptr[0] & 0x01); } if ((ptr[0] ^ pdev->vmirror) & 0x02) { if (ptr[0] & 0x02) PWC_TRACE("Image is mirrored.\n"); else PWC_TRACE("Image is normal.\n"); } pdev->vmirror = ptr[0] & 0x03; /* Sometimes the trailer of the 730 is still sent as a 4 byte packet after a short frame; this condition is filtered out specifically. A 4 byte frame doesn't make sense anyway. So we get either this sequence: drop_bit set -> 4 byte frame -> short frame -> good frame Or this one: drop_bit set -> short frame -> good frame So we drop either 3 or 2 frames in all! */ if (fbuf->filled == 4) pdev->drop_frames++; } else if (pdev->type == 740 || pdev->type == 720) { unsigned char *ptr = (unsigned char *)fbuf->data; if ((ptr[0] ^ pdev->vmirror) & 0x01) { pwc_snapshot_button(pdev, ptr[0] & 0x01); } pdev->vmirror = ptr[0] & 0x03; } /* In case we were instructed to drop the frame, do so silently. */ if (pdev->drop_frames > 0) { pdev->drop_frames--; } else { /* Check for underflow first */ if (fbuf->filled < pdev->frame_total_size) { PWC_DEBUG_FLOW("Frame buffer underflow (%d bytes); discarded.\n", fbuf->filled); } else { fbuf->vb.field = V4L2_FIELD_NONE; fbuf->vb.sequence = pdev->vframe_count; vb2_buffer_done(&fbuf->vb.vb2_buf, VB2_BUF_STATE_DONE); pdev->fill_buf = NULL; pdev->vsync = 0; } } /* !drop_frames */ pdev->vframe_count++; } /* This gets called for the Isochronous pipe (video). This is done in * interrupt time, so it has to be fast, not crash, and not stall. Neat. */ static void pwc_isoc_handler(struct urb *urb) { struct pwc_device *pdev = (struct pwc_device *)urb->context; struct device *dmadev = urb->dev->bus->sysdev; int i, fst, flen; unsigned char *iso_buf = NULL; trace_pwc_handler_enter(urb, pdev); if (urb->status == -ENOENT || urb->status == -ECONNRESET || urb->status == -ESHUTDOWN) { PWC_DEBUG_OPEN("URB (%p) unlinked %ssynchronously.\n", urb, urb->status == -ENOENT ? "" : "a"); return; } if (pdev->fill_buf == NULL) pdev->fill_buf = pwc_get_next_fill_buf(pdev); if (urb->status != 0) { const char *errmsg; errmsg = "Unknown"; switch(urb->status) { case -ENOSR: errmsg = "Buffer error (overrun)"; break; case -EPIPE: errmsg = "Stalled (device not responding)"; break; case -EOVERFLOW: errmsg = "Babble (bad cable?)"; break; case -EPROTO: errmsg = "Bit-stuff error (bad cable?)"; break; case -EILSEQ: errmsg = "CRC/Timeout (could be anything)"; break; case -ETIME: errmsg = "Device does not respond"; break; } PWC_ERROR("pwc_isoc_handler() called with status %d [%s].\n", urb->status, errmsg); /* Give up after a number of contiguous errors */ if (++pdev->visoc_errors > MAX_ISOC_ERRORS) { PWC_ERROR("Too many ISOC errors, bailing out.\n"); if (pdev->fill_buf) { vb2_buffer_done(&pdev->fill_buf->vb.vb2_buf, VB2_BUF_STATE_ERROR); pdev->fill_buf = NULL; } } pdev->vsync = 0; /* Drop the current frame */ goto handler_end; } /* Reset ISOC error counter. We did get here, after all. */ pdev->visoc_errors = 0; dma_sync_single_for_cpu(dmadev, urb->transfer_dma, urb->transfer_buffer_length, DMA_FROM_DEVICE); /* vsync: 0 = don't copy data 1 = sync-hunt 2 = synched */ /* Compact data */ for (i = 0; i < urb->number_of_packets; i++) { fst = urb->iso_frame_desc[i].status; flen = urb->iso_frame_desc[i].actual_length; iso_buf = urb->transfer_buffer + urb->iso_frame_desc[i].offset; if (fst != 0) { PWC_ERROR("Iso frame %d has error %d\n", i, fst); continue; } if (flen > 0 && pdev->vsync) { struct pwc_frame_buf *fbuf = pdev->fill_buf; if (pdev->vsync == 1) { fbuf->vb.vb2_buf.timestamp = ktime_get_ns(); pdev->vsync = 2; } if (flen + fbuf->filled > pdev->frame_total_size) { PWC_ERROR("Frame overflow (%d > %d)\n", flen + fbuf->filled, pdev->frame_total_size); pdev->vsync = 0; /* Let's wait for an EOF */ } else { memcpy(fbuf->data + fbuf->filled, iso_buf, flen); fbuf->filled += flen; } } if (flen < pdev->vlast_packet_size) { /* Shorter packet... end of frame */ if (pdev->vsync == 2) pwc_frame_complete(pdev); if (pdev->fill_buf == NULL) pdev->fill_buf = pwc_get_next_fill_buf(pdev); if (pdev->fill_buf) { pdev->fill_buf->filled = 0; pdev->vsync = 1; } } pdev->vlast_packet_size = flen; } dma_sync_single_for_device(dmadev, urb->transfer_dma, urb->transfer_buffer_length, DMA_FROM_DEVICE); handler_end: trace_pwc_handler_exit(urb, pdev); i = usb_submit_urb(urb, GFP_ATOMIC); if (i != 0) PWC_ERROR("Error (%d) re-submitting urb in pwc_isoc_handler.\n", i); } /* Both v4l2_lock and vb_queue_lock should be locked when calling this */ static int pwc_isoc_init(struct pwc_device *pdev) { struct usb_device *udev; struct urb *urb; int i, j, ret; struct usb_interface *intf; struct usb_host_interface *idesc = NULL; int compression = 0; /* 0..3 = uncompressed..high */ pdev->vsync = 0; pdev->vlast_packet_size = 0; pdev->fill_buf = NULL; pdev->vframe_count = 0; pdev->visoc_errors = 0; udev = pdev->udev; retry: /* We first try with low compression and then retry with a higher compression setting if there is not enough bandwidth. */ ret = pwc_set_video_mode(pdev, pdev->width, pdev->height, pdev->pixfmt, pdev->vframes, &compression, 1); /* Get the current alternate interface, adjust packet size */ intf = usb_ifnum_to_if(udev, 0); if (intf) idesc = usb_altnum_to_altsetting(intf, pdev->valternate); if (!idesc) return -EIO; /* Search video endpoint */ pdev->vmax_packet_size = -1; for (i = 0; i < idesc->desc.bNumEndpoints; i++) { if ((idesc->endpoint[i].desc.bEndpointAddress & 0xF) == pdev->vendpoint) { pdev->vmax_packet_size = le16_to_cpu(idesc->endpoint[i].desc.wMaxPacketSize); break; } } if (pdev->vmax_packet_size < 0 || pdev->vmax_packet_size > ISO_MAX_FRAME_SIZE) { PWC_ERROR("Failed to find packet size for video endpoint in current alternate setting.\n"); return -ENFILE; /* Odd error, that should be noticeable */ } /* Set alternate interface */ PWC_DEBUG_OPEN("Setting alternate interface %d\n", pdev->valternate); ret = usb_set_interface(pdev->udev, 0, pdev->valternate); if (ret == -ENOSPC && compression < 3) { compression++; goto retry; } if (ret < 0) return ret; /* Allocate and init Isochronuous urbs */ for (i = 0; i < MAX_ISO_BUFS; i++) { urb = usb_alloc_urb(ISO_FRAMES_PER_DESC, GFP_KERNEL); if (urb == NULL) { pwc_isoc_cleanup(pdev); return -ENOMEM; } pdev->urbs[i] = urb; PWC_DEBUG_MEMORY("Allocated URB at 0x%p\n", urb); urb->interval = 1; // devik urb->dev = udev; urb->pipe = usb_rcvisocpipe(udev, pdev->vendpoint); urb->transfer_flags = URB_ISO_ASAP | URB_NO_TRANSFER_DMA_MAP; urb->transfer_buffer_length = ISO_BUFFER_SIZE; urb->transfer_buffer = pwc_alloc_urb_buffer(udev, urb->transfer_buffer_length, &urb->transfer_dma); if (urb->transfer_buffer == NULL) { PWC_ERROR("Failed to allocate urb buffer %d\n", i); pwc_isoc_cleanup(pdev); return -ENOMEM; } urb->complete = pwc_isoc_handler; urb->context = pdev; urb->start_frame = 0; urb->number_of_packets = ISO_FRAMES_PER_DESC; for (j = 0; j < ISO_FRAMES_PER_DESC; j++) { urb->iso_frame_desc[j].offset = j * ISO_MAX_FRAME_SIZE; urb->iso_frame_desc[j].length = pdev->vmax_packet_size; } } /* link */ for (i = 0; i < MAX_ISO_BUFS; i++) { ret = usb_submit_urb(pdev->urbs[i], GFP_KERNEL); if (ret == -ENOSPC && compression < 3) { compression++; pwc_isoc_cleanup(pdev); goto retry; } if (ret) { PWC_ERROR("isoc_init() submit_urb %d failed with error %d\n", i, ret); pwc_isoc_cleanup(pdev); return ret; } PWC_DEBUG_MEMORY("URB 0x%p submitted.\n", pdev->urbs[i]); } /* All is done... */ PWC_DEBUG_OPEN("<< pwc_isoc_init()\n"); return 0; } static void pwc_iso_stop(struct pwc_device *pdev) { int i; /* Unlinking ISOC buffers one by one */ for (i = 0; i < MAX_ISO_BUFS; i++) { if (pdev->urbs[i]) { PWC_DEBUG_MEMORY("Unlinking URB %p\n", pdev->urbs[i]); usb_kill_urb(pdev->urbs[i]); } } } static void pwc_iso_free(struct pwc_device *pdev) { int i; /* Freeing ISOC buffers one by one */ for (i = 0; i < MAX_ISO_BUFS; i++) { struct urb *urb = pdev->urbs[i]; if (urb) { PWC_DEBUG_MEMORY("Freeing URB\n"); if (urb->transfer_buffer) pwc_free_urb_buffer(urb->dev, urb->transfer_buffer_length, urb->transfer_buffer, urb->transfer_dma); usb_free_urb(urb); pdev->urbs[i] = NULL; } } } /* Both v4l2_lock and vb_queue_lock should be locked when calling this */ static void pwc_isoc_cleanup(struct pwc_device *pdev) { PWC_DEBUG_OPEN(">> pwc_isoc_cleanup()\n"); pwc_iso_stop(pdev); pwc_iso_free(pdev); usb_set_interface(pdev->udev, 0, 0); PWC_DEBUG_OPEN("<< pwc_isoc_cleanup()\n"); } /* Must be called with vb_queue_lock hold */ static void pwc_cleanup_queued_bufs(struct pwc_device *pdev, enum vb2_buffer_state state) { unsigned long flags = 0; spin_lock_irqsave(&pdev->queued_bufs_lock, flags); while (!list_empty(&pdev->queued_bufs)) { struct pwc_frame_buf *buf; buf = list_entry(pdev->queued_bufs.next, struct pwc_frame_buf, list); list_del(&buf->list); vb2_buffer_done(&buf->vb.vb2_buf, state); } spin_unlock_irqrestore(&pdev->queued_bufs_lock, flags); } #ifdef CONFIG_USB_PWC_DEBUG static const char *pwc_sensor_type_to_string(unsigned int sensor_type) { switch(sensor_type) { case 0x00: return "Hyundai CMOS sensor"; case 0x20: return "Sony CCD sensor + TDA8787"; case 0x2E: return "Sony CCD sensor + Exas 98L59"; case 0x2F: return "Sony CCD sensor + ADI 9804"; case 0x30: return "Sharp CCD sensor + TDA8787"; case 0x3E: return "Sharp CCD sensor + Exas 98L59"; case 0x3F: return "Sharp CCD sensor + ADI 9804"; case 0x40: return "UPA 1021 sensor"; case 0x100: return "VGA sensor"; case 0x101: return "PAL MR sensor"; default: return "unknown type of sensor"; } } #endif /***************************************************************************/ /* Video4Linux functions */ static void pwc_video_release(struct v4l2_device *v) { struct pwc_device *pdev = container_of(v, struct pwc_device, v4l2_dev); v4l2_ctrl_handler_free(&pdev->ctrl_handler); v4l2_device_unregister(&pdev->v4l2_dev); kfree(pdev->ctrl_buf); kfree(pdev); } /***************************************************************************/ /* Videobuf2 operations */ static int queue_setup(struct vb2_queue *vq, unsigned int *nbuffers, unsigned int *nplanes, unsigned int sizes[], struct device *alloc_devs[]) { struct pwc_device *pdev = vb2_get_drv_priv(vq); int size; if (*nbuffers < MIN_FRAMES) *nbuffers = MIN_FRAMES; else if (*nbuffers > MAX_FRAMES) *nbuffers = MAX_FRAMES; *nplanes = 1; size = pwc_get_size(pdev, MAX_WIDTH, MAX_HEIGHT); sizes[0] = PAGE_ALIGN(pwc_image_sizes[size][0] * pwc_image_sizes[size][1] * 3 / 2); return 0; } static int buffer_init(struct vb2_buffer *vb) { struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct pwc_frame_buf *buf = container_of(vbuf, struct pwc_frame_buf, vb); /* need vmalloc since frame buffer > 128K */ buf->data = vzalloc(PWC_FRAME_SIZE); if (buf->data == NULL) return -ENOMEM; return 0; } static int buffer_prepare(struct vb2_buffer *vb) { struct pwc_device *pdev = vb2_get_drv_priv(vb->vb2_queue); /* Don't allow queueing new buffers after device disconnection */ if (!pdev->udev) return -ENODEV; return 0; } static void buffer_finish(struct vb2_buffer *vb) { struct pwc_device *pdev = vb2_get_drv_priv(vb->vb2_queue); struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct pwc_frame_buf *buf = container_of(vbuf, struct pwc_frame_buf, vb); if (vb->state == VB2_BUF_STATE_DONE) { /* * Application has called dqbuf and is getting back a buffer * we've filled, take the pwc data we've stored in buf->data * and decompress it into a usable format, storing the result * in the vb2_buffer. */ pwc_decompress(pdev, buf); } } static void buffer_cleanup(struct vb2_buffer *vb) { struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct pwc_frame_buf *buf = container_of(vbuf, struct pwc_frame_buf, vb); vfree(buf->data); } static void buffer_queue(struct vb2_buffer *vb) { struct pwc_device *pdev = vb2_get_drv_priv(vb->vb2_queue); struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); struct pwc_frame_buf *buf = container_of(vbuf, struct pwc_frame_buf, vb); unsigned long flags = 0; /* Check the device has not disconnected between prep and queuing */ if (!pdev->udev) { vb2_buffer_done(vb, VB2_BUF_STATE_ERROR); return; } spin_lock_irqsave(&pdev->queued_bufs_lock, flags); list_add_tail(&buf->list, &pdev->queued_bufs); spin_unlock_irqrestore(&pdev->queued_bufs_lock, flags); } static int start_streaming(struct vb2_queue *vq, unsigned int count) { struct pwc_device *pdev = vb2_get_drv_priv(vq); int r; if (!pdev->udev) return -ENODEV; if (mutex_lock_interruptible(&pdev->v4l2_lock)) return -ERESTARTSYS; /* Turn on camera and set LEDS on */ pwc_camera_power(pdev, 1); pwc_set_leds(pdev, leds[0], leds[1]); r = pwc_isoc_init(pdev); if (r) { /* If we failed turn camera and LEDS back off */ pwc_set_leds(pdev, 0, 0); pwc_camera_power(pdev, 0); /* And cleanup any queued bufs!! */ pwc_cleanup_queued_bufs(pdev, VB2_BUF_STATE_QUEUED); } mutex_unlock(&pdev->v4l2_lock); return r; } static void stop_streaming(struct vb2_queue *vq) { struct pwc_device *pdev = vb2_get_drv_priv(vq); mutex_lock(&pdev->v4l2_lock); if (pdev->udev) { pwc_set_leds(pdev, 0, 0); pwc_camera_power(pdev, 0); pwc_isoc_cleanup(pdev); } pwc_cleanup_queued_bufs(pdev, VB2_BUF_STATE_ERROR); if (pdev->fill_buf) vb2_buffer_done(&pdev->fill_buf->vb.vb2_buf, VB2_BUF_STATE_ERROR); mutex_unlock(&pdev->v4l2_lock); } static const struct vb2_ops pwc_vb_queue_ops = { .queue_setup = queue_setup, .buf_init = buffer_init, .buf_prepare = buffer_prepare, .buf_finish = buffer_finish, .buf_cleanup = buffer_cleanup, .buf_queue = buffer_queue, .start_streaming = start_streaming, .stop_streaming = stop_streaming, .wait_prepare = vb2_ops_wait_prepare, .wait_finish = vb2_ops_wait_finish, }; /***************************************************************************/ /* USB functions */ /* This function gets called when a new device is plugged in or the usb core * is loaded. */ static int usb_pwc_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(intf); struct pwc_device *pdev = NULL; int vendor_id, product_id, type_id; int rc; int features = 0; int compression = 0; int my_power_save = power_save; char serial_number[30], *name; vendor_id = le16_to_cpu(udev->descriptor.idVendor); product_id = le16_to_cpu(udev->descriptor.idProduct); /* Check if we can handle this device */ PWC_DEBUG_PROBE("probe() called [%04X %04X], if %d\n", vendor_id, product_id, intf->altsetting->desc.bInterfaceNumber); /* the interfaces are probed one by one. We are only interested in the video interface (0) now. Interface 1 is the Audio Control, and interface 2 Audio itself. */ if (intf->altsetting->desc.bInterfaceNumber > 0) return -ENODEV; if (vendor_id == 0x0471) { switch (product_id) { case 0x0302: PWC_INFO("Philips PCA645VC USB webcam detected.\n"); name = "Philips 645 webcam"; type_id = 645; break; case 0x0303: PWC_INFO("Philips PCA646VC USB webcam detected.\n"); name = "Philips 646 webcam"; type_id = 646; break; case 0x0304: PWC_INFO("Askey VC010 type 2 USB webcam detected.\n"); name = "Askey VC010 webcam"; type_id = 646; break; case 0x0307: PWC_INFO("Philips PCVC675K (Vesta) USB webcam detected.\n"); name = "Philips 675 webcam"; type_id = 675; break; case 0x0308: PWC_INFO("Philips PCVC680K (Vesta Pro) USB webcam detected.\n"); name = "Philips 680 webcam"; type_id = 680; break; case 0x030C: PWC_INFO("Philips PCVC690K (Vesta Pro Scan) USB webcam detected.\n"); name = "Philips 690 webcam"; type_id = 690; break; case 0x0310: PWC_INFO("Philips PCVC730K (ToUCam Fun)/PCVC830 (ToUCam II) USB webcam detected.\n"); name = "Philips 730 webcam"; type_id = 730; break; case 0x0311: PWC_INFO("Philips PCVC740K (ToUCam Pro)/PCVC840 (ToUCam II) USB webcam detected.\n"); name = "Philips 740 webcam"; type_id = 740; break; case 0x0312: PWC_INFO("Philips PCVC750K (ToUCam Pro Scan) USB webcam detected.\n"); name = "Philips 750 webcam"; type_id = 750; break; case 0x0313: PWC_INFO("Philips PCVC720K/40 (ToUCam XS) USB webcam detected.\n"); name = "Philips 720K/40 webcam"; type_id = 720; break; case 0x0329: PWC_INFO("Philips SPC 900NC USB webcam detected.\n"); name = "Philips SPC 900NC webcam"; type_id = 740; break; case 0x032C: PWC_INFO("Philips SPC 880NC USB webcam detected.\n"); name = "Philips SPC 880NC webcam"; type_id = 740; break; default: return -ENODEV; } } else if (vendor_id == 0x069A) { switch(product_id) { case 0x0001: PWC_INFO("Askey VC010 type 1 USB webcam detected.\n"); name = "Askey VC010 webcam"; type_id = 645; break; default: return -ENODEV; } } else if (vendor_id == 0x046d) { switch(product_id) { case 0x08b0: PWC_INFO("Logitech QuickCam Pro 3000 USB webcam detected.\n"); name = "Logitech QuickCam Pro 3000"; type_id = 740; /* CCD sensor */ break; case 0x08b1: PWC_INFO("Logitech QuickCam Notebook Pro USB webcam detected.\n"); name = "Logitech QuickCam Notebook Pro"; type_id = 740; /* CCD sensor */ break; case 0x08b2: PWC_INFO("Logitech QuickCam 4000 Pro USB webcam detected.\n"); name = "Logitech QuickCam Pro 4000"; type_id = 740; /* CCD sensor */ if (my_power_save == -1) my_power_save = 1; break; case 0x08b3: PWC_INFO("Logitech QuickCam Zoom USB webcam detected.\n"); name = "Logitech QuickCam Zoom"; type_id = 740; /* CCD sensor */ break; case 0x08B4: PWC_INFO("Logitech QuickCam Zoom (new model) USB webcam detected.\n"); name = "Logitech QuickCam Zoom"; type_id = 740; /* CCD sensor */ if (my_power_save == -1) my_power_save = 1; break; case 0x08b5: PWC_INFO("Logitech QuickCam Orbit/Sphere USB webcam detected.\n"); name = "Logitech QuickCam Orbit"; type_id = 740; /* CCD sensor */ if (my_power_save == -1) my_power_save = 1; features |= FEATURE_MOTOR_PANTILT; break; case 0x08b6: PWC_INFO("Logitech/Cisco VT Camera webcam detected.\n"); name = "Cisco VT Camera"; type_id = 740; /* CCD sensor */ break; case 0x08b7: PWC_INFO("Logitech ViewPort AV 100 webcam detected.\n"); name = "Logitech ViewPort AV 100"; type_id = 740; /* CCD sensor */ break; case 0x08b8: /* Where this released? */ PWC_INFO("Logitech QuickCam detected (reserved ID).\n"); name = "Logitech QuickCam (res.)"; type_id = 730; /* Assuming CMOS */ break; default: return -ENODEV; } } else if (vendor_id == 0x055d) { /* I don't know the difference between the C10 and the C30; I suppose the difference is the sensor, but both cameras work equally well with a type_id of 675 */ switch(product_id) { case 0x9000: PWC_INFO("Samsung MPC-C10 USB webcam detected.\n"); name = "Samsung MPC-C10"; type_id = 675; break; case 0x9001: PWC_INFO("Samsung MPC-C30 USB webcam detected.\n"); name = "Samsung MPC-C30"; type_id = 675; break; case 0x9002: PWC_INFO("Samsung SNC-35E (v3.0) USB webcam detected.\n"); name = "Samsung MPC-C30"; type_id = 740; break; default: return -ENODEV; } } else if (vendor_id == 0x041e) { switch(product_id) { case 0x400c: PWC_INFO("Creative Labs Webcam 5 detected.\n"); name = "Creative Labs Webcam 5"; type_id = 730; if (my_power_save == -1) my_power_save = 1; break; case 0x4011: PWC_INFO("Creative Labs Webcam Pro Ex detected.\n"); name = "Creative Labs Webcam Pro Ex"; type_id = 740; break; default: return -ENODEV; } } else if (vendor_id == 0x04cc) { switch(product_id) { case 0x8116: PWC_INFO("Sotec Afina Eye USB webcam detected.\n"); name = "Sotec Afina Eye"; type_id = 730; break; default: return -ENODEV; } } else if (vendor_id == 0x06be) { switch(product_id) { case 0x8116: /* This is essentially the same cam as the Sotec Afina Eye */ PWC_INFO("AME Co. Afina Eye USB webcam detected.\n"); name = "AME Co. Afina Eye"; type_id = 750; break; default: return -ENODEV; } } else if (vendor_id == 0x0d81) { switch(product_id) { case 0x1900: PWC_INFO("Visionite VCS-UC300 USB webcam detected.\n"); name = "Visionite VCS-UC300"; type_id = 740; /* CCD sensor */ break; case 0x1910: PWC_INFO("Visionite VCS-UM100 USB webcam detected.\n"); name = "Visionite VCS-UM100"; type_id = 730; /* CMOS sensor */ break; default: return -ENODEV; } } else return -ENODEV; /* Not any of the know types; but the list keeps growing. */ if (my_power_save == -1) my_power_save = 0; memset(serial_number, 0, 30); usb_string(udev, udev->descriptor.iSerialNumber, serial_number, 29); PWC_DEBUG_PROBE("Device serial number is %s\n", serial_number); if (udev->descriptor.bNumConfigurations > 1) PWC_WARNING("Warning: more than 1 configuration available.\n"); /* Allocate structure, initialize pointers, mutexes, etc. and link it to the usb_device */ pdev = kzalloc(sizeof(struct pwc_device), GFP_KERNEL); if (pdev == NULL) { PWC_ERROR("Oops, could not allocate memory for pwc_device.\n"); return -ENOMEM; } pdev->type = type_id; pdev->features = features; pwc_construct(pdev); /* set min/max sizes correct */ mutex_init(&pdev->v4l2_lock); mutex_init(&pdev->vb_queue_lock); spin_lock_init(&pdev->queued_bufs_lock); INIT_LIST_HEAD(&pdev->queued_bufs); pdev->udev = udev; pdev->power_save = my_power_save; /* Init videobuf2 queue structure */ pdev->vb_queue.type = V4L2_BUF_TYPE_VIDEO_CAPTURE; pdev->vb_queue.io_modes = VB2_MMAP | VB2_USERPTR | VB2_READ; pdev->vb_queue.drv_priv = pdev; pdev->vb_queue.buf_struct_size = sizeof(struct pwc_frame_buf); pdev->vb_queue.ops = &pwc_vb_queue_ops; pdev->vb_queue.mem_ops = &vb2_vmalloc_memops; pdev->vb_queue.timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC; rc = vb2_queue_init(&pdev->vb_queue); if (rc < 0) { PWC_ERROR("Oops, could not initialize vb2 queue.\n"); goto err_free_mem; } /* Init video_device structure */ pdev->vdev = pwc_template; strscpy(pdev->vdev.name, name, sizeof(pdev->vdev.name)); pdev->vdev.queue = &pdev->vb_queue; pdev->vdev.queue->lock = &pdev->vb_queue_lock; video_set_drvdata(&pdev->vdev, pdev); pdev->release = le16_to_cpu(udev->descriptor.bcdDevice); PWC_DEBUG_PROBE("Release: %04x\n", pdev->release); /* Allocate USB command buffers */ pdev->ctrl_buf = kmalloc(sizeof(pdev->cmd_buf), GFP_KERNEL); if (!pdev->ctrl_buf) { PWC_ERROR("Oops, could not allocate memory for pwc_device.\n"); rc = -ENOMEM; goto err_free_mem; } #ifdef CONFIG_USB_PWC_DEBUG /* Query sensor type */ if (pwc_get_cmos_sensor(pdev, &rc) >= 0) { PWC_DEBUG_OPEN("This %s camera is equipped with a %s (%d).\n", pdev->vdev.name, pwc_sensor_type_to_string(rc), rc); } #endif /* Set the leds off */ pwc_set_leds(pdev, 0, 0); /* Setup initial videomode */ rc = pwc_set_video_mode(pdev, MAX_WIDTH, MAX_HEIGHT, V4L2_PIX_FMT_YUV420, 30, &compression, 1); if (rc) goto err_free_mem; /* Register controls (and read default values from camera */ rc = pwc_init_controls(pdev); if (rc) { PWC_ERROR("Failed to register v4l2 controls (%d).\n", rc); goto err_free_mem; } /* And powerdown the camera until streaming starts */ pwc_camera_power(pdev, 0); /* Register the v4l2_device structure */ pdev->v4l2_dev.release = pwc_video_release; rc = v4l2_device_register(&intf->dev, &pdev->v4l2_dev); if (rc) { PWC_ERROR("Failed to register v4l2-device (%d).\n", rc); goto err_free_controls; } pdev->v4l2_dev.ctrl_handler = &pdev->ctrl_handler; pdev->vdev.v4l2_dev = &pdev->v4l2_dev; pdev->vdev.lock = &pdev->v4l2_lock; pdev->vdev.device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_STREAMING | V4L2_CAP_READWRITE; rc = video_register_device(&pdev->vdev, VFL_TYPE_VIDEO, -1); if (rc < 0) { PWC_ERROR("Failed to register as video device (%d).\n", rc); goto err_unregister_v4l2_dev; } PWC_INFO("Registered as %s.\n", video_device_node_name(&pdev->vdev)); #ifdef CONFIG_USB_PWC_INPUT_EVDEV /* register webcam snapshot button input device */ pdev->button_dev = input_allocate_device(); if (!pdev->button_dev) { rc = -ENOMEM; goto err_video_unreg; } usb_make_path(udev, pdev->button_phys, sizeof(pdev->button_phys)); strlcat(pdev->button_phys, "/input0", sizeof(pdev->button_phys)); pdev->button_dev->name = "PWC snapshot button"; pdev->button_dev->phys = pdev->button_phys; usb_to_input_id(pdev->udev, &pdev->button_dev->id); pdev->button_dev->dev.parent = &pdev->udev->dev; pdev->button_dev->evbit[0] = BIT_MASK(EV_KEY); pdev->button_dev->keybit[BIT_WORD(KEY_CAMERA)] = BIT_MASK(KEY_CAMERA); rc = input_register_device(pdev->button_dev); if (rc) { input_free_device(pdev->button_dev); pdev->button_dev = NULL; goto err_video_unreg; } #endif return 0; #ifdef CONFIG_USB_PWC_INPUT_EVDEV err_video_unreg: video_unregister_device(&pdev->vdev); #endif err_unregister_v4l2_dev: v4l2_device_unregister(&pdev->v4l2_dev); err_free_controls: v4l2_ctrl_handler_free(&pdev->ctrl_handler); err_free_mem: kfree(pdev->ctrl_buf); kfree(pdev); return rc; } /* The user yanked out the cable... */ static void usb_pwc_disconnect(struct usb_interface *intf) { struct v4l2_device *v = usb_get_intfdata(intf); struct pwc_device *pdev = container_of(v, struct pwc_device, v4l2_dev); mutex_lock(&pdev->vb_queue_lock); mutex_lock(&pdev->v4l2_lock); /* No need to keep the urbs around after disconnection */ if (pdev->vb_queue.streaming) pwc_isoc_cleanup(pdev); pdev->udev = NULL; v4l2_device_disconnect(&pdev->v4l2_dev); video_unregister_device(&pdev->vdev); mutex_unlock(&pdev->v4l2_lock); mutex_unlock(&pdev->vb_queue_lock); #ifdef CONFIG_USB_PWC_INPUT_EVDEV if (pdev->button_dev) input_unregister_device(pdev->button_dev); #endif v4l2_device_put(&pdev->v4l2_dev); } /* * Initialization code & module stuff */ static unsigned int leds_nargs; #ifdef CONFIG_USB_PWC_DEBUG module_param_named(trace, pwc_trace, int, 0644); #endif module_param(power_save, int, 0644); module_param_array(leds, int, &leds_nargs, 0444); #ifdef CONFIG_USB_PWC_DEBUG MODULE_PARM_DESC(trace, "For debugging purposes"); #endif MODULE_PARM_DESC(power_save, "Turn power saving for new cameras on or off"); MODULE_PARM_DESC(leds, "LED on,off time in milliseconds"); MODULE_DESCRIPTION("Philips & OEM USB webcam driver"); MODULE_AUTHOR("Luc Saillard <luc@saillard.org>"); MODULE_LICENSE("GPL"); MODULE_ALIAS("pwcx"); MODULE_VERSION( PWC_VERSION ); module_usb_driver(pwc_driver); |
| 150 150 149 17 17 108 108 30 2 13 4 6 142 1 5 1 2 142 1 150 145 146 142 141 142 142 145 150 149 150 150 150 5 5 149 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 | // SPDX-License-Identifier: GPL-2.0 /* * XFRM compat layer * Author: Dmitry Safonov <dima@arista.com> * Based on code and translator idea by: Florian Westphal <fw@strlen.de> */ #include <linux/compat.h> #include <linux/nospec.h> #include <linux/xfrm.h> #include <net/xfrm.h> struct compat_xfrm_lifetime_cfg { compat_u64 soft_byte_limit, hard_byte_limit; compat_u64 soft_packet_limit, hard_packet_limit; compat_u64 soft_add_expires_seconds, hard_add_expires_seconds; compat_u64 soft_use_expires_seconds, hard_use_expires_seconds; }; /* same size on 32bit, but only 4 byte alignment required */ struct compat_xfrm_lifetime_cur { compat_u64 bytes, packets, add_time, use_time; }; /* same size on 32bit, but only 4 byte alignment required */ struct compat_xfrm_userpolicy_info { struct xfrm_selector sel; struct compat_xfrm_lifetime_cfg lft; struct compat_xfrm_lifetime_cur curlft; __u32 priority, index; u8 dir, action, flags, share; /* 4 bytes additional padding on 64bit */ }; struct compat_xfrm_usersa_info { struct xfrm_selector sel; struct xfrm_id id; xfrm_address_t saddr; struct compat_xfrm_lifetime_cfg lft; struct compat_xfrm_lifetime_cur curlft; struct xfrm_stats stats; __u32 seq, reqid; u16 family; u8 mode, replay_window, flags; /* 4 bytes additional padding on 64bit */ }; struct compat_xfrm_user_acquire { struct xfrm_id id; xfrm_address_t saddr; struct xfrm_selector sel; struct compat_xfrm_userpolicy_info policy; /* 4 bytes additional padding on 64bit */ __u32 aalgos, ealgos, calgos, seq; }; struct compat_xfrm_userspi_info { struct compat_xfrm_usersa_info info; /* 4 bytes additional padding on 64bit */ __u32 min, max; }; struct compat_xfrm_user_expire { struct compat_xfrm_usersa_info state; /* 8 bytes additional padding on 64bit */ u8 hard; }; struct compat_xfrm_user_polexpire { struct compat_xfrm_userpolicy_info pol; /* 8 bytes additional padding on 64bit */ u8 hard; }; #define XMSGSIZE(type) sizeof(struct type) static const int compat_msg_min[XFRM_NR_MSGTYPES] = { [XFRM_MSG_NEWSA - XFRM_MSG_BASE] = XMSGSIZE(compat_xfrm_usersa_info), [XFRM_MSG_DELSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_id), [XFRM_MSG_GETSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_id), [XFRM_MSG_NEWPOLICY - XFRM_MSG_BASE] = XMSGSIZE(compat_xfrm_userpolicy_info), [XFRM_MSG_DELPOLICY - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_id), [XFRM_MSG_GETPOLICY - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_id), [XFRM_MSG_ALLOCSPI - XFRM_MSG_BASE] = XMSGSIZE(compat_xfrm_userspi_info), [XFRM_MSG_ACQUIRE - XFRM_MSG_BASE] = XMSGSIZE(compat_xfrm_user_acquire), [XFRM_MSG_EXPIRE - XFRM_MSG_BASE] = XMSGSIZE(compat_xfrm_user_expire), [XFRM_MSG_UPDPOLICY - XFRM_MSG_BASE] = XMSGSIZE(compat_xfrm_userpolicy_info), [XFRM_MSG_UPDSA - XFRM_MSG_BASE] = XMSGSIZE(compat_xfrm_usersa_info), [XFRM_MSG_POLEXPIRE - XFRM_MSG_BASE] = XMSGSIZE(compat_xfrm_user_polexpire), [XFRM_MSG_FLUSHSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_flush), [XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = 0, [XFRM_MSG_NEWAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id), [XFRM_MSG_GETAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id), [XFRM_MSG_REPORT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_report), [XFRM_MSG_MIGRATE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_id), [XFRM_MSG_NEWSADINFO - XFRM_MSG_BASE] = sizeof(u32), [XFRM_MSG_GETSADINFO - XFRM_MSG_BASE] = sizeof(u32), [XFRM_MSG_NEWSPDINFO - XFRM_MSG_BASE] = sizeof(u32), [XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = sizeof(u32), [XFRM_MSG_MAPPING - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_mapping) }; static const struct nla_policy compat_policy[XFRMA_MAX+1] = { [XFRMA_UNSPEC] = { .strict_start_type = XFRMA_SA_DIR }, [XFRMA_SA] = { .len = XMSGSIZE(compat_xfrm_usersa_info)}, [XFRMA_POLICY] = { .len = XMSGSIZE(compat_xfrm_userpolicy_info)}, [XFRMA_LASTUSED] = { .type = NLA_U64}, [XFRMA_ALG_AUTH_TRUNC] = { .len = sizeof(struct xfrm_algo_auth)}, [XFRMA_ALG_AEAD] = { .len = sizeof(struct xfrm_algo_aead) }, [XFRMA_ALG_AUTH] = { .len = sizeof(struct xfrm_algo) }, [XFRMA_ALG_CRYPT] = { .len = sizeof(struct xfrm_algo) }, [XFRMA_ALG_COMP] = { .len = sizeof(struct xfrm_algo) }, [XFRMA_ENCAP] = { .len = sizeof(struct xfrm_encap_tmpl) }, [XFRMA_TMPL] = { .len = sizeof(struct xfrm_user_tmpl) }, [XFRMA_SEC_CTX] = { .len = sizeof(struct xfrm_user_sec_ctx) }, [XFRMA_LTIME_VAL] = { .len = sizeof(struct xfrm_lifetime_cur) }, [XFRMA_REPLAY_VAL] = { .len = sizeof(struct xfrm_replay_state) }, [XFRMA_REPLAY_THRESH] = { .type = NLA_U32 }, [XFRMA_ETIMER_THRESH] = { .type = NLA_U32 }, [XFRMA_SRCADDR] = { .len = sizeof(xfrm_address_t) }, [XFRMA_COADDR] = { .len = sizeof(xfrm_address_t) }, [XFRMA_POLICY_TYPE] = { .len = sizeof(struct xfrm_userpolicy_type)}, [XFRMA_MIGRATE] = { .len = sizeof(struct xfrm_user_migrate) }, [XFRMA_KMADDRESS] = { .len = sizeof(struct xfrm_user_kmaddress) }, [XFRMA_MARK] = { .len = sizeof(struct xfrm_mark) }, [XFRMA_TFCPAD] = { .type = NLA_U32 }, [XFRMA_REPLAY_ESN_VAL] = { .len = sizeof(struct xfrm_replay_state_esn) }, [XFRMA_SA_EXTRA_FLAGS] = { .type = NLA_U32 }, [XFRMA_PROTO] = { .type = NLA_U8 }, [XFRMA_ADDRESS_FILTER] = { .len = sizeof(struct xfrm_address_filter) }, [XFRMA_OFFLOAD_DEV] = { .len = sizeof(struct xfrm_user_offload) }, [XFRMA_SET_MARK] = { .type = NLA_U32 }, [XFRMA_SET_MARK_MASK] = { .type = NLA_U32 }, [XFRMA_IF_ID] = { .type = NLA_U32 }, [XFRMA_MTIMER_THRESH] = { .type = NLA_U32 }, [XFRMA_SA_DIR] = NLA_POLICY_RANGE(NLA_U8, XFRM_SA_DIR_IN, XFRM_SA_DIR_OUT), [XFRMA_NAT_KEEPALIVE_INTERVAL] = { .type = NLA_U32 }, }; static struct nlmsghdr *xfrm_nlmsg_put_compat(struct sk_buff *skb, const struct nlmsghdr *nlh_src, u16 type) { int payload = compat_msg_min[type]; int src_len = xfrm_msg_min[type]; struct nlmsghdr *nlh_dst; /* Compat messages are shorter or equal to native (+padding) */ if (WARN_ON_ONCE(src_len < payload)) return ERR_PTR(-EMSGSIZE); nlh_dst = nlmsg_put(skb, nlh_src->nlmsg_pid, nlh_src->nlmsg_seq, nlh_src->nlmsg_type, payload, nlh_src->nlmsg_flags); if (!nlh_dst) return ERR_PTR(-EMSGSIZE); memset(nlmsg_data(nlh_dst), 0, payload); switch (nlh_src->nlmsg_type) { /* Compat message has the same layout as native */ case XFRM_MSG_DELSA: case XFRM_MSG_DELPOLICY: case XFRM_MSG_FLUSHSA: case XFRM_MSG_FLUSHPOLICY: case XFRM_MSG_NEWAE: case XFRM_MSG_REPORT: case XFRM_MSG_MIGRATE: case XFRM_MSG_NEWSADINFO: case XFRM_MSG_NEWSPDINFO: case XFRM_MSG_MAPPING: WARN_ON_ONCE(src_len != payload); memcpy(nlmsg_data(nlh_dst), nlmsg_data(nlh_src), src_len); break; /* 4 byte alignment for trailing u64 on native, but not on compat */ case XFRM_MSG_NEWSA: case XFRM_MSG_NEWPOLICY: case XFRM_MSG_UPDSA: case XFRM_MSG_UPDPOLICY: WARN_ON_ONCE(src_len != payload + 4); memcpy(nlmsg_data(nlh_dst), nlmsg_data(nlh_src), payload); break; case XFRM_MSG_EXPIRE: { const struct xfrm_user_expire *src_ue = nlmsg_data(nlh_src); struct compat_xfrm_user_expire *dst_ue = nlmsg_data(nlh_dst); /* compat_xfrm_user_expire has 4-byte smaller state */ memcpy(dst_ue, src_ue, sizeof(dst_ue->state)); dst_ue->hard = src_ue->hard; break; } case XFRM_MSG_ACQUIRE: { const struct xfrm_user_acquire *src_ua = nlmsg_data(nlh_src); struct compat_xfrm_user_acquire *dst_ua = nlmsg_data(nlh_dst); memcpy(dst_ua, src_ua, offsetof(struct compat_xfrm_user_acquire, aalgos)); dst_ua->aalgos = src_ua->aalgos; dst_ua->ealgos = src_ua->ealgos; dst_ua->calgos = src_ua->calgos; dst_ua->seq = src_ua->seq; break; } case XFRM_MSG_POLEXPIRE: { const struct xfrm_user_polexpire *src_upe = nlmsg_data(nlh_src); struct compat_xfrm_user_polexpire *dst_upe = nlmsg_data(nlh_dst); /* compat_xfrm_user_polexpire has 4-byte smaller state */ memcpy(dst_upe, src_upe, sizeof(dst_upe->pol)); dst_upe->hard = src_upe->hard; break; } case XFRM_MSG_ALLOCSPI: { const struct xfrm_userspi_info *src_usi = nlmsg_data(nlh_src); struct compat_xfrm_userspi_info *dst_usi = nlmsg_data(nlh_dst); /* compat_xfrm_user_polexpire has 4-byte smaller state */ memcpy(dst_usi, src_usi, sizeof(src_usi->info)); dst_usi->min = src_usi->min; dst_usi->max = src_usi->max; break; } /* Not being sent by kernel */ case XFRM_MSG_GETSA: case XFRM_MSG_GETPOLICY: case XFRM_MSG_GETAE: case XFRM_MSG_GETSADINFO: case XFRM_MSG_GETSPDINFO: default: pr_warn_once("unsupported nlmsg_type %d\n", nlh_src->nlmsg_type); return ERR_PTR(-EOPNOTSUPP); } return nlh_dst; } static int xfrm_nla_cpy(struct sk_buff *dst, const struct nlattr *src, int len) { return nla_put(dst, src->nla_type, len, nla_data(src)); } static int xfrm_xlate64_attr(struct sk_buff *dst, const struct nlattr *src) { switch (src->nla_type) { case XFRMA_PAD: /* Ignore */ return 0; case XFRMA_UNSPEC: case XFRMA_ALG_AUTH: case XFRMA_ALG_CRYPT: case XFRMA_ALG_COMP: case XFRMA_ENCAP: case XFRMA_TMPL: return xfrm_nla_cpy(dst, src, nla_len(src)); case XFRMA_SA: return xfrm_nla_cpy(dst, src, XMSGSIZE(compat_xfrm_usersa_info)); case XFRMA_POLICY: return xfrm_nla_cpy(dst, src, XMSGSIZE(compat_xfrm_userpolicy_info)); case XFRMA_SEC_CTX: return xfrm_nla_cpy(dst, src, nla_len(src)); case XFRMA_LTIME_VAL: return nla_put_64bit(dst, src->nla_type, nla_len(src), nla_data(src), XFRMA_PAD); case XFRMA_REPLAY_VAL: case XFRMA_REPLAY_THRESH: case XFRMA_ETIMER_THRESH: case XFRMA_SRCADDR: case XFRMA_COADDR: return xfrm_nla_cpy(dst, src, nla_len(src)); case XFRMA_LASTUSED: return nla_put_64bit(dst, src->nla_type, nla_len(src), nla_data(src), XFRMA_PAD); case XFRMA_POLICY_TYPE: case XFRMA_MIGRATE: case XFRMA_ALG_AEAD: case XFRMA_KMADDRESS: case XFRMA_ALG_AUTH_TRUNC: case XFRMA_MARK: case XFRMA_TFCPAD: case XFRMA_REPLAY_ESN_VAL: case XFRMA_SA_EXTRA_FLAGS: case XFRMA_PROTO: case XFRMA_ADDRESS_FILTER: case XFRMA_OFFLOAD_DEV: case XFRMA_SET_MARK: case XFRMA_SET_MARK_MASK: case XFRMA_IF_ID: case XFRMA_MTIMER_THRESH: case XFRMA_SA_DIR: case XFRMA_NAT_KEEPALIVE_INTERVAL: return xfrm_nla_cpy(dst, src, nla_len(src)); default: BUILD_BUG_ON(XFRMA_MAX != XFRMA_NAT_KEEPALIVE_INTERVAL); pr_warn_once("unsupported nla_type %d\n", src->nla_type); return -EOPNOTSUPP; } } /* Take kernel-built (64bit layout) and create 32bit layout for userspace */ static int xfrm_xlate64(struct sk_buff *dst, const struct nlmsghdr *nlh_src) { u16 type = nlh_src->nlmsg_type - XFRM_MSG_BASE; const struct nlattr *nla, *attrs; struct nlmsghdr *nlh_dst; int len, remaining; nlh_dst = xfrm_nlmsg_put_compat(dst, nlh_src, type); if (IS_ERR(nlh_dst)) return PTR_ERR(nlh_dst); attrs = nlmsg_attrdata(nlh_src, xfrm_msg_min[type]); len = nlmsg_attrlen(nlh_src, xfrm_msg_min[type]); nla_for_each_attr(nla, attrs, len, remaining) { int err; switch (nlh_src->nlmsg_type) { case XFRM_MSG_NEWSPDINFO: err = xfrm_nla_cpy(dst, nla, nla_len(nla)); break; default: err = xfrm_xlate64_attr(dst, nla); break; } if (err) return err; } nlmsg_end(dst, nlh_dst); return 0; } static int xfrm_alloc_compat(struct sk_buff *skb, const struct nlmsghdr *nlh_src) { u16 type = nlh_src->nlmsg_type - XFRM_MSG_BASE; struct sk_buff *new = NULL; int err; if (type >= ARRAY_SIZE(xfrm_msg_min)) { pr_warn_once("unsupported nlmsg_type %d\n", nlh_src->nlmsg_type); return -EOPNOTSUPP; } if (skb_shinfo(skb)->frag_list == NULL) { new = alloc_skb(skb->len + skb_tailroom(skb), GFP_ATOMIC); if (!new) return -ENOMEM; skb_shinfo(skb)->frag_list = new; } err = xfrm_xlate64(skb_shinfo(skb)->frag_list, nlh_src); if (err) { if (new) { kfree_skb(new); skb_shinfo(skb)->frag_list = NULL; } return err; } return 0; } /* Calculates len of translated 64-bit message. */ static size_t xfrm_user_rcv_calculate_len64(const struct nlmsghdr *src, struct nlattr *attrs[XFRMA_MAX + 1], int maxtype) { size_t len = nlmsg_len(src); switch (src->nlmsg_type) { case XFRM_MSG_NEWSA: case XFRM_MSG_NEWPOLICY: case XFRM_MSG_ALLOCSPI: case XFRM_MSG_ACQUIRE: case XFRM_MSG_UPDPOLICY: case XFRM_MSG_UPDSA: len += 4; break; case XFRM_MSG_EXPIRE: case XFRM_MSG_POLEXPIRE: len += 8; break; case XFRM_MSG_NEWSPDINFO: /* attirbutes are xfrm_spdattr_type_t, not xfrm_attr_type_t */ return len; default: break; } /* Unexpected for anything, but XFRM_MSG_NEWSPDINFO, please * correct both 64=>32-bit and 32=>64-bit translators to copy * new attributes. */ if (WARN_ON_ONCE(maxtype)) return len; if (attrs[XFRMA_SA]) len += 4; if (attrs[XFRMA_POLICY]) len += 4; /* XXX: some attrs may need to be realigned * if !CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ return len; } static int xfrm_attr_cpy32(void *dst, size_t *pos, const struct nlattr *src, size_t size, int copy_len, int payload) { struct nlmsghdr *nlmsg = dst; struct nlattr *nla; /* xfrm_user_rcv_msg_compat() relies on fact that 32-bit messages * have the same len or shorted than 64-bit ones. * 32-bit translation that is bigger than 64-bit original is unexpected. */ if (WARN_ON_ONCE(copy_len > payload)) copy_len = payload; if (size - *pos < nla_attr_size(payload)) return -ENOBUFS; nla = dst + *pos; memcpy(nla, src, nla_attr_size(copy_len)); nla->nla_len = nla_attr_size(payload); *pos += nla_attr_size(copy_len); nlmsg->nlmsg_len += nla->nla_len; memset(dst + *pos, 0, payload - copy_len); *pos += payload - copy_len; return 0; } static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla, size_t *pos, size_t size, struct netlink_ext_ack *extack) { int type = nla_type(nla); u16 pol_len32, pol_len64; int err; if (type > XFRMA_MAX) { BUILD_BUG_ON(XFRMA_MAX != XFRMA_NAT_KEEPALIVE_INTERVAL); NL_SET_ERR_MSG(extack, "Bad attribute"); return -EOPNOTSUPP; } type = array_index_nospec(type, XFRMA_MAX + 1); if (nla_len(nla) < compat_policy[type].len) { NL_SET_ERR_MSG(extack, "Attribute bad length"); return -EOPNOTSUPP; } pol_len32 = compat_policy[type].len; pol_len64 = xfrma_policy[type].len; /* XFRMA_SA and XFRMA_POLICY - need to know how-to translate */ if (pol_len32 != pol_len64) { if (nla_len(nla) != compat_policy[type].len) { NL_SET_ERR_MSG(extack, "Attribute bad length"); return -EOPNOTSUPP; } err = xfrm_attr_cpy32(dst, pos, nla, size, pol_len32, pol_len64); if (err) return err; } return xfrm_attr_cpy32(dst, pos, nla, size, nla_len(nla), nla_len(nla)); } static int xfrm_xlate32(struct nlmsghdr *dst, const struct nlmsghdr *src, struct nlattr *attrs[XFRMA_MAX+1], size_t size, u8 type, int maxtype, struct netlink_ext_ack *extack) { size_t pos; int i; memcpy(dst, src, NLMSG_HDRLEN); dst->nlmsg_len = NLMSG_HDRLEN + xfrm_msg_min[type]; memset(nlmsg_data(dst), 0, xfrm_msg_min[type]); switch (src->nlmsg_type) { /* Compat message has the same layout as native */ case XFRM_MSG_DELSA: case XFRM_MSG_GETSA: case XFRM_MSG_DELPOLICY: case XFRM_MSG_GETPOLICY: case XFRM_MSG_FLUSHSA: case XFRM_MSG_FLUSHPOLICY: case XFRM_MSG_NEWAE: case XFRM_MSG_GETAE: case XFRM_MSG_REPORT: case XFRM_MSG_MIGRATE: case XFRM_MSG_NEWSADINFO: case XFRM_MSG_GETSADINFO: case XFRM_MSG_NEWSPDINFO: case XFRM_MSG_GETSPDINFO: case XFRM_MSG_MAPPING: memcpy(nlmsg_data(dst), nlmsg_data(src), compat_msg_min[type]); break; /* 4 byte alignment for trailing u64 on native, but not on compat */ case XFRM_MSG_NEWSA: case XFRM_MSG_NEWPOLICY: case XFRM_MSG_UPDSA: case XFRM_MSG_UPDPOLICY: memcpy(nlmsg_data(dst), nlmsg_data(src), compat_msg_min[type]); break; case XFRM_MSG_EXPIRE: { const struct compat_xfrm_user_expire *src_ue = nlmsg_data(src); struct xfrm_user_expire *dst_ue = nlmsg_data(dst); /* compat_xfrm_user_expire has 4-byte smaller state */ memcpy(dst_ue, src_ue, sizeof(src_ue->state)); dst_ue->hard = src_ue->hard; break; } case XFRM_MSG_ACQUIRE: { const struct compat_xfrm_user_acquire *src_ua = nlmsg_data(src); struct xfrm_user_acquire *dst_ua = nlmsg_data(dst); memcpy(dst_ua, src_ua, offsetof(struct compat_xfrm_user_acquire, aalgos)); dst_ua->aalgos = src_ua->aalgos; dst_ua->ealgos = src_ua->ealgos; dst_ua->calgos = src_ua->calgos; dst_ua->seq = src_ua->seq; break; } case XFRM_MSG_POLEXPIRE: { const struct compat_xfrm_user_polexpire *src_upe = nlmsg_data(src); struct xfrm_user_polexpire *dst_upe = nlmsg_data(dst); /* compat_xfrm_user_polexpire has 4-byte smaller state */ memcpy(dst_upe, src_upe, sizeof(src_upe->pol)); dst_upe->hard = src_upe->hard; break; } case XFRM_MSG_ALLOCSPI: { const struct compat_xfrm_userspi_info *src_usi = nlmsg_data(src); struct xfrm_userspi_info *dst_usi = nlmsg_data(dst); /* compat_xfrm_user_polexpire has 4-byte smaller state */ memcpy(dst_usi, src_usi, sizeof(src_usi->info)); dst_usi->min = src_usi->min; dst_usi->max = src_usi->max; break; } default: NL_SET_ERR_MSG(extack, "Unsupported message type"); return -EOPNOTSUPP; } pos = dst->nlmsg_len; if (maxtype) { /* attirbutes are xfrm_spdattr_type_t, not xfrm_attr_type_t */ WARN_ON_ONCE(src->nlmsg_type != XFRM_MSG_NEWSPDINFO); for (i = 1; i <= maxtype; i++) { int err; if (!attrs[i]) continue; /* just copy - no need for translation */ err = xfrm_attr_cpy32(dst, &pos, attrs[i], size, nla_len(attrs[i]), nla_len(attrs[i])); if (err) return err; } return 0; } for (i = 1; i < XFRMA_MAX + 1; i++) { int err; if (i == XFRMA_PAD) continue; if (!attrs[i]) continue; err = xfrm_xlate32_attr(dst, attrs[i], &pos, size, extack); if (err) return err; } return 0; } static struct nlmsghdr *xfrm_user_rcv_msg_compat(const struct nlmsghdr *h32, int maxtype, const struct nla_policy *policy, struct netlink_ext_ack *extack) { /* netlink_rcv_skb() checks if a message has full (struct nlmsghdr) */ u16 type = h32->nlmsg_type - XFRM_MSG_BASE; struct nlattr *attrs[XFRMA_MAX+1]; struct nlmsghdr *h64; size_t len; int err; BUILD_BUG_ON(ARRAY_SIZE(xfrm_msg_min) != ARRAY_SIZE(compat_msg_min)); if (type >= ARRAY_SIZE(xfrm_msg_min)) return ERR_PTR(-EINVAL); /* Don't call parse: the message might have only nlmsg header */ if ((h32->nlmsg_type == XFRM_MSG_GETSA || h32->nlmsg_type == XFRM_MSG_GETPOLICY) && (h32->nlmsg_flags & NLM_F_DUMP)) return NULL; err = nlmsg_parse_deprecated(h32, compat_msg_min[type], attrs, maxtype ? : XFRMA_MAX, policy ? : compat_policy, extack); if (err < 0) return ERR_PTR(err); len = xfrm_user_rcv_calculate_len64(h32, attrs, maxtype); /* The message doesn't need translation */ if (len == nlmsg_len(h32)) return NULL; len += NLMSG_HDRLEN; h64 = kvmalloc(len, GFP_KERNEL); if (!h64) return ERR_PTR(-ENOMEM); err = xfrm_xlate32(h64, h32, attrs, len, type, maxtype, extack); if (err < 0) { kvfree(h64); return ERR_PTR(err); } return h64; } static int xfrm_user_policy_compat(u8 **pdata32, int optlen) { struct compat_xfrm_userpolicy_info *p = (void *)*pdata32; u8 *src_templates, *dst_templates; u8 *data64; if (optlen < sizeof(*p)) return -EINVAL; data64 = kmalloc_track_caller(optlen + 4, GFP_USER | __GFP_NOWARN); if (!data64) return -ENOMEM; memcpy(data64, *pdata32, sizeof(*p)); memset(data64 + sizeof(*p), 0, 4); src_templates = *pdata32 + sizeof(*p); dst_templates = data64 + sizeof(*p) + 4; memcpy(dst_templates, src_templates, optlen - sizeof(*p)); kfree(*pdata32); *pdata32 = data64; return 0; } static struct xfrm_translator xfrm_translator = { .owner = THIS_MODULE, .alloc_compat = xfrm_alloc_compat, .rcv_msg_compat = xfrm_user_rcv_msg_compat, .xlate_user_policy_sockptr = xfrm_user_policy_compat, }; static int __init xfrm_compat_init(void) { return xfrm_register_translator(&xfrm_translator); } static void __exit xfrm_compat_exit(void) { xfrm_unregister_translator(&xfrm_translator); } module_init(xfrm_compat_init); module_exit(xfrm_compat_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Dmitry Safonov"); MODULE_DESCRIPTION("XFRM 32-bit compatibility layer"); |
| 4 6 3 2 2 2 2 2 2 1 3 94 94 94 94 4 4 4 4 4 4 4 4 4 4 4 1 4 93 71 70 158 9 157 70 153 153 64 64 64 18 7 18 18 7 13 18 18 13 18 18 18 1 18 10 34 34 34 34 34 33 33 3595 3582 109 30 109 106 94 94 94 87 86 86 84 92 3 3 3 91 88 1 1 1 92 88 88 88 3508 3445 5 3450 36 36 34 34 1 34 21 1 34 34 1 34 34 34 35 34 12 12 12 12 12 11 2 12 2 12 3 129 128 12 12 12 12 2 12 6 12 12 12 12 12 1 12 12 12 12 12 2 12 12 12 10 77 55 28 23 28 24 24 24 15 15 15 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 | // SPDX-License-Identifier: GPL-2.0-only /* * mm/truncate.c - code for taking down pages from address_spaces * * Copyright (C) 2002, Linus Torvalds * * 10Sep2002 Andrew Morton * Initial version. */ #include <linux/kernel.h> #include <linux/backing-dev.h> #include <linux/dax.h> #include <linux/gfp.h> #include <linux/mm.h> #include <linux/swap.h> #include <linux/export.h> #include <linux/pagemap.h> #include <linux/highmem.h> #include <linux/pagevec.h> #include <linux/task_io_accounting_ops.h> #include <linux/shmem_fs.h> #include <linux/rmap.h> #include "internal.h" /* * Regular page slots are stabilized by the page lock even without the tree * itself locked. These unlocked entries need verification under the tree * lock. */ static inline void __clear_shadow_entry(struct address_space *mapping, pgoff_t index, void *entry) { XA_STATE(xas, &mapping->i_pages, index); xas_set_update(&xas, workingset_update_node); if (xas_load(&xas) != entry) return; xas_store(&xas, NULL); } static void clear_shadow_entries(struct address_space *mapping, struct folio_batch *fbatch, pgoff_t *indices) { int i; /* Handled by shmem itself, or for DAX we do nothing. */ if (shmem_mapping(mapping) || dax_mapping(mapping)) return; spin_lock(&mapping->host->i_lock); xa_lock_irq(&mapping->i_pages); for (i = 0; i < folio_batch_count(fbatch); i++) { struct folio *folio = fbatch->folios[i]; if (xa_is_value(folio)) __clear_shadow_entry(mapping, indices[i], folio); } xa_unlock_irq(&mapping->i_pages); if (mapping_shrinkable(mapping)) inode_add_lru(mapping->host); spin_unlock(&mapping->host->i_lock); } /* * Unconditionally remove exceptional entries. Usually called from truncate * path. Note that the folio_batch may be altered by this function by removing * exceptional entries similar to what folio_batch_remove_exceptionals() does. */ static void truncate_folio_batch_exceptionals(struct address_space *mapping, struct folio_batch *fbatch, pgoff_t *indices) { int i, j; bool dax; /* Handled by shmem itself */ if (shmem_mapping(mapping)) return; for (j = 0; j < folio_batch_count(fbatch); j++) if (xa_is_value(fbatch->folios[j])) break; if (j == folio_batch_count(fbatch)) return; dax = dax_mapping(mapping); if (!dax) { spin_lock(&mapping->host->i_lock); xa_lock_irq(&mapping->i_pages); } for (i = j; i < folio_batch_count(fbatch); i++) { struct folio *folio = fbatch->folios[i]; pgoff_t index = indices[i]; if (!xa_is_value(folio)) { fbatch->folios[j++] = folio; continue; } if (unlikely(dax)) { dax_delete_mapping_entry(mapping, index); continue; } __clear_shadow_entry(mapping, index, folio); } if (!dax) { xa_unlock_irq(&mapping->i_pages); if (mapping_shrinkable(mapping)) inode_add_lru(mapping->host); spin_unlock(&mapping->host->i_lock); } fbatch->nr = j; } /** * folio_invalidate - Invalidate part or all of a folio. * @folio: The folio which is affected. * @offset: start of the range to invalidate * @length: length of the range to invalidate * * folio_invalidate() is called when all or part of the folio has become * invalidated by a truncate operation. * * folio_invalidate() does not have to release all buffers, but it must * ensure that no dirty buffer is left outside @offset and that no I/O * is underway against any of the blocks which are outside the truncation * point. Because the caller is about to free (and possibly reuse) those * blocks on-disk. */ void folio_invalidate(struct folio *folio, size_t offset, size_t length) { const struct address_space_operations *aops = folio->mapping->a_ops; if (aops->invalidate_folio) aops->invalidate_folio(folio, offset, length); } EXPORT_SYMBOL_GPL(folio_invalidate); /* * If truncate cannot remove the fs-private metadata from the page, the page * becomes orphaned. It will be left on the LRU and may even be mapped into * user pagetables if we're racing with filemap_fault(). * * We need to bail out if page->mapping is no longer equal to the original * mapping. This happens a) when the VM reclaimed the page while we waited on * its lock, b) when a concurrent invalidate_mapping_pages got there first and * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space. */ static void truncate_cleanup_folio(struct folio *folio) { if (folio_mapped(folio)) unmap_mapping_folio(folio); if (folio_has_private(folio)) folio_invalidate(folio, 0, folio_size(folio)); /* * Some filesystems seem to re-dirty the page even after * the VM has canceled the dirty bit (eg ext3 journaling). * Hence dirty accounting check is placed after invalidation. */ folio_cancel_dirty(folio); folio_clear_mappedtodisk(folio); } int truncate_inode_folio(struct address_space *mapping, struct folio *folio) { if (folio->mapping != mapping) return -EIO; truncate_cleanup_folio(folio); filemap_remove_folio(folio); return 0; } /* * Handle partial folios. The folio may be entirely within the * range if a split has raced with us. If not, we zero the part of the * folio that's within the [start, end] range, and then split the folio if * it's large. split_page_range() will discard pages which now lie beyond * i_size, and we rely on the caller to discard pages which lie within a * newly created hole. * * Returns false if splitting failed so the caller can avoid * discarding the entire folio which is stubbornly unsplit. */ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) { loff_t pos = folio_pos(folio); unsigned int offset, length; if (pos < start) offset = start - pos; else offset = 0; length = folio_size(folio); if (pos + length <= (u64)end) length = length - offset; else length = end + 1 - pos - offset; folio_wait_writeback(folio); if (length == folio_size(folio)) { truncate_inode_folio(folio->mapping, folio); return true; } /* * We may be zeroing pages we're about to discard, but it avoids * doing a complex calculation here, and then doing the zeroing * anyway if the page split fails. */ if (!mapping_inaccessible(folio->mapping)) folio_zero_range(folio, offset, length); if (folio_has_private(folio)) folio_invalidate(folio, offset, length); if (!folio_test_large(folio)) return true; if (split_folio(folio) == 0) return true; if (folio_test_dirty(folio)) return false; truncate_inode_folio(folio->mapping, folio); return true; } /* * Used to get rid of pages on hardware memory corruption. */ int generic_error_remove_folio(struct address_space *mapping, struct folio *folio) { if (!mapping) return -EINVAL; /* * Only punch for normal data pages for now. * Handling other types like directories would need more auditing. */ if (!S_ISREG(mapping->host->i_mode)) return -EIO; return truncate_inode_folio(mapping, folio); } EXPORT_SYMBOL(generic_error_remove_folio); /** * mapping_evict_folio() - Remove an unused folio from the page-cache. * @mapping: The mapping this folio belongs to. * @folio: The folio to remove. * * Safely remove one folio from the page cache. * It only drops clean, unused folios. * * Context: Folio must be locked. * Return: The number of pages successfully removed. */ long mapping_evict_folio(struct address_space *mapping, struct folio *folio) { /* The page may have been truncated before it was locked */ if (!mapping) return 0; if (folio_test_dirty(folio) || folio_test_writeback(folio)) return 0; /* The refcount will be elevated if any page in the folio is mapped */ if (folio_ref_count(folio) > folio_nr_pages(folio) + folio_has_private(folio) + 1) return 0; if (!filemap_release_folio(folio, 0)) return 0; return remove_mapping(mapping, folio); } /** * truncate_inode_pages_range - truncate range of pages specified by start & end byte offsets * @mapping: mapping to truncate * @lstart: offset from which to truncate * @lend: offset to which to truncate (inclusive) * * Truncate the page cache, removing the pages that are between * specified offsets (and zeroing out partial pages * if lstart or lend + 1 is not page aligned). * * Truncate takes two passes - the first pass is nonblocking. It will not * block on page locks and it will not block on writeback. The second pass * will wait. This is to prevent as much IO as possible in the affected region. * The first pass will remove most pages, so the search cost of the second pass * is low. * * We pass down the cache-hot hint to the page freeing code. Even if the * mapping is large, it is probably the case that the final pages are the most * recently touched, and freeing happens in ascending file offset order. * * Note that since ->invalidate_folio() accepts range to invalidate * truncate_inode_pages_range is able to handle cases where lend + 1 is not * page aligned properly. */ void truncate_inode_pages_range(struct address_space *mapping, loff_t lstart, loff_t lend) { pgoff_t start; /* inclusive */ pgoff_t end; /* exclusive */ struct folio_batch fbatch; pgoff_t indices[PAGEVEC_SIZE]; pgoff_t index; int i; struct folio *folio; bool same_folio; if (mapping_empty(mapping)) return; /* * 'start' and 'end' always covers the range of pages to be fully * truncated. Partial pages are covered with 'partial_start' at the * start of the range and 'partial_end' at the end of the range. * Note that 'end' is exclusive while 'lend' is inclusive. */ start = (lstart + PAGE_SIZE - 1) >> PAGE_SHIFT; if (lend == -1) /* * lend == -1 indicates end-of-file so we have to set 'end' * to the highest possible pgoff_t and since the type is * unsigned we're using -1. */ end = -1; else end = (lend + 1) >> PAGE_SHIFT; folio_batch_init(&fbatch); index = start; while (index < end && find_lock_entries(mapping, &index, end - 1, &fbatch, indices)) { truncate_folio_batch_exceptionals(mapping, &fbatch, indices); for (i = 0; i < folio_batch_count(&fbatch); i++) truncate_cleanup_folio(fbatch.folios[i]); delete_from_page_cache_batch(mapping, &fbatch); for (i = 0; i < folio_batch_count(&fbatch); i++) folio_unlock(fbatch.folios[i]); folio_batch_release(&fbatch); cond_resched(); } same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT); folio = __filemap_get_folio(mapping, lstart >> PAGE_SHIFT, FGP_LOCK, 0); if (!IS_ERR(folio)) { same_folio = lend < folio_pos(folio) + folio_size(folio); if (!truncate_inode_partial_folio(folio, lstart, lend)) { start = folio_next_index(folio); if (same_folio) end = folio->index; } folio_unlock(folio); folio_put(folio); folio = NULL; } if (!same_folio) { folio = __filemap_get_folio(mapping, lend >> PAGE_SHIFT, FGP_LOCK, 0); if (!IS_ERR(folio)) { if (!truncate_inode_partial_folio(folio, lstart, lend)) end = folio->index; folio_unlock(folio); folio_put(folio); } } index = start; while (index < end) { cond_resched(); if (!find_get_entries(mapping, &index, end - 1, &fbatch, indices)) { /* If all gone from start onwards, we're done */ if (index == start) break; /* Otherwise restart to make sure all gone */ index = start; continue; } for (i = 0; i < folio_batch_count(&fbatch); i++) { struct folio *folio = fbatch.folios[i]; /* We rely upon deletion not changing page->index */ if (xa_is_value(folio)) continue; folio_lock(folio); VM_BUG_ON_FOLIO(!folio_contains(folio, indices[i]), folio); folio_wait_writeback(folio); truncate_inode_folio(mapping, folio); folio_unlock(folio); } truncate_folio_batch_exceptionals(mapping, &fbatch, indices); folio_batch_release(&fbatch); } } EXPORT_SYMBOL(truncate_inode_pages_range); /** * truncate_inode_pages - truncate *all* the pages from an offset * @mapping: mapping to truncate * @lstart: offset from which to truncate * * Called under (and serialised by) inode->i_rwsem and * mapping->invalidate_lock. * * Note: When this function returns, there can be a page in the process of * deletion (inside __filemap_remove_folio()) in the specified range. Thus * mapping->nrpages can be non-zero when this function returns even after * truncation of the whole mapping. */ void truncate_inode_pages(struct address_space *mapping, loff_t lstart) { truncate_inode_pages_range(mapping, lstart, (loff_t)-1); } EXPORT_SYMBOL(truncate_inode_pages); /** * truncate_inode_pages_final - truncate *all* pages before inode dies * @mapping: mapping to truncate * * Called under (and serialized by) inode->i_rwsem. * * Filesystems have to use this in the .evict_inode path to inform the * VM that this is the final truncate and the inode is going away. */ void truncate_inode_pages_final(struct address_space *mapping) { /* * Page reclaim can not participate in regular inode lifetime * management (can't call iput()) and thus can race with the * inode teardown. Tell it when the address space is exiting, * so that it does not install eviction information after the * final truncate has begun. */ mapping_set_exiting(mapping); if (!mapping_empty(mapping)) { /* * As truncation uses a lockless tree lookup, cycle * the tree lock to make sure any ongoing tree * modification that does not see AS_EXITING is * completed before starting the final truncate. */ xa_lock_irq(&mapping->i_pages); xa_unlock_irq(&mapping->i_pages); } truncate_inode_pages(mapping, 0); } EXPORT_SYMBOL(truncate_inode_pages_final); /** * mapping_try_invalidate - Invalidate all the evictable folios of one inode * @mapping: the address_space which holds the folios to invalidate * @start: the offset 'from' which to invalidate * @end: the offset 'to' which to invalidate (inclusive) * @nr_failed: How many folio invalidations failed * * This function is similar to invalidate_mapping_pages(), except that it * returns the number of folios which could not be evicted in @nr_failed. */ unsigned long mapping_try_invalidate(struct address_space *mapping, pgoff_t start, pgoff_t end, unsigned long *nr_failed) { pgoff_t indices[PAGEVEC_SIZE]; struct folio_batch fbatch; pgoff_t index = start; unsigned long ret; unsigned long count = 0; int i; bool xa_has_values = false; folio_batch_init(&fbatch); while (find_lock_entries(mapping, &index, end, &fbatch, indices)) { for (i = 0; i < folio_batch_count(&fbatch); i++) { struct folio *folio = fbatch.folios[i]; /* We rely upon deletion not changing folio->index */ if (xa_is_value(folio)) { xa_has_values = true; count++; continue; } ret = mapping_evict_folio(mapping, folio); folio_unlock(folio); /* * Invalidation is a hint that the folio is no longer * of interest and try to speed up its reclaim. */ if (!ret) { deactivate_file_folio(folio); /* Likely in the lru cache of a remote CPU */ if (nr_failed) (*nr_failed)++; } count += ret; } if (xa_has_values) clear_shadow_entries(mapping, &fbatch, indices); folio_batch_remove_exceptionals(&fbatch); folio_batch_release(&fbatch); cond_resched(); } return count; } /** * invalidate_mapping_pages - Invalidate all clean, unlocked cache of one inode * @mapping: the address_space which holds the cache to invalidate * @start: the offset 'from' which to invalidate * @end: the offset 'to' which to invalidate (inclusive) * * This function removes pages that are clean, unmapped and unlocked, * as well as shadow entries. It will not block on IO activity. * * If you want to remove all the pages of one inode, regardless of * their use and writeback state, use truncate_inode_pages(). * * Return: The number of indices that had their contents invalidated */ unsigned long invalidate_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t end) { return mapping_try_invalidate(mapping, start, end, NULL); } EXPORT_SYMBOL(invalidate_mapping_pages); /* * This is like mapping_evict_folio(), except it ignores the folio's * refcount. We do this because invalidate_inode_pages2() needs stronger * invalidation guarantees, and cannot afford to leave folios behind because * shrink_folio_list() has a temp ref on them, or because they're transiently * sitting in the folio_add_lru() caches. */ static int invalidate_complete_folio2(struct address_space *mapping, struct folio *folio) { if (folio->mapping != mapping) return 0; if (!filemap_release_folio(folio, GFP_KERNEL)) return 0; spin_lock(&mapping->host->i_lock); xa_lock_irq(&mapping->i_pages); if (folio_test_dirty(folio)) goto failed; BUG_ON(folio_has_private(folio)); __filemap_remove_folio(folio, NULL); xa_unlock_irq(&mapping->i_pages); if (mapping_shrinkable(mapping)) inode_add_lru(mapping->host); spin_unlock(&mapping->host->i_lock); filemap_free_folio(mapping, folio); return 1; failed: xa_unlock_irq(&mapping->i_pages); spin_unlock(&mapping->host->i_lock); return 0; } static int folio_launder(struct address_space *mapping, struct folio *folio) { if (!folio_test_dirty(folio)) return 0; if (folio->mapping != mapping || mapping->a_ops->launder_folio == NULL) return 0; return mapping->a_ops->launder_folio(folio); } /** * invalidate_inode_pages2_range - remove range of pages from an address_space * @mapping: the address_space * @start: the page offset 'from' which to invalidate * @end: the page offset 'to' which to invalidate (inclusive) * * Any pages which are found to be mapped into pagetables are unmapped prior to * invalidation. * * Return: -EBUSY if any pages could not be invalidated. */ int invalidate_inode_pages2_range(struct address_space *mapping, pgoff_t start, pgoff_t end) { pgoff_t indices[PAGEVEC_SIZE]; struct folio_batch fbatch; pgoff_t index; int i; int ret = 0; int ret2 = 0; int did_range_unmap = 0; bool xa_has_values = false; if (mapping_empty(mapping)) return 0; folio_batch_init(&fbatch); index = start; while (find_get_entries(mapping, &index, end, &fbatch, indices)) { for (i = 0; i < folio_batch_count(&fbatch); i++) { struct folio *folio = fbatch.folios[i]; /* We rely upon deletion not changing folio->index */ if (xa_is_value(folio)) { xa_has_values = true; if (dax_mapping(mapping) && !dax_invalidate_mapping_entry_sync(mapping, indices[i])) ret = -EBUSY; continue; } if (!did_range_unmap && folio_mapped(folio)) { /* * If folio is mapped, before taking its lock, * zap the rest of the file in one hit. */ unmap_mapping_pages(mapping, indices[i], (1 + end - indices[i]), false); did_range_unmap = 1; } folio_lock(folio); if (unlikely(folio->mapping != mapping)) { folio_unlock(folio); continue; } VM_BUG_ON_FOLIO(!folio_contains(folio, indices[i]), folio); folio_wait_writeback(folio); if (folio_mapped(folio)) unmap_mapping_folio(folio); BUG_ON(folio_mapped(folio)); ret2 = folio_launder(mapping, folio); if (ret2 == 0) { if (!invalidate_complete_folio2(mapping, folio)) ret2 = -EBUSY; } if (ret2 < 0) ret = ret2; folio_unlock(folio); } if (xa_has_values) clear_shadow_entries(mapping, &fbatch, indices); folio_batch_remove_exceptionals(&fbatch); folio_batch_release(&fbatch); cond_resched(); } /* * For DAX we invalidate page tables after invalidating page cache. We * could invalidate page tables while invalidating each entry however * that would be expensive. And doing range unmapping before doesn't * work as we have no cheap way to find whether page cache entry didn't * get remapped later. */ if (dax_mapping(mapping)) { unmap_mapping_pages(mapping, start, end - start + 1, false); } return ret; } EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range); /** * invalidate_inode_pages2 - remove all pages from an address_space * @mapping: the address_space * * Any pages which are found to be mapped into pagetables are unmapped prior to * invalidation. * * Return: -EBUSY if any pages could not be invalidated. */ int invalidate_inode_pages2(struct address_space *mapping) { return invalidate_inode_pages2_range(mapping, 0, -1); } EXPORT_SYMBOL_GPL(invalidate_inode_pages2); /** * truncate_pagecache - unmap and remove pagecache that has been truncated * @inode: inode * @newsize: new file size * * inode's new i_size must already be written before truncate_pagecache * is called. * * This function should typically be called before the filesystem * releases resources associated with the freed range (eg. deallocates * blocks). This way, pagecache will always stay logically coherent * with on-disk format, and the filesystem would not have to deal with * situations such as writepage being called for a page that has already * had its underlying blocks deallocated. */ void truncate_pagecache(struct inode *inode, loff_t newsize) { struct address_space *mapping = inode->i_mapping; loff_t holebegin = round_up(newsize, PAGE_SIZE); /* * unmap_mapping_range is called twice, first simply for * efficiency so that truncate_inode_pages does fewer * single-page unmaps. However after this first call, and * before truncate_inode_pages finishes, it is possible for * private pages to be COWed, which remain after * truncate_inode_pages finishes, hence the second * unmap_mapping_range call must be made for correctness. */ unmap_mapping_range(mapping, holebegin, 0, 1); truncate_inode_pages(mapping, newsize); unmap_mapping_range(mapping, holebegin, 0, 1); } EXPORT_SYMBOL(truncate_pagecache); /** * truncate_setsize - update inode and pagecache for a new file size * @inode: inode * @newsize: new file size * * truncate_setsize updates i_size and performs pagecache truncation (if * necessary) to @newsize. It will be typically be called from the filesystem's * setattr function when ATTR_SIZE is passed in. * * Must be called with a lock serializing truncates and writes (generally * i_rwsem but e.g. xfs uses a different lock) and before all filesystem * specific block truncation has been performed. */ void truncate_setsize(struct inode *inode, loff_t newsize) { loff_t oldsize = inode->i_size; i_size_write(inode, newsize); if (newsize > oldsize) pagecache_isize_extended(inode, oldsize, newsize); truncate_pagecache(inode, newsize); } EXPORT_SYMBOL(truncate_setsize); /** * pagecache_isize_extended - update pagecache after extension of i_size * @inode: inode for which i_size was extended * @from: original inode size * @to: new inode size * * Handle extension of inode size either caused by extending truncate or * by write starting after current i_size. We mark the page straddling * current i_size RO so that page_mkwrite() is called on the first * write access to the page. The filesystem will update its per-block * information before user writes to the page via mmap after the i_size * has been changed. * * The function must be called after i_size is updated so that page fault * coming after we unlock the folio will already see the new i_size. * The function must be called while we still hold i_rwsem - this not only * makes sure i_size is stable but also that userspace cannot observe new * i_size value before we are prepared to store mmap writes at new inode size. */ void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to) { int bsize = i_blocksize(inode); loff_t rounded_from; struct folio *folio; WARN_ON(to > inode->i_size); if (from >= to || bsize >= PAGE_SIZE) return; /* Page straddling @from will not have any hole block created? */ rounded_from = round_up(from, bsize); if (to <= rounded_from || !(rounded_from & (PAGE_SIZE - 1))) return; folio = filemap_lock_folio(inode->i_mapping, from / PAGE_SIZE); /* Folio not cached? Nothing to do */ if (IS_ERR(folio)) return; /* * See folio_clear_dirty_for_io() for details why folio_mark_dirty() * is needed. */ if (folio_mkclean(folio)) folio_mark_dirty(folio); folio_unlock(folio); folio_put(folio); } EXPORT_SYMBOL(pagecache_isize_extended); /** * truncate_pagecache_range - unmap and remove pagecache that is hole-punched * @inode: inode * @lstart: offset of beginning of hole * @lend: offset of last byte of hole * * This function should typically be called before the filesystem * releases resources associated with the freed range (eg. deallocates * blocks). This way, pagecache will always stay logically coherent * with on-disk format, and the filesystem would not have to deal with * situations such as writepage being called for a page that has already * had its underlying blocks deallocated. */ void truncate_pagecache_range(struct inode *inode, loff_t lstart, loff_t lend) { struct address_space *mapping = inode->i_mapping; loff_t unmap_start = round_up(lstart, PAGE_SIZE); loff_t unmap_end = round_down(1 + lend, PAGE_SIZE) - 1; /* * This rounding is currently just for example: unmap_mapping_range * expands its hole outwards, whereas we want it to contract the hole * inwards. However, existing callers of truncate_pagecache_range are * doing their own page rounding first. Note that unmap_mapping_range * allows holelen 0 for all, and we allow lend -1 for end of file. */ /* * Unlike in truncate_pagecache, unmap_mapping_range is called only * once (before truncating pagecache), and without "even_cows" flag: * hole-punching should not remove private COWed pages from the hole. */ if ((u64)unmap_end > (u64)unmap_start) unmap_mapping_range(mapping, unmap_start, 1 + unmap_end - unmap_start, 0); truncate_inode_pages_range(mapping, lstart, lend); } EXPORT_SYMBOL(truncate_pagecache_range); |
| 4 27 27 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 | /* SPDX-License-Identifier: GPL-2.0 */ /* * Task I/O accounting operations */ #ifndef __TASK_IO_ACCOUNTING_OPS_INCLUDED #define __TASK_IO_ACCOUNTING_OPS_INCLUDED #include <linux/sched.h> #ifdef CONFIG_TASK_IO_ACCOUNTING static inline void task_io_account_read(size_t bytes) { current->ioac.read_bytes += bytes; } /* * We approximate number of blocks, because we account bytes only. * A 'block' is 512 bytes */ static inline unsigned long task_io_get_inblock(const struct task_struct *p) { return p->ioac.read_bytes >> 9; } static inline void task_io_account_write(size_t bytes) { current->ioac.write_bytes += bytes; } /* * We approximate number of blocks, because we account bytes only. * A 'block' is 512 bytes */ static inline unsigned long task_io_get_oublock(const struct task_struct *p) { return p->ioac.write_bytes >> 9; } static inline void task_io_account_cancelled_write(size_t bytes) { current->ioac.cancelled_write_bytes += bytes; } static inline void task_io_accounting_init(struct task_io_accounting *ioac) { memset(ioac, 0, sizeof(*ioac)); } static inline void task_blk_io_accounting_add(struct task_io_accounting *dst, struct task_io_accounting *src) { dst->read_bytes += src->read_bytes; dst->write_bytes += src->write_bytes; dst->cancelled_write_bytes += src->cancelled_write_bytes; } #else static inline void task_io_account_read(size_t bytes) { } static inline unsigned long task_io_get_inblock(const struct task_struct *p) { return 0; } static inline void task_io_account_write(size_t bytes) { } static inline unsigned long task_io_get_oublock(const struct task_struct *p) { return 0; } static inline void task_io_account_cancelled_write(size_t bytes) { } static inline void task_io_accounting_init(struct task_io_accounting *ioac) { } static inline void task_blk_io_accounting_add(struct task_io_accounting *dst, struct task_io_accounting *src) { } #endif /* CONFIG_TASK_IO_ACCOUNTING */ #ifdef CONFIG_TASK_XACCT static inline void task_chr_io_accounting_add(struct task_io_accounting *dst, struct task_io_accounting *src) { dst->rchar += src->rchar; dst->wchar += src->wchar; dst->syscr += src->syscr; dst->syscw += src->syscw; } #else static inline void task_chr_io_accounting_add(struct task_io_accounting *dst, struct task_io_accounting *src) { } #endif /* CONFIG_TASK_XACCT */ static inline void task_io_accounting_add(struct task_io_accounting *dst, struct task_io_accounting *src) { task_chr_io_accounting_add(dst, src); task_blk_io_accounting_add(dst, src); } #endif /* __TASK_IO_ACCOUNTING_OPS_INCLUDED */ |
| 362 2 357 338 343 440 206 440 308 424 73 2 227 227 62 62 376 237 409 336 409 407 409 407 407 237 407 407 336 337 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __KVM_X86_VMX_INSN_H #define __KVM_X86_VMX_INSN_H #include <linux/nospec.h> #include <asm/vmx.h> #include "vmx_onhyperv.h" #include "vmcs.h" #include "../x86.h" void vmread_error(unsigned long field); void vmwrite_error(unsigned long field, unsigned long value); void vmclear_error(struct vmcs *vmcs, u64 phys_addr); void vmptrld_error(struct vmcs *vmcs, u64 phys_addr); void invvpid_error(unsigned long ext, u16 vpid, gva_t gva); void invept_error(unsigned long ext, u64 eptp, gpa_t gpa); #ifndef CONFIG_CC_HAS_ASM_GOTO_OUTPUT /* * The VMREAD error trampoline _always_ uses the stack to pass parameters, even * for 64-bit targets. Preserving all registers allows the VMREAD inline asm * blob to avoid clobbering GPRs, which in turn allows the compiler to better * optimize sequences of VMREADs. * * Declare the trampoline as an opaque label as it's not safe to call from C * code; there is no way to tell the compiler to pass params on the stack for * 64-bit targets. * * void vmread_error_trampoline(unsigned long field, bool fault); */ extern unsigned long vmread_error_trampoline; /* * The second VMREAD error trampoline, called from the assembly trampoline, * exists primarily to enable instrumentation for the VM-Fail path. */ void vmread_error_trampoline2(unsigned long field, bool fault); #endif static __always_inline void vmcs_check16(unsigned long field) { BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000, "16-bit accessor invalid for 64-bit field"); BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001, "16-bit accessor invalid for 64-bit high field"); BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000, "16-bit accessor invalid for 32-bit high field"); BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000, "16-bit accessor invalid for natural width field"); } static __always_inline void vmcs_check32(unsigned long field) { BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0, "32-bit accessor invalid for 16-bit field"); BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000, "32-bit accessor invalid for 64-bit field"); BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001, "32-bit accessor invalid for 64-bit high field"); BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000, "32-bit accessor invalid for natural width field"); } static __always_inline void vmcs_check64(unsigned long field) { BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0, "64-bit accessor invalid for 16-bit field"); BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001, "64-bit accessor invalid for 64-bit high field"); BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000, "64-bit accessor invalid for 32-bit field"); BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000, "64-bit accessor invalid for natural width field"); } static __always_inline void vmcs_checkl(unsigned long field) { BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0, "Natural width accessor invalid for 16-bit field"); BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000, "Natural width accessor invalid for 64-bit field"); BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001, "Natural width accessor invalid for 64-bit high field"); BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000, "Natural width accessor invalid for 32-bit field"); } static __always_inline unsigned long __vmcs_readl(unsigned long field) { unsigned long value; #ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT asm_goto_output("1: vmread %[field], %[output]\n\t" "jna %l[do_fail]\n\t" _ASM_EXTABLE(1b, %l[do_exception]) : [output] "=r" (value) : [field] "r" (field) : "cc" : do_fail, do_exception); return value; do_fail: instrumentation_begin(); vmread_error(field); instrumentation_end(); return 0; do_exception: kvm_spurious_fault(); return 0; #else /* !CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ asm volatile("1: vmread %2, %1\n\t" ".byte 0x3e\n\t" /* branch taken hint */ "ja 3f\n\t" /* * VMREAD failed. Push '0' for @fault, push the failing * @field, and bounce through the trampoline to preserve * volatile registers. */ "xorl %k1, %k1\n\t" "2:\n\t" "push %1\n\t" "push %2\n\t" "call vmread_error_trampoline\n\t" /* * Unwind the stack. Note, the trampoline zeros out the * memory for @fault so that the result is '0' on error. */ "pop %2\n\t" "pop %1\n\t" "3:\n\t" /* VMREAD faulted. As above, except push '1' for @fault. */ _ASM_EXTABLE_TYPE_REG(1b, 2b, EX_TYPE_ONE_REG, %1) : ASM_CALL_CONSTRAINT, "=&r"(value) : "r"(field) : "cc"); return value; #endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */ } static __always_inline u16 vmcs_read16(unsigned long field) { vmcs_check16(field); if (kvm_is_using_evmcs()) return evmcs_read16(field); return __vmcs_readl(field); } static __always_inline u32 vmcs_read32(unsigned long field) { vmcs_check32(field); if (kvm_is_using_evmcs()) return evmcs_read32(field); return __vmcs_readl(field); } static __always_inline u64 vmcs_read64(unsigned long field) { vmcs_check64(field); if (kvm_is_using_evmcs()) return evmcs_read64(field); #ifdef CONFIG_X86_64 return __vmcs_readl(field); #else return __vmcs_readl(field) | ((u64)__vmcs_readl(field+1) << 32); #endif } static __always_inline unsigned long vmcs_readl(unsigned long field) { vmcs_checkl(field); if (kvm_is_using_evmcs()) return evmcs_read64(field); return __vmcs_readl(field); } #define vmx_asm1(insn, op1, error_args...) \ do { \ asm goto("1: " __stringify(insn) " %0\n\t" \ ".byte 0x2e\n\t" /* branch not taken hint */ \ "jna %l[error]\n\t" \ _ASM_EXTABLE(1b, %l[fault]) \ : : op1 : "cc" : error, fault); \ return; \ error: \ instrumentation_begin(); \ insn##_error(error_args); \ instrumentation_end(); \ return; \ fault: \ kvm_spurious_fault(); \ } while (0) #define vmx_asm2(insn, op1, op2, error_args...) \ do { \ asm goto("1: " __stringify(insn) " %1, %0\n\t" \ ".byte 0x2e\n\t" /* branch not taken hint */ \ "jna %l[error]\n\t" \ _ASM_EXTABLE(1b, %l[fault]) \ : : op1, op2 : "cc" : error, fault); \ return; \ error: \ instrumentation_begin(); \ insn##_error(error_args); \ instrumentation_end(); \ return; \ fault: \ kvm_spurious_fault(); \ } while (0) static __always_inline void __vmcs_writel(unsigned long field, unsigned long value) { vmx_asm2(vmwrite, "r"(field), "rm"(value), field, value); } static __always_inline void vmcs_write16(unsigned long field, u16 value) { vmcs_check16(field); if (kvm_is_using_evmcs()) return evmcs_write16(field, value); __vmcs_writel(field, value); } static __always_inline void vmcs_write32(unsigned long field, u32 value) { vmcs_check32(field); if (kvm_is_using_evmcs()) return evmcs_write32(field, value); __vmcs_writel(field, value); } static __always_inline void vmcs_write64(unsigned long field, u64 value) { vmcs_check64(field); if (kvm_is_using_evmcs()) return evmcs_write64(field, value); __vmcs_writel(field, value); #ifndef CONFIG_X86_64 __vmcs_writel(field+1, value >> 32); #endif } static __always_inline void vmcs_writel(unsigned long field, unsigned long value) { vmcs_checkl(field); if (kvm_is_using_evmcs()) return evmcs_write64(field, value); __vmcs_writel(field, value); } static __always_inline void vmcs_clear_bits(unsigned long field, u32 mask) { BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000, "vmcs_clear_bits does not support 64-bit fields"); if (kvm_is_using_evmcs()) return evmcs_write32(field, evmcs_read32(field) & ~mask); __vmcs_writel(field, __vmcs_readl(field) & ~mask); } static __always_inline void vmcs_set_bits(unsigned long field, u32 mask) { BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000, "vmcs_set_bits does not support 64-bit fields"); if (kvm_is_using_evmcs()) return evmcs_write32(field, evmcs_read32(field) | mask); __vmcs_writel(field, __vmcs_readl(field) | mask); } static inline void vmcs_clear(struct vmcs *vmcs) { u64 phys_addr = __pa(vmcs); vmx_asm1(vmclear, "m"(phys_addr), vmcs, phys_addr); } static inline void vmcs_load(struct vmcs *vmcs) { u64 phys_addr = __pa(vmcs); if (kvm_is_using_evmcs()) return evmcs_load(phys_addr); vmx_asm1(vmptrld, "m"(phys_addr), vmcs, phys_addr); } static inline void __invvpid(unsigned long ext, u16 vpid, gva_t gva) { struct { u64 vpid : 16; u64 rsvd : 48; u64 gva; } operand = { vpid, 0, gva }; vmx_asm2(invvpid, "r"(ext), "m"(operand), ext, vpid, gva); } static inline void __invept(unsigned long ext, u64 eptp, gpa_t gpa) { struct { u64 eptp, gpa; } operand = {eptp, gpa}; vmx_asm2(invept, "r"(ext), "m"(operand), ext, eptp, gpa); } static inline void vpid_sync_vcpu_single(int vpid) { if (vpid == 0) return; __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vpid, 0); } static inline void vpid_sync_vcpu_global(void) { __invvpid(VMX_VPID_EXTENT_ALL_CONTEXT, 0, 0); } static inline void vpid_sync_context(int vpid) { if (cpu_has_vmx_invvpid_single()) vpid_sync_vcpu_single(vpid); else if (vpid != 0) vpid_sync_vcpu_global(); } static inline void vpid_sync_vcpu_addr(int vpid, gva_t addr) { if (vpid == 0) return; if (cpu_has_vmx_invvpid_individual_addr()) __invvpid(VMX_VPID_EXTENT_INDIVIDUAL_ADDR, vpid, addr); else vpid_sync_context(vpid); } static inline void ept_sync_global(void) { __invept(VMX_EPT_EXTENT_GLOBAL, 0, 0); } static inline void ept_sync_context(u64 eptp) { if (cpu_has_vmx_invept_context()) __invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0); else ept_sync_global(); } #endif /* __KVM_X86_VMX_INSN_H */ |
| 8824 7543 4938 5599 3296 307 26 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 | /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM timer #if !defined(_TRACE_TIMER_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_TIMER_H #include <linux/tracepoint.h> #include <linux/hrtimer.h> #include <linux/timer.h> DECLARE_EVENT_CLASS(timer_class, TP_PROTO(struct timer_list *timer), TP_ARGS(timer), TP_STRUCT__entry( __field( void *, timer ) ), TP_fast_assign( __entry->timer = timer; ), TP_printk("timer=%p", __entry->timer) ); /** * timer_init - called when the timer is initialized * @timer: pointer to struct timer_list */ DEFINE_EVENT(timer_class, timer_init, TP_PROTO(struct timer_list *timer), TP_ARGS(timer) ); #define decode_timer_flags(flags) \ __print_flags(flags, "|", \ { TIMER_MIGRATING, "M" }, \ { TIMER_DEFERRABLE, "D" }, \ { TIMER_PINNED, "P" }, \ { TIMER_IRQSAFE, "I" }) /** * timer_start - called when the timer is started * @timer: pointer to struct timer_list * @bucket_expiry: the bucket expiry time */ TRACE_EVENT(timer_start, TP_PROTO(struct timer_list *timer, unsigned long bucket_expiry), TP_ARGS(timer, bucket_expiry), TP_STRUCT__entry( __field( void *, timer ) __field( void *, function ) __field( unsigned long, expires ) __field( unsigned long, bucket_expiry ) __field( unsigned long, now ) __field( unsigned int, flags ) ), TP_fast_assign( __entry->timer = timer; __entry->function = timer->function; __entry->expires = timer->expires; __entry->bucket_expiry = bucket_expiry; __entry->now = jiffies; __entry->flags = timer->flags; ), TP_printk("timer=%p function=%ps expires=%lu [timeout=%ld] bucket_expiry=%lu cpu=%u idx=%u flags=%s", __entry->timer, __entry->function, __entry->expires, (long)__entry->expires - __entry->now, __entry->bucket_expiry, __entry->flags & TIMER_CPUMASK, __entry->flags >> TIMER_ARRAYSHIFT, decode_timer_flags(__entry->flags & TIMER_TRACE_FLAGMASK)) ); /** * timer_expire_entry - called immediately before the timer callback * @timer: pointer to struct timer_list * @baseclk: value of timer_base::clk when timer expires * * Allows to determine the timer latency. */ TRACE_EVENT(timer_expire_entry, TP_PROTO(struct timer_list *timer, unsigned long baseclk), TP_ARGS(timer, baseclk), TP_STRUCT__entry( __field( void *, timer ) __field( unsigned long, now ) __field( void *, function) __field( unsigned long, baseclk ) ), TP_fast_assign( __entry->timer = timer; __entry->now = jiffies; __entry->function = timer->function; __entry->baseclk = baseclk; ), TP_printk("timer=%p function=%ps now=%lu baseclk=%lu", __entry->timer, __entry->function, __entry->now, __entry->baseclk) ); /** * timer_expire_exit - called immediately after the timer callback returns * @timer: pointer to struct timer_list * * When used in combination with the timer_expire_entry tracepoint we can * determine the runtime of the timer callback function. * * NOTE: Do NOT dereference timer in TP_fast_assign. The pointer might * be invalid. We solely track the pointer. */ DEFINE_EVENT(timer_class, timer_expire_exit, TP_PROTO(struct timer_list *timer), TP_ARGS(timer) ); /** * timer_cancel - called when the timer is canceled * @timer: pointer to struct timer_list */ DEFINE_EVENT(timer_class, timer_cancel, TP_PROTO(struct timer_list *timer), TP_ARGS(timer) ); TRACE_EVENT(timer_base_idle, TP_PROTO(bool is_idle, unsigned int cpu), TP_ARGS(is_idle, cpu), TP_STRUCT__entry( __field( bool, is_idle ) __field( unsigned int, cpu ) ), TP_fast_assign( __entry->is_idle = is_idle; __entry->cpu = cpu; ), TP_printk("is_idle=%d cpu=%d", __entry->is_idle, __entry->cpu) ); #define decode_clockid(type) \ __print_symbolic(type, \ { CLOCK_REALTIME, "CLOCK_REALTIME" }, \ { CLOCK_MONOTONIC, "CLOCK_MONOTONIC" }, \ { CLOCK_BOOTTIME, "CLOCK_BOOTTIME" }, \ { CLOCK_TAI, "CLOCK_TAI" }) #define decode_hrtimer_mode(mode) \ __print_symbolic(mode, \ { HRTIMER_MODE_ABS, "ABS" }, \ { HRTIMER_MODE_REL, "REL" }, \ { HRTIMER_MODE_ABS_PINNED, "ABS|PINNED" }, \ { HRTIMER_MODE_REL_PINNED, "REL|PINNED" }, \ { HRTIMER_MODE_ABS_SOFT, "ABS|SOFT" }, \ { HRTIMER_MODE_REL_SOFT, "REL|SOFT" }, \ { HRTIMER_MODE_ABS_PINNED_SOFT, "ABS|PINNED|SOFT" }, \ { HRTIMER_MODE_REL_PINNED_SOFT, "REL|PINNED|SOFT" }, \ { HRTIMER_MODE_ABS_HARD, "ABS|HARD" }, \ { HRTIMER_MODE_REL_HARD, "REL|HARD" }, \ { HRTIMER_MODE_ABS_PINNED_HARD, "ABS|PINNED|HARD" }, \ { HRTIMER_MODE_REL_PINNED_HARD, "REL|PINNED|HARD" }) /** * hrtimer_init - called when the hrtimer is initialized * @hrtimer: pointer to struct hrtimer * @clockid: the hrtimers clock * @mode: the hrtimers mode */ TRACE_EVENT(hrtimer_init, TP_PROTO(struct hrtimer *hrtimer, clockid_t clockid, enum hrtimer_mode mode), TP_ARGS(hrtimer, clockid, mode), TP_STRUCT__entry( __field( void *, hrtimer ) __field( clockid_t, clockid ) __field( enum hrtimer_mode, mode ) ), TP_fast_assign( __entry->hrtimer = hrtimer; __entry->clockid = clockid; __entry->mode = mode; ), TP_printk("hrtimer=%p clockid=%s mode=%s", __entry->hrtimer, decode_clockid(__entry->clockid), decode_hrtimer_mode(__entry->mode)) ); /** * hrtimer_start - called when the hrtimer is started * @hrtimer: pointer to struct hrtimer * @mode: the hrtimers mode */ TRACE_EVENT(hrtimer_start, TP_PROTO(struct hrtimer *hrtimer, enum hrtimer_mode mode), TP_ARGS(hrtimer, mode), TP_STRUCT__entry( __field( void *, hrtimer ) __field( void *, function ) __field( s64, expires ) __field( s64, softexpires ) __field( enum hrtimer_mode, mode ) ), TP_fast_assign( __entry->hrtimer = hrtimer; __entry->function = hrtimer->function; __entry->expires = hrtimer_get_expires(hrtimer); __entry->softexpires = hrtimer_get_softexpires(hrtimer); __entry->mode = mode; ), TP_printk("hrtimer=%p function=%ps expires=%llu softexpires=%llu " "mode=%s", __entry->hrtimer, __entry->function, (unsigned long long) __entry->expires, (unsigned long long) __entry->softexpires, decode_hrtimer_mode(__entry->mode)) ); /** * hrtimer_expire_entry - called immediately before the hrtimer callback * @hrtimer: pointer to struct hrtimer * @now: pointer to variable which contains current time of the * timers base. * * Allows to determine the timer latency. */ TRACE_EVENT(hrtimer_expire_entry, TP_PROTO(struct hrtimer *hrtimer, ktime_t *now), TP_ARGS(hrtimer, now), TP_STRUCT__entry( __field( void *, hrtimer ) __field( s64, now ) __field( void *, function) ), TP_fast_assign( __entry->hrtimer = hrtimer; __entry->now = *now; __entry->function = hrtimer->function; ), TP_printk("hrtimer=%p function=%ps now=%llu", __entry->hrtimer, __entry->function, (unsigned long long) __entry->now) ); DECLARE_EVENT_CLASS(hrtimer_class, TP_PROTO(struct hrtimer *hrtimer), TP_ARGS(hrtimer), TP_STRUCT__entry( __field( void *, hrtimer ) ), TP_fast_assign( __entry->hrtimer = hrtimer; ), TP_printk("hrtimer=%p", __entry->hrtimer) ); /** * hrtimer_expire_exit - called immediately after the hrtimer callback returns * @hrtimer: pointer to struct hrtimer * * When used in combination with the hrtimer_expire_entry tracepoint we can * determine the runtime of the callback function. */ DEFINE_EVENT(hrtimer_class, hrtimer_expire_exit, TP_PROTO(struct hrtimer *hrtimer), TP_ARGS(hrtimer) ); /** * hrtimer_cancel - called when the hrtimer is canceled * @hrtimer: pointer to struct hrtimer */ DEFINE_EVENT(hrtimer_class, hrtimer_cancel, TP_PROTO(struct hrtimer *hrtimer), TP_ARGS(hrtimer) ); /** * itimer_state - called when itimer is started or canceled * @which: name of the interval timer * @value: the itimers value, itimer is canceled if value->it_value is * zero, otherwise it is started * @expires: the itimers expiry time */ TRACE_EVENT(itimer_state, TP_PROTO(int which, const struct itimerspec64 *const value, unsigned long long expires), TP_ARGS(which, value, expires), TP_STRUCT__entry( __field( int, which ) __field( unsigned long long, expires ) __field( long, value_sec ) __field( long, value_nsec ) __field( long, interval_sec ) __field( long, interval_nsec ) ), TP_fast_assign( __entry->which = which; __entry->expires = expires; __entry->value_sec = value->it_value.tv_sec; __entry->value_nsec = value->it_value.tv_nsec; __entry->interval_sec = value->it_interval.tv_sec; __entry->interval_nsec = value->it_interval.tv_nsec; ), TP_printk("which=%d expires=%llu it_value=%ld.%06ld it_interval=%ld.%06ld", __entry->which, __entry->expires, __entry->value_sec, __entry->value_nsec / NSEC_PER_USEC, __entry->interval_sec, __entry->interval_nsec / NSEC_PER_USEC) ); /** * itimer_expire - called when itimer expires * @which: type of the interval timer * @pid: pid of the process which owns the timer * @now: current time, used to calculate the latency of itimer */ TRACE_EVENT(itimer_expire, TP_PROTO(int which, struct pid *pid, unsigned long long now), TP_ARGS(which, pid, now), TP_STRUCT__entry( __field( int , which ) __field( pid_t, pid ) __field( unsigned long long, now ) ), TP_fast_assign( __entry->which = which; __entry->now = now; __entry->pid = pid_nr(pid); ), TP_printk("which=%d pid=%d now=%llu", __entry->which, (int) __entry->pid, __entry->now) ); #ifdef CONFIG_NO_HZ_COMMON #define TICK_DEP_NAMES \ tick_dep_mask_name(NONE) \ tick_dep_name(POSIX_TIMER) \ tick_dep_name(PERF_EVENTS) \ tick_dep_name(SCHED) \ tick_dep_name(CLOCK_UNSTABLE) \ tick_dep_name(RCU) \ tick_dep_name_end(RCU_EXP) #undef tick_dep_name #undef tick_dep_mask_name #undef tick_dep_name_end /* The MASK will convert to their bits and they need to be processed too */ #define tick_dep_name(sdep) TRACE_DEFINE_ENUM(TICK_DEP_BIT_##sdep); \ TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep); #define tick_dep_name_end(sdep) TRACE_DEFINE_ENUM(TICK_DEP_BIT_##sdep); \ TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep); /* NONE only has a mask defined for it */ #define tick_dep_mask_name(sdep) TRACE_DEFINE_ENUM(TICK_DEP_MASK_##sdep); TICK_DEP_NAMES #undef tick_dep_name #undef tick_dep_mask_name #undef tick_dep_name_end #define tick_dep_name(sdep) { TICK_DEP_MASK_##sdep, #sdep }, #define tick_dep_mask_name(sdep) { TICK_DEP_MASK_##sdep, #sdep }, #define tick_dep_name_end(sdep) { TICK_DEP_MASK_##sdep, #sdep } #define show_tick_dep_name(val) \ __print_symbolic(val, TICK_DEP_NAMES) TRACE_EVENT(tick_stop, TP_PROTO(int success, int dependency), TP_ARGS(success, dependency), TP_STRUCT__entry( __field( int , success ) __field( int , dependency ) ), TP_fast_assign( __entry->success = success; __entry->dependency = dependency; ), TP_printk("success=%d dependency=%s", __entry->success, \ show_tick_dep_name(__entry->dependency)) ); #endif #endif /* _TRACE_TIMER_H */ /* This part must be outside protection */ #include <trace/define_trace.h> |
| 10 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 | /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright 1996, 1997, 1998 Hans Reiser, see reiserfs/README for * licensing and copyright details */ #include <linux/reiserfs_fs.h> #include <linux/slab.h> #include <linux/interrupt.h> #include <linux/sched.h> #include <linux/bug.h> #include <linux/workqueue.h> #include <asm/unaligned.h> #include <linux/bitops.h> #include <linux/proc_fs.h> #include <linux/buffer_head.h> /* the 32 bit compat definitions with int argument */ #define REISERFS_IOC32_UNPACK _IOW(0xCD, 1, int) #define REISERFS_IOC32_GETVERSION FS_IOC32_GETVERSION #define REISERFS_IOC32_SETVERSION FS_IOC32_SETVERSION struct reiserfs_journal_list; /* bitmasks for i_flags field in reiserfs-specific part of inode */ typedef enum { /* * this says what format of key do all items (but stat data) of * an object have. If this is set, that format is 3.6 otherwise - 3.5 */ i_item_key_version_mask = 0x0001, /* * If this is unset, object has 3.5 stat data, otherwise, * it has 3.6 stat data with 64bit size, 32bit nlink etc. */ i_stat_data_version_mask = 0x0002, /* file might need tail packing on close */ i_pack_on_close_mask = 0x0004, /* don't pack tail of file */ i_nopack_mask = 0x0008, /* * If either of these are set, "safe link" was created for this * file during truncate or unlink. Safe link is used to avoid * leakage of disk space on crash with some files open, but unlinked. */ i_link_saved_unlink_mask = 0x0010, i_link_saved_truncate_mask = 0x0020, i_has_xattr_dir = 0x0040, i_data_log = 0x0080, } reiserfs_inode_flags; struct reiserfs_inode_info { __u32 i_key[4]; /* key is still 4 32 bit integers */ /* * transient inode flags that are never stored on disk. Bitmasks * for this field are defined above. */ __u32 i_flags; /* offset of first byte stored in direct item. */ __u32 i_first_direct_byte; /* copy of persistent inode flags read from sd_attrs. */ __u32 i_attrs; /* first unused block of a sequence of unused blocks */ int i_prealloc_block; int i_prealloc_count; /* length of that sequence */ /* per-transaction list of inodes which have preallocated blocks */ struct list_head i_prealloc_list; /* * new_packing_locality is created; new blocks for the contents * of this directory should be displaced */ unsigned new_packing_locality:1; /* * we use these for fsync or O_SYNC to decide which transaction * needs to be committed in order for this inode to be properly * flushed */ unsigned int i_trans_id; struct reiserfs_journal_list *i_jl; atomic_t openers; struct mutex tailpack; #ifdef CONFIG_REISERFS_FS_XATTR struct rw_semaphore i_xattr_sem; #endif #ifdef CONFIG_QUOTA struct dquot __rcu *i_dquot[MAXQUOTAS]; #endif struct inode vfs_inode; }; typedef enum { reiserfs_attrs_cleared = 0x00000001, } reiserfs_super_block_flags; /* * struct reiserfs_super_block accessors/mutators since this is a disk * structure, it will always be in little endian format. */ #define sb_block_count(sbp) (le32_to_cpu((sbp)->s_v1.s_block_count)) #define set_sb_block_count(sbp,v) ((sbp)->s_v1.s_block_count = cpu_to_le32(v)) #define sb_free_blocks(sbp) (le32_to_cpu((sbp)->s_v1.s_free_blocks)) #define set_sb_free_blocks(sbp,v) ((sbp)->s_v1.s_free_blocks = cpu_to_le32(v)) #define sb_root_block(sbp) (le32_to_cpu((sbp)->s_v1.s_root_block)) #define set_sb_root_block(sbp,v) ((sbp)->s_v1.s_root_block = cpu_to_le32(v)) #define sb_jp_journal_1st_block(sbp) \ (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_1st_block)) #define set_sb_jp_journal_1st_block(sbp,v) \ ((sbp)->s_v1.s_journal.jp_journal_1st_block = cpu_to_le32(v)) #define sb_jp_journal_dev(sbp) \ (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_dev)) #define set_sb_jp_journal_dev(sbp,v) \ ((sbp)->s_v1.s_journal.jp_journal_dev = cpu_to_le32(v)) #define sb_jp_journal_size(sbp) \ (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_size)) #define set_sb_jp_journal_size(sbp,v) \ ((sbp)->s_v1.s_journal.jp_journal_size = cpu_to_le32(v)) #define sb_jp_journal_trans_max(sbp) \ (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_trans_max)) #define set_sb_jp_journal_trans_max(sbp,v) \ ((sbp)->s_v1.s_journal.jp_journal_trans_max = cpu_to_le32(v)) #define sb_jp_journal_magic(sbp) \ (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_magic)) #define set_sb_jp_journal_magic(sbp,v) \ ((sbp)->s_v1.s_journal.jp_journal_magic = cpu_to_le32(v)) #define sb_jp_journal_max_batch(sbp) \ (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_max_batch)) #define set_sb_jp_journal_max_batch(sbp,v) \ ((sbp)->s_v1.s_journal.jp_journal_max_batch = cpu_to_le32(v)) #define sb_jp_jourmal_max_commit_age(sbp) \ (le32_to_cpu((sbp)->s_v1.s_journal.jp_journal_max_commit_age)) #define set_sb_jp_journal_max_commit_age(sbp,v) \ ((sbp)->s_v1.s_journal.jp_journal_max_commit_age = cpu_to_le32(v)) #define sb_blocksize(sbp) (le16_to_cpu((sbp)->s_v1.s_blocksize)) #define set_sb_blocksize(sbp,v) ((sbp)->s_v1.s_blocksize = cpu_to_le16(v)) #define sb_oid_maxsize(sbp) (le16_to_cpu((sbp)->s_v1.s_oid_maxsize)) #define set_sb_oid_maxsize(sbp,v) ((sbp)->s_v1.s_oid_maxsize = cpu_to_le16(v)) #define sb_oid_cursize(sbp) (le16_to_cpu((sbp)->s_v1.s_oid_cursize)) #define set_sb_oid_cursize(sbp,v) ((sbp)->s_v1.s_oid_cursize = cpu_to_le16(v)) #define sb_umount_state(sbp) (le16_to_cpu((sbp)->s_v1.s_umount_state)) #define set_sb_umount_state(sbp,v) ((sbp)->s_v1.s_umount_state = cpu_to_le16(v)) #define sb_fs_state(sbp) (le16_to_cpu((sbp)->s_v1.s_fs_state)) #define set_sb_fs_state(sbp,v) ((sbp)->s_v1.s_fs_state = cpu_to_le16(v)) #define sb_hash_function_code(sbp) \ (le32_to_cpu((sbp)->s_v1.s_hash_function_code)) #define set_sb_hash_function_code(sbp,v) \ ((sbp)->s_v1.s_hash_function_code = cpu_to_le32(v)) #define sb_tree_height(sbp) (le16_to_cpu((sbp)->s_v1.s_tree_height)) #define set_sb_tree_height(sbp,v) ((sbp)->s_v1.s_tree_height = cpu_to_le16(v)) #define sb_bmap_nr(sbp) (le16_to_cpu((sbp)->s_v1.s_bmap_nr)) #define set_sb_bmap_nr(sbp,v) ((sbp)->s_v1.s_bmap_nr = cpu_to_le16(v)) #define sb_version(sbp) (le16_to_cpu((sbp)->s_v1.s_version)) #define set_sb_version(sbp,v) ((sbp)->s_v1.s_version = cpu_to_le16(v)) #define sb_mnt_count(sbp) (le16_to_cpu((sbp)->s_mnt_count)) #define set_sb_mnt_count(sbp, v) ((sbp)->s_mnt_count = cpu_to_le16(v)) #define sb_reserved_for_journal(sbp) \ (le16_to_cpu((sbp)->s_v1.s_reserved_for_journal)) #define set_sb_reserved_for_journal(sbp,v) \ ((sbp)->s_v1.s_reserved_for_journal = cpu_to_le16(v)) /* LOGGING -- */ /* * These all interelate for performance. * * If the journal block count is smaller than n transactions, you lose speed. * I don't know what n is yet, I'm guessing 8-16. * * typical transaction size depends on the application, how often fsync is * called, and how many metadata blocks you dirty in a 30 second period. * The more small files (<16k) you use, the larger your transactions will * be. * * If your journal fills faster than dirty buffers get flushed to disk, it * must flush them before allowing the journal to wrap, which slows things * down. If you need high speed meta data updates, the journal should be * big enough to prevent wrapping before dirty meta blocks get to disk. * * If the batch max is smaller than the transaction max, you'll waste space * at the end of the journal because journal_end sets the next transaction * to start at 0 if the next transaction has any chance of wrapping. * * The large the batch max age, the better the speed, and the more meta * data changes you'll lose after a crash. */ /* don't mess with these for a while */ /* we have a node size define somewhere in reiserfs_fs.h. -Hans */ #define JOURNAL_BLOCK_SIZE 4096 /* BUG gotta get rid of this */ #define JOURNAL_MAX_CNODE 1500 /* max cnodes to allocate. */ #define JOURNAL_HASH_SIZE 8192 /* number of copies of the bitmaps to have floating. Must be >= 2 */ #define JOURNAL_NUM_BITMAPS 5 /* * One of these for every block in every transaction * Each one is in two hash tables. First, a hash of the current transaction, * and after journal_end, a hash of all the in memory transactions. * next and prev are used by the current transaction (journal_hash). * hnext and hprev are used by journal_list_hash. If a block is in more * than one transaction, the journal_list_hash links it in multiple times. * This allows flush_journal_list to remove just the cnode belonging to a * given transaction. */ struct reiserfs_journal_cnode { struct buffer_head *bh; /* real buffer head */ struct super_block *sb; /* dev of real buffer head */ /* block number of real buffer head, == 0 when buffer on disk */ __u32 blocknr; unsigned long state; /* journal list this cnode lives in */ struct reiserfs_journal_list *jlist; struct reiserfs_journal_cnode *next; /* next in transaction list */ struct reiserfs_journal_cnode *prev; /* prev in transaction list */ struct reiserfs_journal_cnode *hprev; /* prev in hash list */ struct reiserfs_journal_cnode *hnext; /* next in hash list */ }; struct reiserfs_bitmap_node { int id; char *data; struct list_head list; }; struct reiserfs_list_bitmap { struct reiserfs_journal_list *journal_list; struct reiserfs_bitmap_node **bitmaps; }; /* * one of these for each transaction. The most important part here is the * j_realblock. this list of cnodes is used to hash all the blocks in all * the commits, to mark all the real buffer heads dirty once all the commits * hit the disk, and to make sure every real block in a transaction is on * disk before allowing the log area to be overwritten */ struct reiserfs_journal_list { unsigned long j_start; unsigned long j_state; unsigned long j_len; atomic_t j_nonzerolen; atomic_t j_commit_left; /* all commits older than this on disk */ atomic_t j_older_commits_done; struct mutex j_commit_mutex; unsigned int j_trans_id; time64_t j_timestamp; /* write-only but useful for crash dump analysis */ struct reiserfs_list_bitmap *j_list_bitmap; struct buffer_head *j_commit_bh; /* commit buffer head */ struct reiserfs_journal_cnode *j_realblock; struct reiserfs_journal_cnode *j_freedlist; /* list of buffers that were freed during this trans. free each of these on flush */ /* time ordered list of all active transactions */ struct list_head j_list; /* * time ordered list of all transactions we haven't tried * to flush yet */ struct list_head j_working_list; /* list of tail conversion targets in need of flush before commit */ struct list_head j_tail_bh_list; /* list of data=ordered buffers in need of flush before commit */ struct list_head j_bh_list; int j_refcount; }; struct reiserfs_journal { struct buffer_head **j_ap_blocks; /* journal blocks on disk */ /* newest journal block */ struct reiserfs_journal_cnode *j_last; /* oldest journal block. start here for traverse */ struct reiserfs_journal_cnode *j_first; struct file *j_bdev_file; /* first block on s_dev of reserved area journal */ int j_1st_reserved_block; unsigned long j_state; unsigned int j_trans_id; unsigned long j_mount_id; /* start of current waiting commit (index into j_ap_blocks) */ unsigned long j_start; unsigned long j_len; /* length of current waiting commit */ /* number of buffers requested by journal_begin() */ unsigned long j_len_alloc; atomic_t j_wcount; /* count of writers for current commit */ /* batch count. allows turning X transactions into 1 */ unsigned long j_bcount; /* first unflushed transactions offset */ unsigned long j_first_unflushed_offset; /* last fully flushed journal timestamp */ unsigned j_last_flush_trans_id; struct buffer_head *j_header_bh; time64_t j_trans_start_time; /* time this transaction started */ struct mutex j_mutex; struct mutex j_flush_mutex; /* wait for current transaction to finish before starting new one */ wait_queue_head_t j_join_wait; atomic_t j_jlock; /* lock for j_join_wait */ int j_list_bitmap_index; /* number of next list bitmap to use */ /* no more journal begins allowed. MUST sleep on j_join_wait */ int j_must_wait; /* next journal_end will flush all journal list */ int j_next_full_flush; /* next journal_end will flush all async commits */ int j_next_async_flush; int j_cnode_used; /* number of cnodes on the used list */ int j_cnode_free; /* number of cnodes on the free list */ /* max number of blocks in a transaction. */ unsigned int j_trans_max; /* max number of blocks to batch into a trans */ unsigned int j_max_batch; /* in seconds, how old can an async commit be */ unsigned int j_max_commit_age; /* in seconds, how old can a transaction be */ unsigned int j_max_trans_age; /* the default for the max commit age */ unsigned int j_default_max_commit_age; struct reiserfs_journal_cnode *j_cnode_free_list; /* orig pointer returned from vmalloc */ struct reiserfs_journal_cnode *j_cnode_free_orig; struct reiserfs_journal_list *j_current_jl; int j_free_bitmap_nodes; int j_used_bitmap_nodes; int j_num_lists; /* total number of active transactions */ int j_num_work_lists; /* number that need attention from kreiserfsd */ /* debugging to make sure things are flushed in order */ unsigned int j_last_flush_id; /* debugging to make sure things are committed in order */ unsigned int j_last_commit_id; struct list_head j_bitmap_nodes; struct list_head j_dirty_buffers; spinlock_t j_dirty_buffers_lock; /* protects j_dirty_buffers */ /* list of all active transactions */ struct list_head j_journal_list; /* lists that haven't been touched by writeback attempts */ struct list_head j_working_list; /* hash table for real buffer heads in current trans */ struct reiserfs_journal_cnode *j_hash_table[JOURNAL_HASH_SIZE]; /* hash table for all the real buffer heads in all the transactions */ struct reiserfs_journal_cnode *j_list_hash_table[JOURNAL_HASH_SIZE]; /* array of bitmaps to record the deleted blocks */ struct reiserfs_list_bitmap j_list_bitmap[JOURNAL_NUM_BITMAPS]; /* list of inodes which have preallocated blocks */ struct list_head j_prealloc_list; int j_persistent_trans; unsigned long j_max_trans_size; unsigned long j_max_batch_size; int j_errno; /* when flushing ordered buffers, throttle new ordered writers */ struct delayed_work j_work; struct super_block *j_work_sb; atomic_t j_async_throttle; }; enum journal_state_bits { J_WRITERS_BLOCKED = 1, /* set when new writers not allowed */ J_WRITERS_QUEUED, /* set when log is full due to too many writers */ J_ABORTED, /* set when log is aborted */ }; /* ick. magic string to find desc blocks in the journal */ #define JOURNAL_DESC_MAGIC "ReIsErLB" typedef __u32(*hashf_t) (const signed char *, int); struct reiserfs_bitmap_info { __u32 free_count; }; struct proc_dir_entry; #if defined( CONFIG_PROC_FS ) && defined( CONFIG_REISERFS_PROC_INFO ) typedef unsigned long int stat_cnt_t; typedef struct reiserfs_proc_info_data { spinlock_t lock; int exiting; int max_hash_collisions; stat_cnt_t breads; stat_cnt_t bread_miss; stat_cnt_t search_by_key; stat_cnt_t search_by_key_fs_changed; stat_cnt_t search_by_key_restarted; stat_cnt_t insert_item_restarted; stat_cnt_t paste_into_item_restarted; stat_cnt_t cut_from_item_restarted; stat_cnt_t delete_solid_item_restarted; stat_cnt_t delete_item_restarted; stat_cnt_t leaked_oid; stat_cnt_t leaves_removable; /* * balances per level. * Use explicit 5 as MAX_HEIGHT is not visible yet. */ stat_cnt_t balance_at[5]; /* XXX */ /* sbk == search_by_key */ stat_cnt_t sbk_read_at[5]; /* XXX */ stat_cnt_t sbk_fs_changed[5]; stat_cnt_t sbk_restarted[5]; stat_cnt_t items_at[5]; /* XXX */ stat_cnt_t free_at[5]; /* XXX */ stat_cnt_t can_node_be_removed[5]; /* XXX */ long int lnum[5]; /* XXX */ long int rnum[5]; /* XXX */ long int lbytes[5]; /* XXX */ long int rbytes[5]; /* XXX */ stat_cnt_t get_neighbors[5]; stat_cnt_t get_neighbors_restart[5]; stat_cnt_t need_l_neighbor[5]; stat_cnt_t need_r_neighbor[5]; stat_cnt_t free_block; struct __scan_bitmap_stats { stat_cnt_t call; stat_cnt_t wait; stat_cnt_t bmap; stat_cnt_t retry; stat_cnt_t in_journal_hint; stat_cnt_t in_journal_nohint; stat_cnt_t stolen; } scan_bitmap; struct __journal_stats { stat_cnt_t in_journal; stat_cnt_t in_journal_bitmap; stat_cnt_t in_journal_reusable; stat_cnt_t lock_journal; stat_cnt_t lock_journal_wait; stat_cnt_t journal_being; stat_cnt_t journal_relock_writers; stat_cnt_t journal_relock_wcount; stat_cnt_t mark_dirty; stat_cnt_t mark_dirty_already; stat_cnt_t mark_dirty_notjournal; stat_cnt_t restore_prepared; stat_cnt_t prepare; stat_cnt_t prepare_retry; } journal; } reiserfs_proc_info_data_t; #else typedef struct reiserfs_proc_info_data { } reiserfs_proc_info_data_t; #endif /* Number of quota types we support */ #define REISERFS_MAXQUOTAS 2 /* reiserfs union of in-core super block data */ struct reiserfs_sb_info { /* Buffer containing the super block */ struct buffer_head *s_sbh; /* Pointer to the on-disk super block in the buffer */ struct reiserfs_super_block *s_rs; struct reiserfs_bitmap_info *s_ap_bitmap; /* pointer to journal information */ struct reiserfs_journal *s_journal; unsigned short s_mount_state; /* reiserfs state (valid, invalid) */ /* Serialize writers access, replace the old bkl */ struct mutex lock; /* Owner of the lock (can be recursive) */ struct task_struct *lock_owner; /* Depth of the lock, start from -1 like the bkl */ int lock_depth; struct workqueue_struct *commit_wq; /* Comment? -Hans */ void (*end_io_handler) (struct buffer_head *, int); /* * pointer to function which is used to sort names in directory. * Set on mount */ hashf_t s_hash_function; /* reiserfs's mount options are set here */ unsigned long s_mount_opt; /* This is a structure that describes block allocator options */ struct { /* Bitfield for enable/disable kind of options */ unsigned long bits; /* * size started from which we consider file * to be a large one (in blocks) */ unsigned long large_file_size; int border; /* percentage of disk, border takes */ /* * Minimal file size (in blocks) starting * from which we do preallocations */ int preallocmin; /* * Number of blocks we try to prealloc when file * reaches preallocmin size (in blocks) or prealloc_list is empty. */ int preallocsize; } s_alloc_options; /* Comment? -Hans */ wait_queue_head_t s_wait; /* increased by one every time the tree gets re-balanced */ atomic_t s_generation_counter; /* File system properties. Currently holds on-disk FS format */ unsigned long s_properties; /* session statistics */ int s_disk_reads; int s_disk_writes; int s_fix_nodes; int s_do_balance; int s_unneeded_left_neighbor; int s_good_search_by_key_reada; int s_bmaps; int s_bmaps_without_search; int s_direct2indirect; int s_indirect2direct; /* * set up when it's ok for reiserfs_read_inode2() to read from * disk inode with nlink==0. Currently this is only used during * finish_unfinished() processing at mount time */ int s_is_unlinked_ok; reiserfs_proc_info_data_t s_proc_info_data; struct proc_dir_entry *procdir; /* amount of blocks reserved for further allocations */ int reserved_blocks; /* this lock on now only used to protect reserved_blocks variable */ spinlock_t bitmap_lock; struct dentry *priv_root; /* root of /.reiserfs_priv */ struct dentry *xattr_root; /* root of /.reiserfs_priv/xattrs */ int j_errno; int work_queued; /* non-zero delayed work is queued */ struct delayed_work old_work; /* old transactions flush delayed work */ spinlock_t old_work_lock; /* protects old_work and work_queued */ #ifdef CONFIG_QUOTA char *s_qf_names[REISERFS_MAXQUOTAS]; int s_jquota_fmt; #endif char *s_jdev; /* Stored jdev for mount option showing */ #ifdef CONFIG_REISERFS_CHECK /* * Detects whether more than one copy of tb exists per superblock * as a means of checking whether do_balance is executing * concurrently against another tree reader/writer on a same * mount point. */ struct tree_balance *cur_tb; #endif }; /* Definitions of reiserfs on-disk properties: */ #define REISERFS_3_5 0 #define REISERFS_3_6 1 #define REISERFS_OLD_FORMAT 2 /* Mount options */ enum reiserfs_mount_options { /* large tails will be created in a session */ REISERFS_LARGETAIL, /* * small (for files less than block size) tails will * be created in a session */ REISERFS_SMALLTAIL, /* replay journal and return 0. Use by fsck */ REPLAYONLY, /* * -o conv: causes conversion of old format super block to the * new format. If not specified - old partition will be dealt * with in a manner of 3.5.x */ REISERFS_CONVERT, /* * -o hash={tea, rupasov, r5, detect} is meant for properly mounting * reiserfs disks from 3.5.19 or earlier. 99% of the time, this * option is not required. If the normal autodection code can't * determine which hash to use (because both hashes had the same * value for a file) use this option to force a specific hash. * It won't allow you to override the existing hash on the FS, so * if you have a tea hash disk, and mount with -o hash=rupasov, * the mount will fail. */ FORCE_TEA_HASH, /* try to force tea hash on mount */ FORCE_RUPASOV_HASH, /* try to force rupasov hash on mount */ FORCE_R5_HASH, /* try to force rupasov hash on mount */ FORCE_HASH_DETECT, /* try to detect hash function on mount */ REISERFS_DATA_LOG, REISERFS_DATA_ORDERED, REISERFS_DATA_WRITEBACK, /* * used for testing experimental features, makes benchmarking new * features with and without more convenient, should never be used by * users in any code shipped to users (ideally) */ REISERFS_NO_BORDER, REISERFS_NO_UNHASHED_RELOCATION, REISERFS_HASHED_RELOCATION, REISERFS_ATTRS, REISERFS_XATTRS_USER, REISERFS_POSIXACL, REISERFS_EXPOSE_PRIVROOT, REISERFS_BARRIER_NONE, REISERFS_BARRIER_FLUSH, /* Actions on error */ REISERFS_ERROR_PANIC, REISERFS_ERROR_RO, REISERFS_ERROR_CONTINUE, REISERFS_USRQUOTA, /* User quota option specified */ REISERFS_GRPQUOTA, /* Group quota option specified */ REISERFS_TEST1, REISERFS_TEST2, REISERFS_TEST3, REISERFS_TEST4, REISERFS_UNSUPPORTED_OPT, }; #define reiserfs_r5_hash(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_R5_HASH)) #define reiserfs_rupasov_hash(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_RUPASOV_HASH)) #define reiserfs_tea_hash(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_TEA_HASH)) #define reiserfs_hash_detect(s) (REISERFS_SB(s)->s_mount_opt & (1 << FORCE_HASH_DETECT)) #define reiserfs_no_border(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_NO_BORDER)) #define reiserfs_no_unhashed_relocation(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_NO_UNHASHED_RELOCATION)) #define reiserfs_hashed_relocation(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_HASHED_RELOCATION)) #define reiserfs_test4(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_TEST4)) #define have_large_tails(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_LARGETAIL)) #define have_small_tails(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_SMALLTAIL)) #define replay_only(s) (REISERFS_SB(s)->s_mount_opt & (1 << REPLAYONLY)) #define reiserfs_attrs(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_ATTRS)) #define old_format_only(s) (REISERFS_SB(s)->s_properties & (1 << REISERFS_3_5)) #define convert_reiserfs(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_CONVERT)) #define reiserfs_data_log(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_LOG)) #define reiserfs_data_ordered(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_ORDERED)) #define reiserfs_data_writeback(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_DATA_WRITEBACK)) #define reiserfs_xattrs_user(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_XATTRS_USER)) #define reiserfs_posixacl(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_POSIXACL)) #define reiserfs_expose_privroot(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_EXPOSE_PRIVROOT)) #define reiserfs_xattrs_optional(s) (reiserfs_xattrs_user(s) || reiserfs_posixacl(s)) #define reiserfs_barrier_none(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_NONE)) #define reiserfs_barrier_flush(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_BARRIER_FLUSH)) #define reiserfs_error_panic(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_ERROR_PANIC)) #define reiserfs_error_ro(s) (REISERFS_SB(s)->s_mount_opt & (1 << REISERFS_ERROR_RO)) void reiserfs_file_buffer(struct buffer_head *bh, int list); extern struct file_system_type reiserfs_fs_type; int reiserfs_resize(struct super_block *, unsigned long); #define CARRY_ON 0 #define SCHEDULE_OCCURRED 1 #define SB_BUFFER_WITH_SB(s) (REISERFS_SB(s)->s_sbh) #define SB_JOURNAL(s) (REISERFS_SB(s)->s_journal) #define SB_JOURNAL_1st_RESERVED_BLOCK(s) (SB_JOURNAL(s)->j_1st_reserved_block) #define SB_JOURNAL_LEN_FREE(s) (SB_JOURNAL(s)->j_journal_len_free) #define SB_AP_BITMAP(s) (REISERFS_SB(s)->s_ap_bitmap) #define SB_DISK_JOURNAL_HEAD(s) (SB_JOURNAL(s)->j_header_bh->) #define reiserfs_is_journal_aborted(journal) (unlikely (__reiserfs_is_journal_aborted (journal))) static inline int __reiserfs_is_journal_aborted(struct reiserfs_journal *journal) { return test_bit(J_ABORTED, &journal->j_state); } /* * Locking primitives. The write lock is a per superblock * special mutex that has properties close to the Big Kernel Lock * which was used in the previous locking scheme. */ void reiserfs_write_lock(struct super_block *s); void reiserfs_write_unlock(struct super_block *s); int __must_check reiserfs_write_unlock_nested(struct super_block *s); void reiserfs_write_lock_nested(struct super_block *s, int depth); #ifdef CONFIG_REISERFS_CHECK void reiserfs_lock_check_recursive(struct super_block *s); #else static inline void reiserfs_lock_check_recursive(struct super_block *s) { } #endif /* * Several mutexes depend on the write lock. * However sometimes we want to relax the write lock while we hold * these mutexes, according to the release/reacquire on schedule() * properties of the Bkl that were used. * Reiserfs performances and locking were based on this scheme. * Now that the write lock is a mutex and not the bkl anymore, doing so * may result in a deadlock: * * A acquire write_lock * A acquire j_commit_mutex * A release write_lock and wait for something * B acquire write_lock * B can't acquire j_commit_mutex and sleep * A can't acquire write lock anymore * deadlock * * What we do here is avoiding such deadlock by playing the same game * than the Bkl: if we can't acquire a mutex that depends on the write lock, * we release the write lock, wait a bit and then retry. * * The mutexes concerned by this hack are: * - The commit mutex of a journal list * - The flush mutex * - The journal lock * - The inode mutex */ static inline void reiserfs_mutex_lock_safe(struct mutex *m, struct super_block *s) { int depth; depth = reiserfs_write_unlock_nested(s); mutex_lock(m); reiserfs_write_lock_nested(s, depth); } static inline void reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass, struct super_block *s) { int depth; depth = reiserfs_write_unlock_nested(s); mutex_lock_nested(m, subclass); reiserfs_write_lock_nested(s, depth); } static inline void reiserfs_down_read_safe(struct rw_semaphore *sem, struct super_block *s) { int depth; depth = reiserfs_write_unlock_nested(s); down_read(sem); reiserfs_write_lock_nested(s, depth); } /* * When we schedule, we usually want to also release the write lock, * according to the previous bkl based locking scheme of reiserfs. */ static inline void reiserfs_cond_resched(struct super_block *s) { if (need_resched()) { int depth; depth = reiserfs_write_unlock_nested(s); schedule(); reiserfs_write_lock_nested(s, depth); } } struct fid; /* * in reading the #defines, it may help to understand that they employ * the following abbreviations: * * B = Buffer * I = Item header * H = Height within the tree (should be changed to LEV) * N = Number of the item in the node * STAT = stat data * DEH = Directory Entry Header * EC = Entry Count * E = Entry number * UL = Unsigned Long * BLKH = BLocK Header * UNFM = UNForMatted node * DC = Disk Child * P = Path * * These #defines are named by concatenating these abbreviations, * where first comes the arguments, and last comes the return value, * of the macro. */ #define USE_INODE_GENERATION_COUNTER #define REISERFS_PREALLOCATE #define DISPLACE_NEW_PACKING_LOCALITIES #define PREALLOCATION_SIZE 9 /* n must be power of 2 */ #define _ROUND_UP(x,n) (((x)+(n)-1u) & ~((n)-1u)) /* * to be ok for alpha and others we have to align structures to 8 byte * boundary. * FIXME: do not change 4 by anything else: there is code which relies on that */ #define ROUND_UP(x) _ROUND_UP(x,8LL) /* * debug levels. Right now, CONFIG_REISERFS_CHECK means print all debug * messages. */ #define REISERFS_DEBUG_CODE 5 /* extra messages to help find/debug errors */ void __reiserfs_warning(struct super_block *s, const char *id, const char *func, const char *fmt, ...); #define reiserfs_warning(s, id, fmt, args...) \ __reiserfs_warning(s, id, __func__, fmt, ##args) /* assertions handling */ /* always check a condition and panic if it's false. */ #define __RASSERT(cond, scond, format, args...) \ do { \ if (!(cond)) \ reiserfs_panic(NULL, "assertion failure", "(" #cond ") at " \ __FILE__ ":%i:%s: " format "\n", \ __LINE__, __func__ , ##args); \ } while (0) #define RASSERT(cond, format, args...) __RASSERT(cond, #cond, format, ##args) #if defined( CONFIG_REISERFS_CHECK ) #define RFALSE(cond, format, args...) __RASSERT(!(cond), "!(" #cond ")", format, ##args) #else #define RFALSE( cond, format, args... ) do {;} while( 0 ) #endif #define CONSTF __attribute_const__ /* * Disk Data Structures */ /*************************************************************************** * SUPER BLOCK * ***************************************************************************/ /* * Structure of super block on disk, a version of which in RAM is often * accessed as REISERFS_SB(s)->s_rs. The version in RAM is part of a larger * structure containing fields never written to disk. */ #define UNSET_HASH 0 /* Detect hash on disk */ #define TEA_HASH 1 #define YURA_HASH 2 #define R5_HASH 3 #define DEFAULT_HASH R5_HASH struct journal_params { /* where does journal start from on its * device */ __le32 jp_journal_1st_block; /* journal device st_rdev */ __le32 jp_journal_dev; /* size of the journal */ __le32 jp_journal_size; /* max number of blocks in a transaction. */ __le32 jp_journal_trans_max; /* * random value made on fs creation * (this was sb_journal_block_count) */ __le32 jp_journal_magic; /* max number of blocks to batch into a trans */ __le32 jp_journal_max_batch; /* in seconds, how old can an async commit be */ __le32 jp_journal_max_commit_age; /* in seconds, how old can a transaction be */ __le32 jp_journal_max_trans_age; }; /* this is the super from 3.5.X, where X >= 10 */ struct reiserfs_super_block_v1 { __le32 s_block_count; /* blocks count */ __le32 s_free_blocks; /* free blocks count */ __le32 s_root_block; /* root block number */ struct journal_params s_journal; __le16 s_blocksize; /* block size */ /* max size of object id array, see get_objectid() commentary */ __le16 s_oid_maxsize; __le16 s_oid_cursize; /* current size of object id array */ /* this is set to 1 when filesystem was umounted, to 2 - when not */ __le16 s_umount_state; /* * reiserfs magic string indicates that file system is reiserfs: * "ReIsErFs" or "ReIsEr2Fs" or "ReIsEr3Fs" */ char s_magic[10]; /* * it is set to used by fsck to mark which * phase of rebuilding is done */ __le16 s_fs_state; /* * indicate, what hash function is being use * to sort names in a directory */ __le32 s_hash_function_code; __le16 s_tree_height; /* height of disk tree */ /* * amount of bitmap blocks needed to address * each block of file system */ __le16 s_bmap_nr; /* * this field is only reliable on filesystem with non-standard journal */ __le16 s_version; /* * size in blocks of journal area on main device, we need to * keep after making fs with non-standard journal */ __le16 s_reserved_for_journal; } __attribute__ ((__packed__)); #define SB_SIZE_V1 (sizeof(struct reiserfs_super_block_v1)) /* this is the on disk super block */ struct reiserfs_super_block { struct reiserfs_super_block_v1 s_v1; __le32 s_inode_generation; /* Right now used only by inode-attributes, if enabled */ __le32 s_flags; unsigned char s_uuid[16]; /* filesystem unique identifier */ unsigned char s_label[16]; /* filesystem volume label */ __le16 s_mnt_count; /* Count of mounts since last fsck */ __le16 s_max_mnt_count; /* Maximum mounts before check */ __le32 s_lastcheck; /* Timestamp of last fsck */ __le32 s_check_interval; /* Interval between checks */ /* * zero filled by mkreiserfs and reiserfs_convert_objectid_map_v1() * so any additions must be updated there as well. */ char s_unused[76]; } __attribute__ ((__packed__)); #define SB_SIZE (sizeof(struct reiserfs_super_block)) #define REISERFS_VERSION_1 0 #define REISERFS_VERSION_2 2 /* on-disk super block fields converted to cpu form */ #define SB_DISK_SUPER_BLOCK(s) (REISERFS_SB(s)->s_rs) #define SB_V1_DISK_SUPER_BLOCK(s) (&(SB_DISK_SUPER_BLOCK(s)->s_v1)) #define SB_BLOCKSIZE(s) \ le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_blocksize)) #define SB_BLOCK_COUNT(s) \ le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_block_count)) #define SB_FREE_BLOCKS(s) \ le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_free_blocks)) #define SB_REISERFS_MAGIC(s) \ (SB_V1_DISK_SUPER_BLOCK(s)->s_magic) #define SB_ROOT_BLOCK(s) \ le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_root_block)) #define SB_TREE_HEIGHT(s) \ le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_tree_height)) #define SB_REISERFS_STATE(s) \ le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_umount_state)) #define SB_VERSION(s) le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_version)) #define SB_BMAP_NR(s) le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_bmap_nr)) #define PUT_SB_BLOCK_COUNT(s, val) \ do { SB_V1_DISK_SUPER_BLOCK(s)->s_block_count = cpu_to_le32(val); } while (0) #define PUT_SB_FREE_BLOCKS(s, val) \ do { SB_V1_DISK_SUPER_BLOCK(s)->s_free_blocks = cpu_to_le32(val); } while (0) #define PUT_SB_ROOT_BLOCK(s, val) \ do { SB_V1_DISK_SUPER_BLOCK(s)->s_root_block = cpu_to_le32(val); } while (0) #define PUT_SB_TREE_HEIGHT(s, val) \ do { SB_V1_DISK_SUPER_BLOCK(s)->s_tree_height = cpu_to_le16(val); } while (0) #define PUT_SB_REISERFS_STATE(s, val) \ do { SB_V1_DISK_SUPER_BLOCK(s)->s_umount_state = cpu_to_le16(val); } while (0) #define PUT_SB_VERSION(s, val) \ do { SB_V1_DISK_SUPER_BLOCK(s)->s_version = cpu_to_le16(val); } while (0) #define PUT_SB_BMAP_NR(s, val) \ do { SB_V1_DISK_SUPER_BLOCK(s)->s_bmap_nr = cpu_to_le16 (val); } while (0) #define SB_ONDISK_JP(s) (&SB_V1_DISK_SUPER_BLOCK(s)->s_journal) #define SB_ONDISK_JOURNAL_SIZE(s) \ le32_to_cpu ((SB_ONDISK_JP(s)->jp_journal_size)) #define SB_ONDISK_JOURNAL_1st_BLOCK(s) \ le32_to_cpu ((SB_ONDISK_JP(s)->jp_journal_1st_block)) #define SB_ONDISK_JOURNAL_DEVICE(s) \ le32_to_cpu ((SB_ONDISK_JP(s)->jp_journal_dev)) #define SB_ONDISK_RESERVED_FOR_JOURNAL(s) \ le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_reserved_for_journal)) #define is_block_in_log_or_reserved_area(s, block) \ block >= SB_JOURNAL_1st_RESERVED_BLOCK(s) \ && block < SB_JOURNAL_1st_RESERVED_BLOCK(s) + \ ((!is_reiserfs_jr(SB_DISK_SUPER_BLOCK(s)) ? \ SB_ONDISK_JOURNAL_SIZE(s) + 1 : SB_ONDISK_RESERVED_FOR_JOURNAL(s))) int is_reiserfs_3_5(struct reiserfs_super_block *rs); int is_reiserfs_3_6(struct reiserfs_super_block *rs); int is_reiserfs_jr(struct reiserfs_super_block *rs); /* * ReiserFS leaves the first 64k unused, so that partition labels have * enough space. If someone wants to write a fancy bootloader that * needs more than 64k, let us know, and this will be increased in size. * This number must be larger than the largest block size on any * platform, or code will break. -Hans */ #define REISERFS_DISK_OFFSET_IN_BYTES (64 * 1024) #define REISERFS_FIRST_BLOCK unused_define #define REISERFS_JOURNAL_OFFSET_IN_BYTES REISERFS_DISK_OFFSET_IN_BYTES /* the spot for the super in versions 3.5 - 3.5.10 (inclusive) */ #define REISERFS_OLD_DISK_OFFSET_IN_BYTES (8 * 1024) /* reiserfs internal error code (used by search_by_key and fix_nodes)) */ #define CARRY_ON 0 #define REPEAT_SEARCH -1 #define IO_ERROR -2 #define NO_DISK_SPACE -3 #define NO_BALANCING_NEEDED (-4) #define NO_MORE_UNUSED_CONTIGUOUS_BLOCKS (-5) #define QUOTA_EXCEEDED -6 typedef __u32 b_blocknr_t; typedef __le32 unp_t; struct unfm_nodeinfo { unp_t unfm_nodenum; unsigned short unfm_freespace; }; /* there are two formats of keys: 3.5 and 3.6 */ #define KEY_FORMAT_3_5 0 #define KEY_FORMAT_3_6 1 /* there are two stat datas */ #define STAT_DATA_V1 0 #define STAT_DATA_V2 1 static inline struct reiserfs_inode_info *REISERFS_I(const struct inode *inode) { return container_of(inode, struct reiserfs_inode_info, vfs_inode); } static inline struct reiserfs_sb_info *REISERFS_SB(const struct super_block *sb) { return sb->s_fs_info; } /* * Don't trust REISERFS_SB(sb)->s_bmap_nr, it's a u16 * which overflows on large file systems. */ static inline __u32 reiserfs_bmap_count(struct super_block *sb) { return (SB_BLOCK_COUNT(sb) - 1) / (sb->s_blocksize * 8) + 1; } static inline int bmap_would_wrap(unsigned bmap_nr) { return bmap_nr > ((1LL << 16) - 1); } extern const struct xattr_handler * const reiserfs_xattr_handlers[]; /* * this says about version of key of all items (but stat data) the * object consists of */ #define get_inode_item_key_version( inode ) \ ((REISERFS_I(inode)->i_flags & i_item_key_version_mask) ? KEY_FORMAT_3_6 : KEY_FORMAT_3_5) #define set_inode_item_key_version( inode, version ) \ ({ if((version)==KEY_FORMAT_3_6) \ REISERFS_I(inode)->i_flags |= i_item_key_version_mask; \ else \ REISERFS_I(inode)->i_flags &= ~i_item_key_version_mask; }) #define get_inode_sd_version(inode) \ ((REISERFS_I(inode)->i_flags & i_stat_data_version_mask) ? STAT_DATA_V2 : STAT_DATA_V1) #define set_inode_sd_version(inode, version) \ ({ if((version)==STAT_DATA_V2) \ REISERFS_I(inode)->i_flags |= i_stat_data_version_mask; \ else \ REISERFS_I(inode)->i_flags &= ~i_stat_data_version_mask; }) /* * This is an aggressive tail suppression policy, I am hoping it * improves our benchmarks. The principle behind it is that percentage * space saving is what matters, not absolute space saving. This is * non-intuitive, but it helps to understand it if you consider that the * cost to access 4 blocks is not much more than the cost to access 1 * block, if you have to do a seek and rotate. A tail risks a * non-linear disk access that is significant as a percentage of total * time cost for a 4 block file and saves an amount of space that is * less significant as a percentage of space, or so goes the hypothesis. * -Hans */ #define STORE_TAIL_IN_UNFM_S1(n_file_size,n_tail_size,n_block_size) \ (\ (!(n_tail_size)) || \ (((n_tail_size) > MAX_DIRECT_ITEM_LEN(n_block_size)) || \ ( (n_file_size) >= (n_block_size) * 4 ) || \ ( ( (n_file_size) >= (n_block_size) * 3 ) && \ ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size))/4) ) || \ ( ( (n_file_size) >= (n_block_size) * 2 ) && \ ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size))/2) ) || \ ( ( (n_file_size) >= (n_block_size) ) && \ ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size) * 3)/4) ) ) \ ) /* * Another strategy for tails, this one means only create a tail if all the * file would fit into one DIRECT item. * Primary intention for this one is to increase performance by decreasing * seeking. */ #define STORE_TAIL_IN_UNFM_S2(n_file_size,n_tail_size,n_block_size) \ (\ (!(n_tail_size)) || \ (((n_file_size) > MAX_DIRECT_ITEM_LEN(n_block_size)) ) \ ) /* * values for s_umount_state field */ #define REISERFS_VALID_FS 1 #define REISERFS_ERROR_FS 2 /* * there are 5 item types currently */ #define TYPE_STAT_DATA 0 #define TYPE_INDIRECT 1 #define TYPE_DIRECT 2 #define TYPE_DIRENTRY 3 #define TYPE_MAXTYPE 3 #define TYPE_ANY 15 /* FIXME: comment is required */ /*************************************************************************** * KEY & ITEM HEAD * ***************************************************************************/ /* * directories use this key as well as old files */ struct offset_v1 { __le32 k_offset; __le32 k_uniqueness; } __attribute__ ((__packed__)); struct offset_v2 { __le64 v; } __attribute__ ((__packed__)); static inline __u16 offset_v2_k_type(const struct offset_v2 *v2) { __u8 type = le64_to_cpu(v2->v) >> 60; return (type <= TYPE_MAXTYPE) ? type : TYPE_ANY; } static inline void set_offset_v2_k_type(struct offset_v2 *v2, int type) { v2->v = (v2->v & cpu_to_le64(~0ULL >> 4)) | cpu_to_le64((__u64) type << 60); } static inline loff_t offset_v2_k_offset(const struct offset_v2 *v2) { return le64_to_cpu(v2->v) & (~0ULL >> 4); } static inline void set_offset_v2_k_offset(struct offset_v2 *v2, loff_t offset) { offset &= (~0ULL >> 4); v2->v = (v2->v & cpu_to_le64(15ULL << 60)) | cpu_to_le64(offset); } /* * Key of an item determines its location in the S+tree, and * is composed of 4 components */ struct reiserfs_key { /* packing locality: by default parent directory object id */ __le32 k_dir_id; __le32 k_objectid; /* object identifier */ union { struct offset_v1 k_offset_v1; struct offset_v2 k_offset_v2; } __attribute__ ((__packed__)) u; } __attribute__ ((__packed__)); struct in_core_key { /* packing locality: by default parent directory object id */ __u32 k_dir_id; __u32 k_objectid; /* object identifier */ __u64 k_offset; __u8 k_type; }; struct cpu_key { struct in_core_key on_disk_key; int version; /* 3 in all cases but direct2indirect and indirect2direct conversion */ int key_length; }; /* * Our function for comparing keys can compare keys of different * lengths. It takes as a parameter the length of the keys it is to * compare. These defines are used in determining what is to be passed * to it as that parameter. */ #define REISERFS_FULL_KEY_LEN 4 #define REISERFS_SHORT_KEY_LEN 2 /* The result of the key compare */ #define FIRST_GREATER 1 #define SECOND_GREATER -1 #define KEYS_IDENTICAL 0 #define KEY_FOUND 1 #define KEY_NOT_FOUND 0 #define KEY_SIZE (sizeof(struct reiserfs_key)) /* return values for search_by_key and clones */ #define ITEM_FOUND 1 #define ITEM_NOT_FOUND 0 #define ENTRY_FOUND 1 #define ENTRY_NOT_FOUND 0 #define DIRECTORY_NOT_FOUND -1 #define REGULAR_FILE_FOUND -2 #define DIRECTORY_FOUND -3 #define BYTE_FOUND 1 #define BYTE_NOT_FOUND 0 #define FILE_NOT_FOUND -1 #define POSITION_FOUND 1 #define POSITION_NOT_FOUND 0 /* return values for reiserfs_find_entry and search_by_entry_key */ #define NAME_FOUND 1 #define NAME_NOT_FOUND 0 #define GOTO_PREVIOUS_ITEM 2 #define NAME_FOUND_INVISIBLE 3 /* * Everything in the filesystem is stored as a set of items. The * item head contains the key of the item, its free space (for * indirect items) and specifies the location of the item itself * within the block. */ struct item_head { /* * Everything in the tree is found by searching for it based on * its key. */ struct reiserfs_key ih_key; union { /* * The free space in the last unformatted node of an * indirect item if this is an indirect item. This * equals 0xFFFF iff this is a direct item or stat data * item. Note that the key, not this field, is used to * determine the item type, and thus which field this * union contains. */ __le16 ih_free_space_reserved; /* * Iff this is a directory item, this field equals the * number of directory entries in the directory item. */ __le16 ih_entry_count; } __attribute__ ((__packed__)) u; __le16 ih_item_len; /* total size of the item body */ /* an offset to the item body within the block */ __le16 ih_item_location; /* * 0 for all old items, 2 for new ones. Highest bit is set by fsck * temporary, cleaned after all done */ __le16 ih_version; } __attribute__ ((__packed__)); /* size of item header */ #define IH_SIZE (sizeof(struct item_head)) #define ih_free_space(ih) le16_to_cpu((ih)->u.ih_free_space_reserved) #define ih_version(ih) le16_to_cpu((ih)->ih_version) #define ih_entry_count(ih) le16_to_cpu((ih)->u.ih_entry_count) #define ih_location(ih) le16_to_cpu((ih)->ih_item_location) #define ih_item_len(ih) le16_to_cpu((ih)->ih_item_len) #define put_ih_free_space(ih, val) do { (ih)->u.ih_free_space_reserved = cpu_to_le16(val); } while(0) #define put_ih_version(ih, val) do { (ih)->ih_version = cpu_to_le16(val); } while (0) #define put_ih_entry_count(ih, val) do { (ih)->u.ih_entry_count = cpu_to_le16(val); } while (0) #define put_ih_location(ih, val) do { (ih)->ih_item_location = cpu_to_le16(val); } while (0) #define put_ih_item_len(ih, val) do { (ih)->ih_item_len = cpu_to_le16(val); } while (0) #define unreachable_item(ih) (ih_version(ih) & (1 << 15)) #define get_ih_free_space(ih) (ih_version (ih) == KEY_FORMAT_3_6 ? 0 : ih_free_space (ih)) #define set_ih_free_space(ih,val) put_ih_free_space((ih), ((ih_version(ih) == KEY_FORMAT_3_6) ? 0 : (val))) /* * these operate on indirect items, where you've got an array of ints * at a possibly unaligned location. These are a noop on ia32 * * p is the array of __u32, i is the index into the array, v is the value * to store there. */ #define get_block_num(p, i) get_unaligned_le32((p) + (i)) #define put_block_num(p, i, v) put_unaligned_le32((v), (p) + (i)) /* * in old version uniqueness field shows key type */ #define V1_SD_UNIQUENESS 0 #define V1_INDIRECT_UNIQUENESS 0xfffffffe #define V1_DIRECT_UNIQUENESS 0xffffffff #define V1_DIRENTRY_UNIQUENESS 500 #define V1_ANY_UNIQUENESS 555 /* FIXME: comment is required */ /* here are conversion routines */ static inline int uniqueness2type(__u32 uniqueness) CONSTF; static inline int uniqueness2type(__u32 uniqueness) { switch ((int)uniqueness) { case V1_SD_UNIQUENESS: return TYPE_STAT_DATA; case V1_INDIRECT_UNIQUENESS: return TYPE_INDIRECT; case V1_DIRECT_UNIQUENESS: return TYPE_DIRECT; case V1_DIRENTRY_UNIQUENESS: return TYPE_DIRENTRY; case V1_ANY_UNIQUENESS: default: return TYPE_ANY; } } static inline __u32 type2uniqueness(int type) CONSTF; static inline __u32 type2uniqueness(int type) { switch (type) { case TYPE_STAT_DATA: return V1_SD_UNIQUENESS; case TYPE_INDIRECT: return V1_INDIRECT_UNIQUENESS; case TYPE_DIRECT: return V1_DIRECT_UNIQUENESS; case TYPE_DIRENTRY: return V1_DIRENTRY_UNIQUENESS; case TYPE_ANY: default: return V1_ANY_UNIQUENESS; } } /* * key is pointer to on disk key which is stored in le, result is cpu, * there is no way to get version of object from key, so, provide * version to these defines */ static inline loff_t le_key_k_offset(int version, const struct reiserfs_key *key) { return (version == KEY_FORMAT_3_5) ? le32_to_cpu(key->u.k_offset_v1.k_offset) : offset_v2_k_offset(&(key->u.k_offset_v2)); } static inline loff_t le_ih_k_offset(const struct item_head *ih) { return le_key_k_offset(ih_version(ih), &(ih->ih_key)); } static inline loff_t le_key_k_type(int version, const struct reiserfs_key *key) { if (version == KEY_FORMAT_3_5) { loff_t val = le32_to_cpu(key->u.k_offset_v1.k_uniqueness); return uniqueness2type(val); } else return offset_v2_k_type(&(key->u.k_offset_v2)); } static inline loff_t le_ih_k_type(const struct item_head *ih) { return le_key_k_type(ih_version(ih), &(ih->ih_key)); } static inline void set_le_key_k_offset(int version, struct reiserfs_key *key, loff_t offset) { if (version == KEY_FORMAT_3_5) key->u.k_offset_v1.k_offset = cpu_to_le32(offset); else set_offset_v2_k_offset(&key->u.k_offset_v2, offset); } static inline void add_le_key_k_offset(int version, struct reiserfs_key *key, loff_t offset) { set_le_key_k_offset(version, key, le_key_k_offset(version, key) + offset); } static inline void add_le_ih_k_offset(struct item_head *ih, loff_t offset) { add_le_key_k_offset(ih_version(ih), &(ih->ih_key), offset); } static inline void set_le_ih_k_offset(struct item_head *ih, loff_t offset) { set_le_key_k_offset(ih_version(ih), &(ih->ih_key), offset); } static inline void set_le_key_k_type(int version, struct reiserfs_key *key, int type) { if (version == KEY_FORMAT_3_5) { type = type2uniqueness(type); key->u.k_offset_v1.k_uniqueness = cpu_to_le32(type); } else set_offset_v2_k_type(&key->u.k_offset_v2, type); } static inline void set_le_ih_k_type(struct item_head *ih, int type) { set_le_key_k_type(ih_version(ih), &(ih->ih_key), type); } static inline int is_direntry_le_key(int version, struct reiserfs_key *key) { return le_key_k_type(version, key) == TYPE_DIRENTRY; } static inline int is_direct_le_key(int version, struct reiserfs_key *key) { return le_key_k_type(version, key) == TYPE_DIRECT; } static inline int is_indirect_le_key(int version, struct reiserfs_key *key) { return le_key_k_type(version, key) == TYPE_INDIRECT; } static inline int is_statdata_le_key(int version, struct reiserfs_key *key) { return le_key_k_type(version, key) == TYPE_STAT_DATA; } /* item header has version. */ static inline int is_direntry_le_ih(struct item_head *ih) { return is_direntry_le_key(ih_version(ih), &ih->ih_key); } static inline int is_direct_le_ih(struct item_head *ih) { return is_direct_le_key(ih_version(ih), &ih->ih_key); } static inline int is_indirect_le_ih(struct item_head *ih) { return is_indirect_le_key(ih_version(ih), &ih->ih_key); } static inline int is_statdata_le_ih(struct item_head *ih) { return is_statdata_le_key(ih_version(ih), &ih->ih_key); } /* key is pointer to cpu key, result is cpu */ static inline loff_t cpu_key_k_offset(const struct cpu_key *key) { return key->on_disk_key.k_offset; } static inline loff_t cpu_key_k_type(const struct cpu_key *key) { return key->on_disk_key.k_type; } static inline void set_cpu_key_k_offset(struct cpu_key *key, loff_t offset) { key->on_disk_key.k_offset = offset; } static inline void set_cpu_key_k_type(struct cpu_key *key, int type) { key->on_disk_key.k_type = type; } static inline void cpu_key_k_offset_dec(struct cpu_key *key) { key->on_disk_key.k_offset--; } #define is_direntry_cpu_key(key) (cpu_key_k_type (key) == TYPE_DIRENTRY) #define is_direct_cpu_key(key) (cpu_key_k_type (key) == TYPE_DIRECT) #define is_indirect_cpu_key(key) (cpu_key_k_type (key) == TYPE_INDIRECT) #define is_statdata_cpu_key(key) (cpu_key_k_type (key) == TYPE_STAT_DATA) /* are these used ? */ #define is_direntry_cpu_ih(ih) (is_direntry_cpu_key (&((ih)->ih_key))) #define is_direct_cpu_ih(ih) (is_direct_cpu_key (&((ih)->ih_key))) #define is_indirect_cpu_ih(ih) (is_indirect_cpu_key (&((ih)->ih_key))) #define is_statdata_cpu_ih(ih) (is_statdata_cpu_key (&((ih)->ih_key))) #define I_K_KEY_IN_ITEM(ih, key, n_blocksize) \ (!COMP_SHORT_KEYS(ih, key) && \ I_OFF_BYTE_IN_ITEM(ih, k_offset(key), n_blocksize)) /* maximal length of item */ #define MAX_ITEM_LEN(block_size) (block_size - BLKH_SIZE - IH_SIZE) #define MIN_ITEM_LEN 1 /* object identifier for root dir */ #define REISERFS_ROOT_OBJECTID 2 #define REISERFS_ROOT_PARENT_OBJECTID 1 extern struct reiserfs_key root_key; /* * Picture represents a leaf of the S+tree * ______________________________________________________ * | | Array of | | | * |Block | Object-Item | F r e e | Objects- | * | head | Headers | S p a c e | Items | * |______|_______________|___________________|___________| */ /* * Header of a disk block. More precisely, header of a formatted leaf * or internal node, and not the header of an unformatted node. */ struct block_head { __le16 blk_level; /* Level of a block in the tree. */ __le16 blk_nr_item; /* Number of keys/items in a block. */ __le16 blk_free_space; /* Block free space in bytes. */ __le16 blk_reserved; /* dump this in v4/planA */ /* kept only for compatibility */ struct reiserfs_key blk_right_delim_key; }; #define BLKH_SIZE (sizeof(struct block_head)) #define blkh_level(p_blkh) (le16_to_cpu((p_blkh)->blk_level)) #define blkh_nr_item(p_blkh) (le16_to_cpu((p_blkh)->blk_nr_item)) #define blkh_free_space(p_blkh) (le16_to_cpu((p_blkh)->blk_free_space)) #define blkh_reserved(p_blkh) (le16_to_cpu((p_blkh)->blk_reserved)) #define set_blkh_level(p_blkh,val) ((p_blkh)->blk_level = cpu_to_le16(val)) #define set_blkh_nr_item(p_blkh,val) ((p_blkh)->blk_nr_item = cpu_to_le16(val)) #define set_blkh_free_space(p_blkh,val) ((p_blkh)->blk_free_space = cpu_to_le16(val)) #define set_blkh_reserved(p_blkh,val) ((p_blkh)->blk_reserved = cpu_to_le16(val)) #define blkh_right_delim_key(p_blkh) ((p_blkh)->blk_right_delim_key) #define set_blkh_right_delim_key(p_blkh,val) ((p_blkh)->blk_right_delim_key = val) /* values for blk_level field of the struct block_head */ /* * When node gets removed from the tree its blk_level is set to FREE_LEVEL. * It is then used to see whether the node is still in the tree */ #define FREE_LEVEL 0 #define DISK_LEAF_NODE_LEVEL 1 /* Leaf node level. */ /* * Given the buffer head of a formatted node, resolve to the * block head of that node. */ #define B_BLK_HEAD(bh) ((struct block_head *)((bh)->b_data)) /* Number of items that are in buffer. */ #define B_NR_ITEMS(bh) (blkh_nr_item(B_BLK_HEAD(bh))) #define B_LEVEL(bh) (blkh_level(B_BLK_HEAD(bh))) #define B_FREE_SPACE(bh) (blkh_free_space(B_BLK_HEAD(bh))) #define PUT_B_NR_ITEMS(bh, val) do { set_blkh_nr_item(B_BLK_HEAD(bh), val); } while (0) #define PUT_B_LEVEL(bh, val) do { set_blkh_level(B_BLK_HEAD(bh), val); } while (0) #define PUT_B_FREE_SPACE(bh, val) do { set_blkh_free_space(B_BLK_HEAD(bh), val); } while (0) /* Get right delimiting key. -- little endian */ #define B_PRIGHT_DELIM_KEY(bh) (&(blk_right_delim_key(B_BLK_HEAD(bh)))) /* Does the buffer contain a disk leaf. */ #define B_IS_ITEMS_LEVEL(bh) (B_LEVEL(bh) == DISK_LEAF_NODE_LEVEL) /* Does the buffer contain a disk internal node */ #define B_IS_KEYS_LEVEL(bh) (B_LEVEL(bh) > DISK_LEAF_NODE_LEVEL \ && B_LEVEL(bh) <= MAX_HEIGHT) /*************************************************************************** * STAT DATA * ***************************************************************************/ /* * old stat data is 32 bytes long. We are going to distinguish new one by * different size */ struct stat_data_v1 { __le16 sd_mode; /* file type, permissions */ __le16 sd_nlink; /* number of hard links */ __le16 sd_uid; /* owner */ __le16 sd_gid; /* group */ __le32 sd_size; /* file size */ __le32 sd_atime; /* time of last access */ __le32 sd_mtime; /* time file was last modified */ /* * time inode (stat data) was last changed * (except changes to sd_atime and sd_mtime) */ __le32 sd_ctime; union { __le32 sd_rdev; __le32 sd_blocks; /* number of blocks file uses */ } __attribute__ ((__packed__)) u; /* * first byte of file which is stored in a direct item: except that if * it equals 1 it is a symlink and if it equals ~(__u32)0 there is no * direct item. The existence of this field really grates on me. * Let's replace it with a macro based on sd_size and our tail * suppression policy. Someday. -Hans */ __le32 sd_first_direct_byte; } __attribute__ ((__packed__)); #define SD_V1_SIZE (sizeof(struct stat_data_v1)) #define stat_data_v1(ih) (ih_version (ih) == KEY_FORMAT_3_5) #define sd_v1_mode(sdp) (le16_to_cpu((sdp)->sd_mode)) #define set_sd_v1_mode(sdp,v) ((sdp)->sd_mode = cpu_to_le16(v)) #define sd_v1_nlink(sdp) (le16_to_cpu((sdp)->sd_nlink)) #define set_sd_v1_nlink(sdp,v) ((sdp)->sd_nlink = cpu_to_le16(v)) #define sd_v1_uid(sdp) (le16_to_cpu((sdp)->sd_uid)) #define set_sd_v1_uid(sdp,v) ((sdp)->sd_uid = cpu_to_le16(v)) #define sd_v1_gid(sdp) (le16_to_cpu((sdp)->sd_gid)) #define set_sd_v1_gid(sdp,v) ((sdp)->sd_gid = cpu_to_le16(v)) #define sd_v1_size(sdp) (le32_to_cpu((sdp)->sd_size)) #define set_sd_v1_size(sdp,v) ((sdp)->sd_size = cpu_to_le32(v)) #define sd_v1_atime(sdp) (le32_to_cpu((sdp)->sd_atime)) #define set_sd_v1_atime(sdp,v) ((sdp)->sd_atime = cpu_to_le32(v)) #define sd_v1_mtime(sdp) (le32_to_cpu((sdp)->sd_mtime)) #define set_sd_v1_mtime(sdp,v) ((sdp)->sd_mtime = cpu_to_le32(v)) #define sd_v1_ctime(sdp) (le32_to_cpu((sdp)->sd_ctime)) #define set_sd_v1_ctime(sdp,v) ((sdp)->sd_ctime = cpu_to_le32(v)) #define sd_v1_rdev(sdp) (le32_to_cpu((sdp)->u.sd_rdev)) #define set_sd_v1_rdev(sdp,v) ((sdp)->u.sd_rdev = cpu_to_le32(v)) #define sd_v1_blocks(sdp) (le32_to_cpu((sdp)->u.sd_blocks)) #define set_sd_v1_blocks(sdp,v) ((sdp)->u.sd_blocks = cpu_to_le32(v)) #define sd_v1_first_direct_byte(sdp) \ (le32_to_cpu((sdp)->sd_first_direct_byte)) #define set_sd_v1_first_direct_byte(sdp,v) \ ((sdp)->sd_first_direct_byte = cpu_to_le32(v)) /* inode flags stored in sd_attrs (nee sd_reserved) */ /* * we want common flags to have the same values as in ext2, * so chattr(1) will work without problems */ #define REISERFS_IMMUTABLE_FL FS_IMMUTABLE_FL #define REISERFS_APPEND_FL FS_APPEND_FL #define REISERFS_SYNC_FL FS_SYNC_FL #define REISERFS_NOATIME_FL FS_NOATIME_FL #define REISERFS_NODUMP_FL FS_NODUMP_FL #define REISERFS_SECRM_FL FS_SECRM_FL #define REISERFS_UNRM_FL FS_UNRM_FL #define REISERFS_COMPR_FL FS_COMPR_FL #define REISERFS_NOTAIL_FL FS_NOTAIL_FL /* persistent flags that file inherits from the parent directory */ #define REISERFS_INHERIT_MASK ( REISERFS_IMMUTABLE_FL | \ REISERFS_SYNC_FL | \ REISERFS_NOATIME_FL | \ REISERFS_NODUMP_FL | \ REISERFS_SECRM_FL | \ REISERFS_COMPR_FL | \ REISERFS_NOTAIL_FL ) /* * Stat Data on disk (reiserfs version of UFS disk inode minus the * address blocks) */ struct stat_data { __le16 sd_mode; /* file type, permissions */ __le16 sd_attrs; /* persistent inode flags */ __le32 sd_nlink; /* number of hard links */ __le64 sd_size; /* file size */ __le32 sd_uid; /* owner */ __le32 sd_gid; /* group */ __le32 sd_atime; /* time of last access */ __le32 sd_mtime; /* time file was last modified */ /* * time inode (stat data) was last changed * (except changes to sd_atime and sd_mtime) */ __le32 sd_ctime; __le32 sd_blocks; union { __le32 sd_rdev; __le32 sd_generation; } __attribute__ ((__packed__)) u; } __attribute__ ((__packed__)); /* this is 44 bytes long */ #define SD_SIZE (sizeof(struct stat_data)) #define SD_V2_SIZE SD_SIZE #define stat_data_v2(ih) (ih_version (ih) == KEY_FORMAT_3_6) #define sd_v2_mode(sdp) (le16_to_cpu((sdp)->sd_mode)) #define set_sd_v2_mode(sdp,v) ((sdp)->sd_mode = cpu_to_le16(v)) /* sd_reserved */ /* set_sd_reserved */ #define sd_v2_nlink(sdp) (le32_to_cpu((sdp)->sd_nlink)) #define set_sd_v2_nlink(sdp,v) ((sdp)->sd_nlink = cpu_to_le32(v)) #define sd_v2_size(sdp) (le64_to_cpu((sdp)->sd_size)) #define set_sd_v2_size(sdp,v) ((sdp)->sd_size = cpu_to_le64(v)) #define sd_v2_uid(sdp) (le32_to_cpu((sdp)->sd_uid)) #define set_sd_v2_uid(sdp,v) ((sdp)->sd_uid = cpu_to_le32(v)) #define sd_v2_gid(sdp) (le32_to_cpu((sdp)->sd_gid)) #define set_sd_v2_gid(sdp,v) ((sdp)->sd_gid = cpu_to_le32(v)) #define sd_v2_atime(sdp) (le32_to_cpu((sdp)->sd_atime)) #define set_sd_v2_atime(sdp,v) ((sdp)->sd_atime = cpu_to_le32(v)) #define sd_v2_mtime(sdp) (le32_to_cpu((sdp)->sd_mtime)) #define set_sd_v2_mtime(sdp,v) ((sdp)->sd_mtime = cpu_to_le32(v)) #define sd_v2_ctime(sdp) (le32_to_cpu((sdp)->sd_ctime)) #define set_sd_v2_ctime(sdp,v) ((sdp)->sd_ctime = cpu_to_le32(v)) #define sd_v2_blocks(sdp) (le32_to_cpu((sdp)->sd_blocks)) #define set_sd_v2_blocks(sdp,v) ((sdp)->sd_blocks = cpu_to_le32(v)) #define sd_v2_rdev(sdp) (le32_to_cpu((sdp)->u.sd_rdev)) #define set_sd_v2_rdev(sdp,v) ((sdp)->u.sd_rdev = cpu_to_le32(v)) #define sd_v2_generation(sdp) (le32_to_cpu((sdp)->u.sd_generation)) #define set_sd_v2_generation(sdp,v) ((sdp)->u.sd_generation = cpu_to_le32(v)) #define sd_v2_attrs(sdp) (le16_to_cpu((sdp)->sd_attrs)) #define set_sd_v2_attrs(sdp,v) ((sdp)->sd_attrs = cpu_to_le16(v)) /*************************************************************************** * DIRECTORY STRUCTURE * ***************************************************************************/ /* * Picture represents the structure of directory items * ________________________________________________ * | Array of | | | | | | * | directory |N-1| N-2 | .... | 1st |0th| * | entry headers | | | | | | * |_______________|___|_____|________|_______|___| * <---- directory entries ------> * * First directory item has k_offset component 1. We store "." and ".." * in one item, always, we never split "." and ".." into differing * items. This makes, among other things, the code for removing * directories simpler. */ #define SD_OFFSET 0 #define SD_UNIQUENESS 0 #define DOT_OFFSET 1 #define DOT_DOT_OFFSET 2 #define DIRENTRY_UNIQUENESS 500 #define FIRST_ITEM_OFFSET 1 /* * Q: How to get key of object pointed to by entry from entry? * * A: Each directory entry has its header. This header has deh_dir_id * and deh_objectid fields, those are key of object, entry points to */ /* * NOT IMPLEMENTED: * Directory will someday contain stat data of object */ struct reiserfs_de_head { __le32 deh_offset; /* third component of the directory entry key */ /* * objectid of the parent directory of the object, that is referenced * by directory entry */ __le32 deh_dir_id; /* objectid of the object, that is referenced by directory entry */ __le32 deh_objectid; __le16 deh_location; /* offset of name in the whole item */ /* * whether 1) entry contains stat data (for future), and * 2) whether entry is hidden (unlinked) */ __le16 deh_state; } __attribute__ ((__packed__)); #define DEH_SIZE sizeof(struct reiserfs_de_head) #define deh_offset(p_deh) (le32_to_cpu((p_deh)->deh_offset)) #define deh_dir_id(p_deh) (le32_to_cpu((p_deh)->deh_dir_id)) #define deh_objectid(p_deh) (le32_to_cpu((p_deh)->deh_objectid)) #define deh_location(p_deh) (le16_to_cpu((p_deh)->deh_location)) #define deh_state(p_deh) (le16_to_cpu((p_deh)->deh_state)) #define put_deh_offset(p_deh,v) ((p_deh)->deh_offset = cpu_to_le32((v))) #define put_deh_dir_id(p_deh,v) ((p_deh)->deh_dir_id = cpu_to_le32((v))) #define put_deh_objectid(p_deh,v) ((p_deh)->deh_objectid = cpu_to_le32((v))) #define put_deh_location(p_deh,v) ((p_deh)->deh_location = cpu_to_le16((v))) #define put_deh_state(p_deh,v) ((p_deh)->deh_state = cpu_to_le16((v))) /* empty directory contains two entries "." and ".." and their headers */ #define EMPTY_DIR_SIZE \ (DEH_SIZE * 2 + ROUND_UP (sizeof(".") - 1) + ROUND_UP (sizeof("..") - 1)) /* old format directories have this size when empty */ #define EMPTY_DIR_SIZE_V1 (DEH_SIZE * 2 + 3) #define DEH_Statdata 0 /* not used now */ #define DEH_Visible 2 /* 64 bit systems (and the S/390) need to be aligned explicitly -jdm */ #if BITS_PER_LONG == 64 || defined(__s390__) || defined(__hppa__) # define ADDR_UNALIGNED_BITS (3) #endif /* * These are only used to manipulate deh_state. * Because of this, we'll use the ext2_ bit routines, * since they are little endian */ #ifdef ADDR_UNALIGNED_BITS # define aligned_address(addr) ((void *)((long)(addr) & ~((1UL << ADDR_UNALIGNED_BITS) - 1))) # define unaligned_offset(addr) (((int)((long)(addr) & ((1 << ADDR_UNALIGNED_BITS) - 1))) << 3) # define set_bit_unaligned(nr, addr) \ __test_and_set_bit_le((nr) + unaligned_offset(addr), aligned_address(addr)) # define clear_bit_unaligned(nr, addr) \ __test_and_clear_bit_le((nr) + unaligned_offset(addr), aligned_address(addr)) # define test_bit_unaligned(nr, addr) \ test_bit_le((nr) + unaligned_offset(addr), aligned_address(addr)) #else # define set_bit_unaligned(nr, addr) __test_and_set_bit_le(nr, addr) # define clear_bit_unaligned(nr, addr) __test_and_clear_bit_le(nr, addr) # define test_bit_unaligned(nr, addr) test_bit_le(nr, addr) #endif #define mark_de_with_sd(deh) set_bit_unaligned (DEH_Statdata, &((deh)->deh_state)) #define mark_de_without_sd(deh) clear_bit_unaligned (DEH_Statdata, &((deh)->deh_state)) #define mark_de_visible(deh) set_bit_unaligned (DEH_Visible, &((deh)->deh_state)) #define mark_de_hidden(deh) clear_bit_unaligned (DEH_Visible, &((deh)->deh_state)) #define de_with_sd(deh) test_bit_unaligned (DEH_Statdata, &((deh)->deh_state)) #define de_visible(deh) test_bit_unaligned (DEH_Visible, &((deh)->deh_state)) #define de_hidden(deh) !test_bit_unaligned (DEH_Visible, &((deh)->deh_state)) extern void make_empty_dir_item_v1(char *body, __le32 dirid, __le32 objid, __le32 par_dirid, __le32 par_objid); extern void make_empty_dir_item(char *body, __le32 dirid, __le32 objid, __le32 par_dirid, __le32 par_objid); /* two entries per block (at least) */ #define REISERFS_MAX_NAME(block_size) 255 /* * this structure is used for operations on directory entries. It is * not a disk structure. * * When reiserfs_find_entry or search_by_entry_key find directory * entry, they return filled reiserfs_dir_entry structure */ struct reiserfs_dir_entry { struct buffer_head *de_bh; int de_item_num; struct item_head *de_ih; int de_entry_num; struct reiserfs_de_head *de_deh; int de_entrylen; int de_namelen; char *de_name; unsigned long *de_gen_number_bit_string; __u32 de_dir_id; __u32 de_objectid; struct cpu_key de_entry_key; }; /* * these defines are useful when a particular member of * a reiserfs_dir_entry is needed */ /* pointer to file name, stored in entry */ #define B_I_DEH_ENTRY_FILE_NAME(bh, ih, deh) \ (ih_item_body(bh, ih) + deh_location(deh)) /* length of name */ #define I_DEH_N_ENTRY_FILE_NAME_LENGTH(ih,deh,entry_num) \ (I_DEH_N_ENTRY_LENGTH (ih, deh, entry_num) - (de_with_sd (deh) ? SD_SIZE : 0)) /* hash value occupies bits from 7 up to 30 */ #define GET_HASH_VALUE(offset) ((offset) & 0x7fffff80LL) /* generation number occupies 7 bits starting from 0 up to 6 */ #define GET_GENERATION_NUMBER(offset) ((offset) & 0x7fLL) #define MAX_GENERATION_NUMBER 127 #define SET_GENERATION_NUMBER(offset,gen_number) (GET_HASH_VALUE(offset)|(gen_number)) /* * Picture represents an internal node of the reiserfs tree * ______________________________________________________ * | | Array of | Array of | Free | * |block | keys | pointers | space | * | head | N | N+1 | | * |______|_______________|___________________|___________| */ /*************************************************************************** * DISK CHILD * ***************************************************************************/ /* * Disk child pointer: * The pointer from an internal node of the tree to a node that is on disk. */ struct disk_child { __le32 dc_block_number; /* Disk child's block number. */ __le16 dc_size; /* Disk child's used space. */ __le16 dc_reserved; }; #define DC_SIZE (sizeof(struct disk_child)) #define dc_block_number(dc_p) (le32_to_cpu((dc_p)->dc_block_number)) #define dc_size(dc_p) (le16_to_cpu((dc_p)->dc_size)) #define put_dc_block_number(dc_p, val) do { (dc_p)->dc_block_number = cpu_to_le32(val); } while(0) #define put_dc_size(dc_p, val) do { (dc_p)->dc_size = cpu_to_le16(val); } while(0) /* Get disk child by buffer header and position in the tree node. */ #define B_N_CHILD(bh, n_pos) ((struct disk_child *)\ ((bh)->b_data + BLKH_SIZE + B_NR_ITEMS(bh) * KEY_SIZE + DC_SIZE * (n_pos))) /* Get disk child number by buffer header and position in the tree node. */ #define B_N_CHILD_NUM(bh, n_pos) (dc_block_number(B_N_CHILD(bh, n_pos))) #define PUT_B_N_CHILD_NUM(bh, n_pos, val) \ (put_dc_block_number(B_N_CHILD(bh, n_pos), val)) /* maximal value of field child_size in structure disk_child */ /* child size is the combined size of all items and their headers */ #define MAX_CHILD_SIZE(bh) ((int)( (bh)->b_size - BLKH_SIZE )) /* amount of used space in buffer (not including block head) */ #define B_CHILD_SIZE(cur) (MAX_CHILD_SIZE(cur)-(B_FREE_SPACE(cur))) /* max and min number of keys in internal node */ #define MAX_NR_KEY(bh) ( (MAX_CHILD_SIZE(bh)-DC_SIZE)/(KEY_SIZE+DC_SIZE) ) #define MIN_NR_KEY(bh) (MAX_NR_KEY(bh)/2) /*************************************************************************** * PATH STRUCTURES AND DEFINES * ***************************************************************************/ /* * search_by_key fills up the path from the root to the leaf as it descends * the tree looking for the key. It uses reiserfs_bread to try to find * buffers in the cache given their block number. If it does not find * them in the cache it reads them from disk. For each node search_by_key * finds using reiserfs_bread it then uses bin_search to look through that * node. bin_search will find the position of the block_number of the next * node if it is looking through an internal node. If it is looking through * a leaf node bin_search will find the position of the item which has key * either equal to given key, or which is the maximal key less than the * given key. */ struct path_element { /* Pointer to the buffer at the path in the tree. */ struct buffer_head *pe_buffer; /* Position in the tree node which is placed in the buffer above. */ int pe_position; }; /* * maximal height of a tree. don't change this without * changing JOURNAL_PER_BALANCE_CNT */ #define MAX_HEIGHT 5 /* Must be equals MAX_HEIGHT + FIRST_PATH_ELEMENT_OFFSET */ #define EXTENDED_MAX_HEIGHT 7 /* Must be equal to at least 2. */ #define FIRST_PATH_ELEMENT_OFFSET 2 /* Must be equal to FIRST_PATH_ELEMENT_OFFSET - 1 */ #define ILLEGAL_PATH_ELEMENT_OFFSET 1 /* this MUST be MAX_HEIGHT + 1. See about FEB below */ #define MAX_FEB_SIZE 6 /* * We need to keep track of who the ancestors of nodes are. When we * perform a search we record which nodes were visited while * descending the tree looking for the node we searched for. This list * of nodes is called the path. This information is used while * performing balancing. Note that this path information may become * invalid, and this means we must check it when using it to see if it * is still valid. You'll need to read search_by_key and the comments * in it, especially about decrement_counters_in_path(), to understand * this structure. * * Paths make the code so much harder to work with and debug.... An * enormous number of bugs are due to them, and trying to write or modify * code that uses them just makes my head hurt. They are based on an * excessive effort to avoid disturbing the precious VFS code.:-( The * gods only know how we are going to SMP the code that uses them. * znodes are the way! */ #define PATH_READA 0x1 /* do read ahead */ #define PATH_READA_BACK 0x2 /* read backwards */ struct treepath { int path_length; /* Length of the array above. */ int reada; /* Array of the path elements. */ struct path_element path_elements[EXTENDED_MAX_HEIGHT]; int pos_in_item; }; #define pos_in_item(path) ((path)->pos_in_item) #define INITIALIZE_PATH(var) \ struct treepath var = {.path_length = ILLEGAL_PATH_ELEMENT_OFFSET, .reada = 0,} /* Get path element by path and path position. */ #define PATH_OFFSET_PELEMENT(path, n_offset) ((path)->path_elements + (n_offset)) /* Get buffer header at the path by path and path position. */ #define PATH_OFFSET_PBUFFER(path, n_offset) (PATH_OFFSET_PELEMENT(path, n_offset)->pe_buffer) /* Get position in the element at the path by path and path position. */ #define PATH_OFFSET_POSITION(path, n_offset) (PATH_OFFSET_PELEMENT(path, n_offset)->pe_position) #define PATH_PLAST_BUFFER(path) (PATH_OFFSET_PBUFFER((path), (path)->path_length)) /* * you know, to the person who didn't write this the macro name does not * at first suggest what it does. Maybe POSITION_FROM_PATH_END? Or * maybe we should just focus on dumping paths... -Hans */ #define PATH_LAST_POSITION(path) (PATH_OFFSET_POSITION((path), (path)->path_length)) /* * in do_balance leaf has h == 0 in contrast with path structure, * where root has level == 0. That is why we need these defines */ /* tb->S[h] */ #define PATH_H_PBUFFER(path, h) \ PATH_OFFSET_PBUFFER(path, path->path_length - (h)) /* tb->F[h] or tb->S[0]->b_parent */ #define PATH_H_PPARENT(path, h) PATH_H_PBUFFER(path, (h) + 1) #define PATH_H_POSITION(path, h) \ PATH_OFFSET_POSITION(path, path->path_length - (h)) /* tb->S[h]->b_item_order */ #define PATH_H_B_ITEM_ORDER(path, h) PATH_H_POSITION(path, h + 1) #define PATH_H_PATH_OFFSET(path, n_h) ((path)->path_length - (n_h)) static inline void *reiserfs_node_data(const struct buffer_head *bh) { return bh->b_data + sizeof(struct block_head); } /* get key from internal node */ static inline struct reiserfs_key *internal_key(struct buffer_head *bh, int item_num) { struct reiserfs_key *key = reiserfs_node_data(bh); return &key[item_num]; } /* get the item header from leaf node */ static inline struct item_head *item_head(const struct buffer_head *bh, int item_num) { struct item_head *ih = reiserfs_node_data(bh); return &ih[item_num]; } /* get the key from leaf node */ static inline struct reiserfs_key *leaf_key(const struct buffer_head *bh, int item_num) { return &item_head(bh, item_num)->ih_key; } static inline void *ih_item_body(const struct buffer_head *bh, const struct item_head *ih) { return bh->b_data + ih_location(ih); } /* get item body from leaf node */ static inline void *item_body(const struct buffer_head *bh, int item_num) { return ih_item_body(bh, item_head(bh, item_num)); } static inline struct item_head *tp_item_head(const struct treepath *path) { return item_head(PATH_PLAST_BUFFER(path), PATH_LAST_POSITION(path)); } static inline void *tp_item_body(const struct treepath *path) { return item_body(PATH_PLAST_BUFFER(path), PATH_LAST_POSITION(path)); } #define get_last_bh(path) PATH_PLAST_BUFFER(path) #define get_item_pos(path) PATH_LAST_POSITION(path) #define item_moved(ih,path) comp_items(ih, path) #define path_changed(ih,path) comp_items (ih, path) /* array of the entry headers */ /* get item body */ #define B_I_DEH(bh, ih) ((struct reiserfs_de_head *)(ih_item_body(bh, ih))) /* * length of the directory entry in directory item. This define * calculates length of i-th directory entry using directory entry * locations from dir entry head. When it calculates length of 0-th * directory entry, it uses length of whole item in place of entry * location of the non-existent following entry in the calculation. * See picture above. */ static inline int entry_length(const struct buffer_head *bh, const struct item_head *ih, int pos_in_item) { struct reiserfs_de_head *deh; deh = B_I_DEH(bh, ih) + pos_in_item; if (pos_in_item) return deh_location(deh - 1) - deh_location(deh); return ih_item_len(ih) - deh_location(deh); } /*************************************************************************** * MISC * ***************************************************************************/ /* Size of pointer to the unformatted node. */ #define UNFM_P_SIZE (sizeof(unp_t)) #define UNFM_P_SHIFT 2 /* in in-core inode key is stored on le form */ #define INODE_PKEY(inode) ((struct reiserfs_key *)(REISERFS_I(inode)->i_key)) #define MAX_UL_INT 0xffffffff #define MAX_INT 0x7ffffff #define MAX_US_INT 0xffff // reiserfs version 2 has max offset 60 bits. Version 1 - 32 bit offset static inline loff_t max_reiserfs_offset(struct inode *inode) { if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5) return (loff_t) U32_MAX; return (loff_t) ((~(__u64) 0) >> 4); } #define MAX_KEY_OBJECTID MAX_UL_INT #define MAX_B_NUM MAX_UL_INT #define MAX_FC_NUM MAX_US_INT /* the purpose is to detect overflow of an unsigned short */ #define REISERFS_LINK_MAX (MAX_US_INT - 1000) /* * The following defines are used in reiserfs_insert_item * and reiserfs_append_item */ #define REISERFS_KERNEL_MEM 0 /* kernel memory mode */ #define REISERFS_USER_MEM 1 /* user memory mode */ #define fs_generation(s) (REISERFS_SB(s)->s_generation_counter) #define get_generation(s) atomic_read (&fs_generation(s)) #define FILESYSTEM_CHANGED_TB(tb) (get_generation((tb)->tb_sb) != (tb)->fs_gen) #define __fs_changed(gen,s) (gen != get_generation (s)) #define fs_changed(gen,s) \ ({ \ reiserfs_cond_resched(s); \ __fs_changed(gen, s); \ }) /*************************************************************************** * FIXATE NODES * ***************************************************************************/ #define VI_TYPE_LEFT_MERGEABLE 1 #define VI_TYPE_RIGHT_MERGEABLE 2 /* * To make any changes in the tree we always first find node, that * contains item to be changed/deleted or place to insert a new * item. We call this node S. To do balancing we need to decide what * we will shift to left/right neighbor, or to a new node, where new * item will be etc. To make this analysis simpler we build virtual * node. Virtual node is an array of items, that will replace items of * node S. (For instance if we are going to delete an item, virtual * node does not contain it). Virtual node keeps information about * item sizes and types, mergeability of first and last items, sizes * of all entries in directory item. We use this array of items when * calculating what we can shift to neighbors and how many nodes we * have to have if we do not any shiftings, if we shift to left/right * neighbor or to both. */ struct virtual_item { int vi_index; /* index in the array of item operations */ unsigned short vi_type; /* left/right mergeability */ /* length of item that it will have after balancing */ unsigned short vi_item_len; struct item_head *vi_ih; const char *vi_item; /* body of item (old or new) */ const void *vi_new_data; /* 0 always but paste mode */ void *vi_uarea; /* item specific area */ }; struct virtual_node { /* this is a pointer to the free space in the buffer */ char *vn_free_ptr; unsigned short vn_nr_item; /* number of items in virtual node */ /* * size of node , that node would have if it has * unlimited size and no balancing is performed */ short vn_size; /* mode of balancing (paste, insert, delete, cut) */ short vn_mode; short vn_affected_item_num; short vn_pos_in_item; /* item header of inserted item, 0 for other modes */ struct item_head *vn_ins_ih; const void *vn_data; /* array of items (including a new one, excluding item to be deleted) */ struct virtual_item *vn_vi; }; /* used by directory items when creating virtual nodes */ struct direntry_uarea { int flags; __u16 entry_count; __u16 entry_sizes[]; } __attribute__ ((__packed__)); /*************************************************************************** * TREE BALANCE * ***************************************************************************/ /* * This temporary structure is used in tree balance algorithms, and * constructed as we go to the extent that its various parts are * needed. It contains arrays of nodes that can potentially be * involved in the balancing of node S, and parameters that define how * each of the nodes must be balanced. Note that in these algorithms * for balancing the worst case is to need to balance the current node * S and the left and right neighbors and all of their parents plus * create a new node. We implement S1 balancing for the leaf nodes * and S0 balancing for the internal nodes (S1 and S0 are defined in * our papers.) */ /* size of the array of buffers to free at end of do_balance */ #define MAX_FREE_BLOCK 7 /* maximum number of FEB blocknrs on a single level */ #define MAX_AMOUNT_NEEDED 2 /* someday somebody will prefix every field in this struct with tb_ */ struct tree_balance { int tb_mode; int need_balance_dirty; struct super_block *tb_sb; struct reiserfs_transaction_handle *transaction_handle; struct treepath *tb_path; /* array of left neighbors of nodes in the path */ struct buffer_head *L[MAX_HEIGHT]; /* array of right neighbors of nodes in the path */ struct buffer_head *R[MAX_HEIGHT]; /* array of fathers of the left neighbors */ struct buffer_head *FL[MAX_HEIGHT]; /* array of fathers of the right neighbors */ struct buffer_head *FR[MAX_HEIGHT]; /* array of common parents of center node and its left neighbor */ struct buffer_head *CFL[MAX_HEIGHT]; /* array of common parents of center node and its right neighbor */ struct buffer_head *CFR[MAX_HEIGHT]; /* * array of empty buffers. Number of buffers in array equals * cur_blknum. */ struct buffer_head *FEB[MAX_FEB_SIZE]; struct buffer_head *used[MAX_FEB_SIZE]; struct buffer_head *thrown[MAX_FEB_SIZE]; /* * array of number of items which must be shifted to the left in * order to balance the current node; for leaves includes item that * will be partially shifted; for internal nodes, it is the number * of child pointers rather than items. It includes the new item * being created. The code sometimes subtracts one to get the * number of wholly shifted items for other purposes. */ int lnum[MAX_HEIGHT]; /* substitute right for left in comment above */ int rnum[MAX_HEIGHT]; /* * array indexed by height h mapping the key delimiting L[h] and * S[h] to its item number within the node CFL[h] */ int lkey[MAX_HEIGHT]; /* substitute r for l in comment above */ int rkey[MAX_HEIGHT]; /* * the number of bytes by we are trying to add or remove from * S[h]. A negative value means removing. */ int insert_size[MAX_HEIGHT]; /* * number of nodes that will replace node S[h] after balancing * on the level h of the tree. If 0 then S is being deleted, * if 1 then S is remaining and no new nodes are being created, * if 2 or 3 then 1 or 2 new nodes is being created */ int blknum[MAX_HEIGHT]; /* fields that are used only for balancing leaves of the tree */ /* number of empty blocks having been already allocated */ int cur_blknum; /* number of items that fall into left most node when S[0] splits */ int s0num; /* * number of bytes which can flow to the left neighbor from the left * most liquid item that cannot be shifted from S[0] entirely * if -1 then nothing will be partially shifted */ int lbytes; /* * number of bytes which will flow to the right neighbor from the right * most liquid item that cannot be shifted from S[0] entirely * if -1 then nothing will be partially shifted */ int rbytes; /* * index into the array of item headers in * S[0] of the affected item */ int item_pos; /* new nodes allocated to hold what could not fit into S */ struct buffer_head *S_new[2]; /* * number of items that will be placed into nodes in S_new * when S[0] splits */ int snum[2]; /* * number of bytes which flow to nodes in S_new when S[0] splits * note: if S[0] splits into 3 nodes, then items do not need to be cut */ int sbytes[2]; int pos_in_item; int zeroes_num; /* * buffers which are to be freed after do_balance finishes * by unfix_nodes */ struct buffer_head *buf_to_free[MAX_FREE_BLOCK]; /* * kmalloced memory. Used to create virtual node and keep * map of dirtied bitmap blocks */ char *vn_buf; int vn_buf_size; /* size of the vn_buf */ /* VN starts after bitmap of bitmap blocks */ struct virtual_node *tb_vn; /* * saved value of `reiserfs_generation' counter see * FILESYSTEM_CHANGED() macro in reiserfs_fs.h */ int fs_gen; #ifdef DISPLACE_NEW_PACKING_LOCALITIES /* * key pointer, to pass to block allocator or * another low-level subsystem */ struct in_core_key key; #endif }; /* These are modes of balancing */ /* When inserting an item. */ #define M_INSERT 'i' /* * When inserting into (directories only) or appending onto an already * existent item. */ #define M_PASTE 'p' /* When deleting an item. */ #define M_DELETE 'd' /* When truncating an item or removing an entry from a (directory) item. */ #define M_CUT 'c' /* used when balancing on leaf level skipped (in reiserfsck) */ #define M_INTERNAL 'n' /* * When further balancing is not needed, then do_balance does not need * to be called. */ #define M_SKIP_BALANCING 's' #define M_CONVERT 'v' /* modes of leaf_move_items */ #define LEAF_FROM_S_TO_L 0 #define LEAF_FROM_S_TO_R 1 #define LEAF_FROM_R_TO_L 2 #define LEAF_FROM_L_TO_R 3 #define LEAF_FROM_S_TO_SNEW 4 #define FIRST_TO_LAST 0 #define LAST_TO_FIRST 1 /* * used in do_balance for passing parent of node information that has * been gotten from tb struct */ struct buffer_info { struct tree_balance *tb; struct buffer_head *bi_bh; struct buffer_head *bi_parent; int bi_position; }; static inline struct super_block *sb_from_tb(struct tree_balance *tb) { return tb ? tb->tb_sb : NULL; } static inline struct super_block *sb_from_bi(struct buffer_info *bi) { return bi ? sb_from_tb(bi->tb) : NULL; } /* * there are 4 types of items: stat data, directory item, indirect, direct. * +-------------------+------------+--------------+------------+ * | | k_offset | k_uniqueness | mergeable? | * +-------------------+------------+--------------+------------+ * | stat data | 0 | 0 | no | * +-------------------+------------+--------------+------------+ * | 1st directory item| DOT_OFFSET | DIRENTRY_ .. | no | * | non 1st directory | hash value | UNIQUENESS | yes | * | item | | | | * +-------------------+------------+--------------+------------+ * | indirect item | offset + 1 |TYPE_INDIRECT | [1] | * +-------------------+------------+--------------+------------+ * | direct item | offset + 1 |TYPE_DIRECT | [2] | * +-------------------+------------+--------------+------------+ * * [1] if this is not the first indirect item of the object * [2] if this is not the first direct item of the object */ struct item_operations { int (*bytes_number) (struct item_head * ih, int block_size); void (*decrement_key) (struct cpu_key *); int (*is_left_mergeable) (struct reiserfs_key * ih, unsigned long bsize); void (*print_item) (struct item_head *, char *item); void (*check_item) (struct item_head *, char *item); int (*create_vi) (struct virtual_node * vn, struct virtual_item * vi, int is_affected, int insert_size); int (*check_left) (struct virtual_item * vi, int free, int start_skip, int end_skip); int (*check_right) (struct virtual_item * vi, int free); int (*part_size) (struct virtual_item * vi, int from, int to); int (*unit_num) (struct virtual_item * vi); void (*print_vi) (struct virtual_item * vi); }; extern struct item_operations *item_ops[TYPE_ANY + 1]; #define op_bytes_number(ih,bsize) item_ops[le_ih_k_type (ih)]->bytes_number (ih, bsize) #define op_is_left_mergeable(key,bsize) item_ops[le_key_k_type (le_key_version (key), key)]->is_left_mergeable (key, bsize) #define op_print_item(ih,item) item_ops[le_ih_k_type (ih)]->print_item (ih, item) #define op_check_item(ih,item) item_ops[le_ih_k_type (ih)]->check_item (ih, item) #define op_create_vi(vn,vi,is_affected,insert_size) item_ops[le_ih_k_type ((vi)->vi_ih)]->create_vi (vn,vi,is_affected,insert_size) #define op_check_left(vi,free,start_skip,end_skip) item_ops[(vi)->vi_index]->check_left (vi, free, start_skip, end_skip) #define op_check_right(vi,free) item_ops[(vi)->vi_index]->check_right (vi, free) #define op_part_size(vi,from,to) item_ops[(vi)->vi_index]->part_size (vi, from, to) #define op_unit_num(vi) item_ops[(vi)->vi_index]->unit_num (vi) #define op_print_vi(vi) item_ops[(vi)->vi_index]->print_vi (vi) #define COMP_SHORT_KEYS comp_short_keys /* number of blocks pointed to by the indirect item */ #define I_UNFM_NUM(ih) (ih_item_len(ih) / UNFM_P_SIZE) /* * the used space within the unformatted node corresponding * to pos within the item pointed to by ih */ #define I_POS_UNFM_SIZE(ih,pos,size) (((pos) == I_UNFM_NUM(ih) - 1 ) ? (size) - ih_free_space(ih) : (size)) /* * number of bytes contained by the direct item or the * unformatted nodes the indirect item points to */ /* following defines use reiserfs buffer header and item header */ /* get stat-data */ #define B_I_STAT_DATA(bh, ih) ( (struct stat_data * )((bh)->b_data + ih_location(ih)) ) /* this is 3976 for size==4096 */ #define MAX_DIRECT_ITEM_LEN(size) ((size) - BLKH_SIZE - 2*IH_SIZE - SD_SIZE - UNFM_P_SIZE) /* * indirect items consist of entries which contain blocknrs, pos * indicates which entry, and B_I_POS_UNFM_POINTER resolves to the * blocknr contained by the entry pos points to */ #define B_I_POS_UNFM_POINTER(bh, ih, pos) \ le32_to_cpu(*(((unp_t *)ih_item_body(bh, ih)) + (pos))) #define PUT_B_I_POS_UNFM_POINTER(bh, ih, pos, val) \ (*(((unp_t *)ih_item_body(bh, ih)) + (pos)) = cpu_to_le32(val)) struct reiserfs_iget_args { __u32 objectid; __u32 dirid; }; /*************************************************************************** * FUNCTION DECLARATIONS * ***************************************************************************/ #define get_journal_desc_magic(bh) (bh->b_data + bh->b_size - 12) #define journal_trans_half(blocksize) \ ((blocksize - sizeof(struct reiserfs_journal_desc) - 12) / sizeof(__u32)) /* journal.c see journal.c for all the comments here */ /* first block written in a commit. */ struct reiserfs_journal_desc { __le32 j_trans_id; /* id of commit */ /* length of commit. len +1 is the commit block */ __le32 j_len; __le32 j_mount_id; /* mount id of this trans */ __le32 j_realblock[]; /* real locations for each block */ }; #define get_desc_trans_id(d) le32_to_cpu((d)->j_trans_id) #define get_desc_trans_len(d) le32_to_cpu((d)->j_len) #define get_desc_mount_id(d) le32_to_cpu((d)->j_mount_id) #define set_desc_trans_id(d,val) do { (d)->j_trans_id = cpu_to_le32 (val); } while (0) #define set_desc_trans_len(d,val) do { (d)->j_len = cpu_to_le32 (val); } while (0) #define set_desc_mount_id(d,val) do { (d)->j_mount_id = cpu_to_le32 (val); } while (0) /* last block written in a commit */ struct reiserfs_journal_commit { __le32 j_trans_id; /* must match j_trans_id from the desc block */ __le32 j_len; /* ditto */ __le32 j_realblock[]; /* real locations for each block */ }; #define get_commit_trans_id(c) le32_to_cpu((c)->j_trans_id) #define get_commit_trans_len(c) le32_to_cpu((c)->j_len) #define get_commit_mount_id(c) le32_to_cpu((c)->j_mount_id) #define set_commit_trans_id(c,val) do { (c)->j_trans_id = cpu_to_le32 (val); } while (0) #define set_commit_trans_len(c,val) do { (c)->j_len = cpu_to_le32 (val); } while (0) /* * this header block gets written whenever a transaction is considered * fully flushed, and is more recent than the last fully flushed transaction. * fully flushed means all the log blocks and all the real blocks are on * disk, and this transaction does not need to be replayed. */ struct reiserfs_journal_header { /* id of last fully flushed transaction */ __le32 j_last_flush_trans_id; /* offset in the log of where to start replay after a crash */ __le32 j_first_unflushed_offset; __le32 j_mount_id; /* 12 */ struct journal_params jh_journal; }; /* biggest tunable defines are right here */ #define JOURNAL_BLOCK_COUNT 8192 /* number of blocks in the journal */ /* biggest possible single transaction, don't change for now (8/3/99) */ #define JOURNAL_TRANS_MAX_DEFAULT 1024 #define JOURNAL_TRANS_MIN_DEFAULT 256 /* * max blocks to batch into one transaction, * don't make this any bigger than 900 */ #define JOURNAL_MAX_BATCH_DEFAULT 900 #define JOURNAL_MIN_RATIO 2 #define JOURNAL_MAX_COMMIT_AGE 30 #define JOURNAL_MAX_TRANS_AGE 30 #define JOURNAL_PER_BALANCE_CNT (3 * (MAX_HEIGHT-2) + 9) #define JOURNAL_BLOCKS_PER_OBJECT(sb) (JOURNAL_PER_BALANCE_CNT * 3 + \ 2 * (REISERFS_QUOTA_INIT_BLOCKS(sb) + \ REISERFS_QUOTA_TRANS_BLOCKS(sb))) #ifdef CONFIG_QUOTA #define REISERFS_QUOTA_OPTS ((1 << REISERFS_USRQUOTA) | (1 << REISERFS_GRPQUOTA)) /* We need to update data and inode (atime) */ #define REISERFS_QUOTA_TRANS_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & REISERFS_QUOTA_OPTS ? 2 : 0) /* 1 balancing, 1 bitmap, 1 data per write + stat data update */ #define REISERFS_QUOTA_INIT_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & REISERFS_QUOTA_OPTS ? \ (DQUOT_INIT_ALLOC*(JOURNAL_PER_BALANCE_CNT+2)+DQUOT_INIT_REWRITE+1) : 0) /* same as with INIT */ #define REISERFS_QUOTA_DEL_BLOCKS(s) (REISERFS_SB(s)->s_mount_opt & REISERFS_QUOTA_OPTS ? \ (DQUOT_DEL_ALLOC*(JOURNAL_PER_BALANCE_CNT+2)+DQUOT_DEL_REWRITE+1) : 0) #else #define REISERFS_QUOTA_TRANS_BLOCKS(s) 0 #define REISERFS_QUOTA_INIT_BLOCKS(s) 0 #define REISERFS_QUOTA_DEL_BLOCKS(s) 0 #endif /* * both of these can be as low as 1, or as high as you want. The min is the * number of 4k bitmap nodes preallocated on mount. New nodes are allocated * as needed, and released when transactions are committed. On release, if * the current number of nodes is > max, the node is freed, otherwise, * it is put on a free list for faster use later. */ #define REISERFS_MIN_BITMAP_NODES 10 #define REISERFS_MAX_BITMAP_NODES 100 /* these are based on journal hash size of 8192 */ #define JBH_HASH_SHIFT 13 #define JBH_HASH_MASK 8191 #define _jhashfn(sb,block) \ (((unsigned long)sb>>L1_CACHE_SHIFT) ^ \ (((block)<<(JBH_HASH_SHIFT - 6)) ^ ((block) >> 13) ^ ((block) << (JBH_HASH_SHIFT - 12)))) #define journal_hash(t,sb,block) ((t)[_jhashfn((sb),(block)) & JBH_HASH_MASK]) /* We need these to make journal.c code more readable */ #define journal_find_get_block(s, block) __find_get_block(\ file_bdev(SB_JOURNAL(s)->j_bdev_file), block, s->s_blocksize) #define journal_getblk(s, block) __getblk(file_bdev(SB_JOURNAL(s)->j_bdev_file),\ block, s->s_blocksize) #define journal_bread(s, block) __bread(file_bdev(SB_JOURNAL(s)->j_bdev_file),\ block, s->s_blocksize) enum reiserfs_bh_state_bits { BH_JDirty = BH_PrivateStart, /* buffer is in current transaction */ BH_JDirty_wait, /* * disk block was taken off free list before being in a * finished transaction, or written to disk. Can be reused immed. */ BH_JNew, BH_JPrepared, BH_JRestore_dirty, BH_JTest, /* debugging only will go away */ }; BUFFER_FNS(JDirty, journaled); TAS_BUFFER_FNS(JDirty, journaled); BUFFER_FNS(JDirty_wait, journal_dirty); TAS_BUFFER_FNS(JDirty_wait, journal_dirty); BUFFER_FNS(JNew, journal_new); TAS_BUFFER_FNS(JNew, journal_new); BUFFER_FNS(JPrepared, journal_prepared); TAS_BUFFER_FNS(JPrepared, journal_prepared); BUFFER_FNS(JRestore_dirty, journal_restore_dirty); TAS_BUFFER_FNS(JRestore_dirty, journal_restore_dirty); BUFFER_FNS(JTest, journal_test); TAS_BUFFER_FNS(JTest, journal_test); /* transaction handle which is passed around for all journal calls */ struct reiserfs_transaction_handle { /* * super for this FS when journal_begin was called. saves calls to * reiserfs_get_super also used by nested transactions to make * sure they are nesting on the right FS _must_ be first * in the handle */ struct super_block *t_super; int t_refcount; int t_blocks_logged; /* number of blocks this writer has logged */ int t_blocks_allocated; /* number of blocks this writer allocated */ /* sanity check, equals the current trans id */ unsigned int t_trans_id; void *t_handle_save; /* save existing current->journal_info */ /* * if new block allocation occurres, that block * should be displaced from others */ unsigned displace_new_blocks:1; struct list_head t_list; }; /* * used to keep track of ordered and tail writes, attached to the buffer * head through b_journal_head. */ struct reiserfs_jh { struct reiserfs_journal_list *jl; struct buffer_head *bh; struct list_head list; }; void reiserfs_free_jh(struct buffer_head *bh); int reiserfs_add_tail_list(struct inode *inode, struct buffer_head *bh); int reiserfs_add_ordered_list(struct inode *inode, struct buffer_head *bh); int journal_mark_dirty(struct reiserfs_transaction_handle *, struct buffer_head *bh); static inline int reiserfs_file_data_log(struct inode *inode) { if (reiserfs_data_log(inode->i_sb) || (REISERFS_I(inode)->i_flags & i_data_log)) return 1; return 0; } static inline int reiserfs_transaction_running(struct super_block *s) { struct reiserfs_transaction_handle *th = current->journal_info; if (th && th->t_super == s) return 1; if (th && th->t_super == NULL) BUG(); return 0; } static inline int reiserfs_transaction_free_space(struct reiserfs_transaction_handle *th) { return th->t_blocks_allocated - th->t_blocks_logged; } struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct super_block *, int count); int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *); void reiserfs_vfs_truncate_file(struct inode *inode); int reiserfs_commit_page(struct inode *inode, struct page *page, unsigned from, unsigned to); void reiserfs_flush_old_commits(struct super_block *); int reiserfs_commit_for_inode(struct inode *); int reiserfs_inode_needs_commit(struct inode *); void reiserfs_update_inode_transaction(struct inode *); void reiserfs_wait_on_write_block(struct super_block *s); void reiserfs_block_writes(struct reiserfs_transaction_handle *th); void reiserfs_allow_writes(struct super_block *s); void reiserfs_check_lock_depth(struct super_block *s, char *caller); int reiserfs_prepare_for_journal(struct super_block *, struct buffer_head *bh, int wait); void reiserfs_restore_prepared_buffer(struct super_block *, struct buffer_head *bh); int journal_init(struct super_block *, const char *j_dev_name, int old_format, unsigned int); int journal_release(struct reiserfs_transaction_handle *, struct super_block *); int journal_release_error(struct reiserfs_transaction_handle *, struct super_block *); int journal_end(struct reiserfs_transaction_handle *); int journal_end_sync(struct reiserfs_transaction_handle *); int journal_mark_freed(struct reiserfs_transaction_handle *, struct super_block *, b_blocknr_t blocknr); int journal_transaction_should_end(struct reiserfs_transaction_handle *, int); int reiserfs_in_journal(struct super_block *sb, unsigned int bmap_nr, int bit_nr, int searchall, b_blocknr_t *next); int journal_begin(struct reiserfs_transaction_handle *, struct super_block *sb, unsigned long); int journal_join_abort(struct reiserfs_transaction_handle *, struct super_block *sb); void reiserfs_abort_journal(struct super_block *sb, int errno); void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...); int reiserfs_allocate_list_bitmaps(struct super_block *s, struct reiserfs_list_bitmap *, unsigned int); void reiserfs_schedule_old_flush(struct super_block *s); void reiserfs_cancel_old_flush(struct super_block *s); void add_save_link(struct reiserfs_transaction_handle *th, struct inode *inode, int truncate); int remove_save_link(struct inode *inode, int truncate); /* objectid.c */ __u32 reiserfs_get_unused_objectid(struct reiserfs_transaction_handle *th); void reiserfs_release_objectid(struct reiserfs_transaction_handle *th, __u32 objectid_to_release); int reiserfs_convert_objectid_map_v1(struct super_block *); /* stree.c */ int B_IS_IN_TREE(const struct buffer_head *); extern void copy_item_head(struct item_head *to, const struct item_head *from); /* first key is in cpu form, second - le */ extern int comp_short_keys(const struct reiserfs_key *le_key, const struct cpu_key *cpu_key); extern void le_key2cpu_key(struct cpu_key *to, const struct reiserfs_key *from); /* both are in le form */ extern int comp_le_keys(const struct reiserfs_key *, const struct reiserfs_key *); extern int comp_short_le_keys(const struct reiserfs_key *, const struct reiserfs_key *); /* * get key version from on disk key - kludge */ static inline int le_key_version(const struct reiserfs_key *key) { int type; type = offset_v2_k_type(&(key->u.k_offset_v2)); if (type != TYPE_DIRECT && type != TYPE_INDIRECT && type != TYPE_DIRENTRY) return KEY_FORMAT_3_5; return KEY_FORMAT_3_6; } static inline void copy_key(struct reiserfs_key *to, const struct reiserfs_key *from) { memcpy(to, from, KEY_SIZE); } int comp_items(const struct item_head *stored_ih, const struct treepath *path); const struct reiserfs_key *get_rkey(const struct treepath *chk_path, const struct super_block *sb); int search_by_key(struct super_block *, const struct cpu_key *, struct treepath *, int); #define search_item(s,key,path) search_by_key (s, key, path, DISK_LEAF_NODE_LEVEL) int search_for_position_by_key(struct super_block *sb, const struct cpu_key *cpu_key, struct treepath *search_path); extern void decrement_bcount(struct buffer_head *bh); void decrement_counters_in_path(struct treepath *search_path); void pathrelse(struct treepath *search_path); int reiserfs_check_path(struct treepath *p); void pathrelse_and_restore(struct super_block *s, struct treepath *search_path); int reiserfs_insert_item(struct reiserfs_transaction_handle *th, struct treepath *path, const struct cpu_key *key, struct item_head *ih, struct inode *inode, const char *body); int reiserfs_paste_into_item(struct reiserfs_transaction_handle *th, struct treepath *path, const struct cpu_key *key, struct inode *inode, const char *body, int paste_size); int reiserfs_cut_from_item(struct reiserfs_transaction_handle *th, struct treepath *path, struct cpu_key *key, struct inode *inode, struct page *page, loff_t new_file_size); int reiserfs_delete_item(struct reiserfs_transaction_handle *th, struct treepath *path, const struct cpu_key *key, struct inode *inode, struct buffer_head *un_bh); void reiserfs_delete_solid_item(struct reiserfs_transaction_handle *th, struct inode *inode, struct reiserfs_key *key); int reiserfs_delete_object(struct reiserfs_transaction_handle *th, struct inode *inode); int reiserfs_do_truncate(struct reiserfs_transaction_handle *th, struct inode *inode, struct page *, int update_timestamps); #define i_block_size(inode) ((inode)->i_sb->s_blocksize) #define file_size(inode) ((inode)->i_size) #define tail_size(inode) (file_size (inode) & (i_block_size (inode) - 1)) #define tail_has_to_be_packed(inode) (have_large_tails ((inode)->i_sb)?\ !STORE_TAIL_IN_UNFM_S1(file_size (inode), tail_size(inode), inode->i_sb->s_blocksize):have_small_tails ((inode)->i_sb)?!STORE_TAIL_IN_UNFM_S2(file_size (inode), tail_size(inode), inode->i_sb->s_blocksize):0 ) void padd_item(char *item, int total_length, int length); /* inode.c */ /* args for the create parameter of reiserfs_get_block */ #define GET_BLOCK_NO_CREATE 0 /* don't create new blocks or convert tails */ #define GET_BLOCK_CREATE 1 /* add anything you need to find block */ #define GET_BLOCK_NO_HOLE 2 /* return -ENOENT for file holes */ #define GET_BLOCK_READ_DIRECT 4 /* read the tail if indirect item not found */ #define GET_BLOCK_NO_IMUX 8 /* i_mutex is not held, don't preallocate */ #define GET_BLOCK_NO_DANGLE 16 /* don't leave any transactions running */ void reiserfs_read_locked_inode(struct inode *inode, struct reiserfs_iget_args *args); int reiserfs_find_actor(struct inode *inode, void *p); int reiserfs_init_locked_inode(struct inode *inode, void *p); void reiserfs_evict_inode(struct inode *inode); int reiserfs_write_inode(struct inode *inode, struct writeback_control *wbc); int reiserfs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_result, int create); struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type); struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type); int reiserfs_encode_fh(struct inode *inode, __u32 * data, int *lenp, struct inode *parent); int reiserfs_truncate_file(struct inode *, int update_timestamps); void make_cpu_key(struct cpu_key *cpu_key, struct inode *inode, loff_t offset, int type, int key_length); void make_le_item_head(struct item_head *ih, const struct cpu_key *key, int version, loff_t offset, int type, int length, int entry_count); struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key); struct reiserfs_security_handle; int reiserfs_new_inode(struct reiserfs_transaction_handle *th, struct inode *dir, umode_t mode, const char *symname, loff_t i_size, struct dentry *dentry, struct inode *inode, struct reiserfs_security_handle *security); void reiserfs_update_sd_size(struct reiserfs_transaction_handle *th, struct inode *inode, loff_t size); static inline void reiserfs_update_sd(struct reiserfs_transaction_handle *th, struct inode *inode) { reiserfs_update_sd_size(th, inode, inode->i_size); } void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode); int reiserfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr); int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len); /* namei.c */ void reiserfs_init_priv_inode(struct inode *inode); void set_de_name_and_namelen(struct reiserfs_dir_entry *de); int search_by_entry_key(struct super_block *sb, const struct cpu_key *key, struct treepath *path, struct reiserfs_dir_entry *de); struct dentry *reiserfs_get_parent(struct dentry *); #ifdef CONFIG_REISERFS_PROC_INFO int reiserfs_proc_info_init(struct super_block *sb); int reiserfs_proc_info_done(struct super_block *sb); int reiserfs_proc_info_global_init(void); int reiserfs_proc_info_global_done(void); #define PROC_EXP( e ) e #define __PINFO( sb ) REISERFS_SB(sb) -> s_proc_info_data #define PROC_INFO_MAX( sb, field, value ) \ __PINFO( sb ).field = \ max( REISERFS_SB( sb ) -> s_proc_info_data.field, value ) #define PROC_INFO_INC( sb, field ) ( ++ ( __PINFO( sb ).field ) ) #define PROC_INFO_ADD( sb, field, val ) ( __PINFO( sb ).field += ( val ) ) #define PROC_INFO_BH_STAT( sb, bh, level ) \ PROC_INFO_INC( sb, sbk_read_at[ ( level ) ] ); \ PROC_INFO_ADD( sb, free_at[ ( level ) ], B_FREE_SPACE( bh ) ); \ PROC_INFO_ADD( sb, items_at[ ( level ) ], B_NR_ITEMS( bh ) ) #else static inline int reiserfs_proc_info_init(struct super_block *sb) { return 0; } static inline int reiserfs_proc_info_done(struct super_block *sb) { return 0; } static inline int reiserfs_proc_info_global_init(void) { return 0; } static inline int reiserfs_proc_info_global_done(void) { return 0; } #define PROC_EXP( e ) #define VOID_V ( ( void ) 0 ) #define PROC_INFO_MAX( sb, field, value ) VOID_V #define PROC_INFO_INC( sb, field ) VOID_V #define PROC_INFO_ADD( sb, field, val ) VOID_V #define PROC_INFO_BH_STAT(sb, bh, n_node_level) VOID_V #endif /* dir.c */ extern const struct inode_operations reiserfs_dir_inode_operations; extern const struct inode_operations reiserfs_symlink_inode_operations; extern const struct inode_operations reiserfs_special_inode_operations; extern const struct file_operations reiserfs_dir_operations; int reiserfs_readdir_inode(struct inode *, struct dir_context *); /* tail_conversion.c */ int direct2indirect(struct reiserfs_transaction_handle *, struct inode *, struct treepath *, struct buffer_head *, loff_t); int indirect2direct(struct reiserfs_transaction_handle *, struct inode *, struct page *, struct treepath *, const struct cpu_key *, loff_t, char *); void reiserfs_unmap_buffer(struct buffer_head *); /* file.c */ extern const struct inode_operations reiserfs_file_inode_operations; extern const struct inode_operations reiserfs_priv_file_inode_operations; extern const struct file_operations reiserfs_file_operations; extern const struct address_space_operations reiserfs_address_space_operations; /* fix_nodes.c */ int fix_nodes(int n_op_mode, struct tree_balance *tb, struct item_head *ins_ih, const void *); void unfix_nodes(struct tree_balance *); /* prints.c */ void __reiserfs_panic(struct super_block *s, const char *id, const char *function, const char *fmt, ...) __attribute__ ((noreturn)); #define reiserfs_panic(s, id, fmt, args...) \ __reiserfs_panic(s, id, __func__, fmt, ##args) void __reiserfs_error(struct super_block *s, const char *id, const char *function, const char *fmt, ...); #define reiserfs_error(s, id, fmt, args...) \ __reiserfs_error(s, id, __func__, fmt, ##args) void reiserfs_info(struct super_block *s, const char *fmt, ...); void reiserfs_debug(struct super_block *s, int level, const char *fmt, ...); void print_indirect_item(struct buffer_head *bh, int item_num); void store_print_tb(struct tree_balance *tb); void print_cur_tb(char *mes); void print_de(struct reiserfs_dir_entry *de); void print_bi(struct buffer_info *bi, char *mes); #define PRINT_LEAF_ITEMS 1 /* print all items */ #define PRINT_DIRECTORY_ITEMS 2 /* print directory items */ #define PRINT_DIRECT_ITEMS 4 /* print contents of direct items */ void print_block(struct buffer_head *bh, ...); void print_bmap(struct super_block *s, int silent); void print_bmap_block(int i, char *data, int size, int silent); /*void print_super_block (struct super_block * s, char * mes);*/ void print_objectid_map(struct super_block *s); void print_block_head(struct buffer_head *bh, char *mes); void check_leaf(struct buffer_head *bh); void check_internal(struct buffer_head *bh); void print_statistics(struct super_block *s); char *reiserfs_hashname(int code); /* lbalance.c */ int leaf_move_items(int shift_mode, struct tree_balance *tb, int mov_num, int mov_bytes, struct buffer_head *Snew); int leaf_shift_left(struct tree_balance *tb, int shift_num, int shift_bytes); int leaf_shift_right(struct tree_balance *tb, int shift_num, int shift_bytes); void leaf_delete_items(struct buffer_info *cur_bi, int last_first, int first, int del_num, int del_bytes); void leaf_insert_into_buf(struct buffer_info *bi, int before, struct item_head * const inserted_item_ih, const char * const inserted_item_body, int zeros_number); void leaf_paste_in_buffer(struct buffer_info *bi, int pasted_item_num, int pos_in_item, int paste_size, const char * const body, int zeros_number); void leaf_cut_from_buffer(struct buffer_info *bi, int cut_item_num, int pos_in_item, int cut_size); void leaf_paste_entries(struct buffer_info *bi, int item_num, int before, int new_entry_count, struct reiserfs_de_head *new_dehs, const char *records, int paste_size); /* ibalance.c */ int balance_internal(struct tree_balance *, int, int, struct item_head *, struct buffer_head **); /* do_balance.c */ void do_balance_mark_leaf_dirty(struct tree_balance *tb, struct buffer_head *bh, int flag); #define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty #define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty void do_balance(struct tree_balance *tb, struct item_head *ih, const char *body, int flag); void reiserfs_invalidate_buffer(struct tree_balance *tb, struct buffer_head *bh); int get_left_neighbor_position(struct tree_balance *tb, int h); int get_right_neighbor_position(struct tree_balance *tb, int h); void replace_key(struct tree_balance *tb, struct buffer_head *, int, struct buffer_head *, int); void make_empty_node(struct buffer_info *); struct buffer_head *get_FEB(struct tree_balance *); /* bitmap.c */ /* * structure contains hints for block allocator, and it is a container for * arguments, such as node, search path, transaction_handle, etc. */ struct __reiserfs_blocknr_hint { /* inode passed to allocator, if we allocate unf. nodes */ struct inode *inode; sector_t block; /* file offset, in blocks */ struct in_core_key key; /* * search path, used by allocator to deternine search_start by * various ways */ struct treepath *path; /* * transaction handle is needed to log super blocks * and bitmap blocks changes */ struct reiserfs_transaction_handle *th; b_blocknr_t beg, end; /* * a field used to transfer search start value (block number) * between different block allocator procedures * (determine_search_start() and others) */ b_blocknr_t search_start; /* * is set in determine_prealloc_size() function, * used by underlayed function that do actual allocation */ int prealloc_size; /* * the allocator uses different polices for getting disk * space for formatted/unformatted blocks with/without preallocation */ unsigned formatted_node:1; unsigned preallocate:1; }; typedef struct __reiserfs_blocknr_hint reiserfs_blocknr_hint_t; int reiserfs_parse_alloc_options(struct super_block *, char *); void reiserfs_init_alloc_options(struct super_block *s); /* * given a directory, this will tell you what packing locality * to use for a new object underneat it. The locality is returned * in disk byte order (le). */ __le32 reiserfs_choose_packing(struct inode *dir); void show_alloc_options(struct seq_file *seq, struct super_block *s); int reiserfs_init_bitmap_cache(struct super_block *sb); void reiserfs_free_bitmap_cache(struct super_block *sb); void reiserfs_cache_bitmap_metadata(struct super_block *sb, struct buffer_head *bh, struct reiserfs_bitmap_info *info); struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb, unsigned int bitmap); int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value); void reiserfs_free_block(struct reiserfs_transaction_handle *th, struct inode *, b_blocknr_t, int for_unformatted); int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t *, b_blocknr_t *, int, int); static inline int reiserfs_new_form_blocknrs(struct tree_balance *tb, b_blocknr_t * new_blocknrs, int amount_needed) { reiserfs_blocknr_hint_t hint = { .th = tb->transaction_handle, .path = tb->tb_path, .inode = NULL, .key = tb->key, .block = 0, .formatted_node = 1 }; return reiserfs_allocate_blocknrs(&hint, new_blocknrs, amount_needed, 0); } static inline int reiserfs_new_unf_blocknrs(struct reiserfs_transaction_handle *th, struct inode *inode, b_blocknr_t * new_blocknrs, struct treepath *path, sector_t block) { reiserfs_blocknr_hint_t hint = { .th = th, .path = path, .inode = inode, .block = block, .formatted_node = 0, .preallocate = 0 }; return reiserfs_allocate_blocknrs(&hint, new_blocknrs, 1, 0); } #ifdef REISERFS_PREALLOCATE static inline int reiserfs_new_unf_blocknrs2(struct reiserfs_transaction_handle *th, struct inode *inode, b_blocknr_t * new_blocknrs, struct treepath *path, sector_t block) { reiserfs_blocknr_hint_t hint = { .th = th, .path = path, .inode = inode, .block = block, .formatted_node = 0, .preallocate = 1 }; return reiserfs_allocate_blocknrs(&hint, new_blocknrs, 1, 0); } void reiserfs_discard_prealloc(struct reiserfs_transaction_handle *th, struct inode *inode); void reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle *th); #endif /* hashes.c */ __u32 keyed_hash(const signed char *msg, int len); __u32 yura_hash(const signed char *msg, int len); __u32 r5_hash(const signed char *msg, int len); #define reiserfs_set_le_bit __set_bit_le #define reiserfs_test_and_set_le_bit __test_and_set_bit_le #define reiserfs_clear_le_bit __clear_bit_le #define reiserfs_test_and_clear_le_bit __test_and_clear_bit_le #define reiserfs_test_le_bit test_bit_le #define reiserfs_find_next_zero_le_bit find_next_zero_bit_le /* * sometimes reiserfs_truncate may require to allocate few new blocks * to perform indirect2direct conversion. People probably used to * think, that truncate should work without problems on a filesystem * without free disk space. They may complain that they can not * truncate due to lack of free disk space. This spare space allows us * to not worry about it. 500 is probably too much, but it should be * absolutely safe */ #define SPARE_SPACE 500 /* prototypes from ioctl.c */ int reiserfs_fileattr_get(struct dentry *dentry, struct fileattr *fa); int reiserfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa); long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); long reiserfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); int reiserfs_unpack(struct inode *inode); |
| 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 57 57 57 57 142 142 142 142 142 142 142 142 142 142 57 57 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 | // SPDX-License-Identifier: GPL-2.0 #define pr_fmt(fmt) "irq: " fmt #include <linux/acpi.h> #include <linux/debugfs.h> #include <linux/hardirq.h> #include <linux/interrupt.h> #include <linux/irq.h> #include <linux/irqdesc.h> #include <linux/irqdomain.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/of.h> #include <linux/of_address.h> #include <linux/of_irq.h> #include <linux/topology.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/smp.h> #include <linux/fs.h> static LIST_HEAD(irq_domain_list); static DEFINE_MUTEX(irq_domain_mutex); static struct irq_domain *irq_default_domain; static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base, unsigned int nr_irqs, int node, void *arg, bool realloc, const struct irq_affinity_desc *affinity); static void irq_domain_check_hierarchy(struct irq_domain *domain); static void irq_domain_free_one_irq(struct irq_domain *domain, unsigned int virq); struct irqchip_fwid { struct fwnode_handle fwnode; unsigned int type; char *name; phys_addr_t *pa; }; #ifdef CONFIG_GENERIC_IRQ_DEBUGFS static void debugfs_add_domain_dir(struct irq_domain *d); static void debugfs_remove_domain_dir(struct irq_domain *d); #else static inline void debugfs_add_domain_dir(struct irq_domain *d) { } static inline void debugfs_remove_domain_dir(struct irq_domain *d) { } #endif static const char *irqchip_fwnode_get_name(const struct fwnode_handle *fwnode) { struct irqchip_fwid *fwid = container_of(fwnode, struct irqchip_fwid, fwnode); return fwid->name; } const struct fwnode_operations irqchip_fwnode_ops = { .get_name = irqchip_fwnode_get_name, }; EXPORT_SYMBOL_GPL(irqchip_fwnode_ops); /** * __irq_domain_alloc_fwnode - Allocate a fwnode_handle suitable for * identifying an irq domain * @type: Type of irqchip_fwnode. See linux/irqdomain.h * @id: Optional user provided id if name != NULL * @name: Optional user provided domain name * @pa: Optional user-provided physical address * * Allocate a struct irqchip_fwid, and return a pointer to the embedded * fwnode_handle (or NULL on failure). * * Note: The types IRQCHIP_FWNODE_NAMED and IRQCHIP_FWNODE_NAMED_ID are * solely to transport name information to irqdomain creation code. The * node is not stored. For other types the pointer is kept in the irq * domain struct. */ struct fwnode_handle *__irq_domain_alloc_fwnode(unsigned int type, int id, const char *name, phys_addr_t *pa) { struct irqchip_fwid *fwid; char *n; fwid = kzalloc(sizeof(*fwid), GFP_KERNEL); switch (type) { case IRQCHIP_FWNODE_NAMED: n = kasprintf(GFP_KERNEL, "%s", name); break; case IRQCHIP_FWNODE_NAMED_ID: n = kasprintf(GFP_KERNEL, "%s-%d", name, id); break; default: n = kasprintf(GFP_KERNEL, "irqchip@%pa", pa); break; } if (!fwid || !n) { kfree(fwid); kfree(n); return NULL; } fwid->type = type; fwid->name = n; fwid->pa = pa; fwnode_init(&fwid->fwnode, &irqchip_fwnode_ops); return &fwid->fwnode; } EXPORT_SYMBOL_GPL(__irq_domain_alloc_fwnode); /** * irq_domain_free_fwnode - Free a non-OF-backed fwnode_handle * @fwnode: fwnode_handle to free * * Free a fwnode_handle allocated with irq_domain_alloc_fwnode. */ void irq_domain_free_fwnode(struct fwnode_handle *fwnode) { struct irqchip_fwid *fwid; if (!fwnode || WARN_ON(!is_fwnode_irqchip(fwnode))) return; fwid = container_of(fwnode, struct irqchip_fwid, fwnode); kfree(fwid->name); kfree(fwid); } EXPORT_SYMBOL_GPL(irq_domain_free_fwnode); static int irq_domain_set_name(struct irq_domain *domain, const struct fwnode_handle *fwnode, enum irq_domain_bus_token bus_token) { static atomic_t unknown_domains; struct irqchip_fwid *fwid; if (is_fwnode_irqchip(fwnode)) { fwid = container_of(fwnode, struct irqchip_fwid, fwnode); switch (fwid->type) { case IRQCHIP_FWNODE_NAMED: case IRQCHIP_FWNODE_NAMED_ID: domain->name = bus_token ? kasprintf(GFP_KERNEL, "%s-%d", fwid->name, bus_token) : kstrdup(fwid->name, GFP_KERNEL); if (!domain->name) return -ENOMEM; domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; break; default: domain->name = fwid->name; if (bus_token) { domain->name = kasprintf(GFP_KERNEL, "%s-%d", fwid->name, bus_token); if (!domain->name) return -ENOMEM; domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; } break; } } else if (is_of_node(fwnode) || is_acpi_device_node(fwnode) || is_software_node(fwnode)) { char *name; /* * fwnode paths contain '/', which debugfs is legitimately * unhappy about. Replace them with ':', which does * the trick and is not as offensive as '\'... */ name = bus_token ? kasprintf(GFP_KERNEL, "%pfw-%d", fwnode, bus_token) : kasprintf(GFP_KERNEL, "%pfw", fwnode); if (!name) return -ENOMEM; domain->name = strreplace(name, '/', ':'); domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; } if (!domain->name) { if (fwnode) pr_err("Invalid fwnode type for irqdomain\n"); domain->name = bus_token ? kasprintf(GFP_KERNEL, "unknown-%d-%d", atomic_inc_return(&unknown_domains), bus_token) : kasprintf(GFP_KERNEL, "unknown-%d", atomic_inc_return(&unknown_domains)); if (!domain->name) return -ENOMEM; domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; } return 0; } static struct irq_domain *__irq_domain_create(const struct irq_domain_info *info) { struct irq_domain *domain; int err; if (WARN_ON((info->size && info->direct_max) || (!IS_ENABLED(CONFIG_IRQ_DOMAIN_NOMAP) && info->direct_max) || (info->direct_max && info->direct_max != info->hwirq_max))) return ERR_PTR(-EINVAL); domain = kzalloc_node(struct_size(domain, revmap, info->size), GFP_KERNEL, of_node_to_nid(to_of_node(info->fwnode))); if (!domain) return ERR_PTR(-ENOMEM); err = irq_domain_set_name(domain, info->fwnode, info->bus_token); if (err) { kfree(domain); return ERR_PTR(err); } domain->fwnode = fwnode_handle_get(info->fwnode); fwnode_dev_initialized(domain->fwnode, true); /* Fill structure */ INIT_RADIX_TREE(&domain->revmap_tree, GFP_KERNEL); domain->ops = info->ops; domain->host_data = info->host_data; domain->bus_token = info->bus_token; domain->hwirq_max = info->hwirq_max; if (info->direct_max) domain->flags |= IRQ_DOMAIN_FLAG_NO_MAP; domain->revmap_size = info->size; /* * Hierarchical domains use the domain lock of the root domain * (innermost domain). * * For non-hierarchical domains (as for root domains), the root * pointer is set to the domain itself so that &domain->root->mutex * always points to the right lock. */ mutex_init(&domain->mutex); domain->root = domain; irq_domain_check_hierarchy(domain); return domain; } static void __irq_domain_publish(struct irq_domain *domain) { mutex_lock(&irq_domain_mutex); debugfs_add_domain_dir(domain); list_add(&domain->link, &irq_domain_list); mutex_unlock(&irq_domain_mutex); pr_debug("Added domain %s\n", domain->name); } static void irq_domain_free(struct irq_domain *domain) { fwnode_dev_initialized(domain->fwnode, false); fwnode_handle_put(domain->fwnode); if (domain->flags & IRQ_DOMAIN_NAME_ALLOCATED) kfree(domain->name); kfree(domain); } /** * irq_domain_instantiate() - Instantiate a new irq domain data structure * @info: Domain information pointer pointing to the information for this domain * * Return: A pointer to the instantiated irq domain or an ERR_PTR value. */ struct irq_domain *irq_domain_instantiate(const struct irq_domain_info *info) { struct irq_domain *domain; int err; domain = __irq_domain_create(info); if (IS_ERR(domain)) return domain; domain->flags |= info->domain_flags; domain->exit = info->exit; #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY if (info->parent) { domain->root = info->parent->root; domain->parent = info->parent; } #endif if (info->dgc_info) { err = irq_domain_alloc_generic_chips(domain, info->dgc_info); if (err) goto err_domain_free; } if (info->init) { err = info->init(domain); if (err) goto err_domain_gc_remove; } __irq_domain_publish(domain); return domain; err_domain_gc_remove: if (info->dgc_info) irq_domain_remove_generic_chips(domain); err_domain_free: irq_domain_free(domain); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(irq_domain_instantiate); /** * irq_domain_remove() - Remove an irq domain. * @domain: domain to remove * * This routine is used to remove an irq domain. The caller must ensure * that all mappings within the domain have been disposed of prior to * use, depending on the revmap type. */ void irq_domain_remove(struct irq_domain *domain) { if (domain->exit) domain->exit(domain); mutex_lock(&irq_domain_mutex); debugfs_remove_domain_dir(domain); WARN_ON(!radix_tree_empty(&domain->revmap_tree)); list_del(&domain->link); /* * If the going away domain is the default one, reset it. */ if (unlikely(irq_default_domain == domain)) irq_set_default_host(NULL); mutex_unlock(&irq_domain_mutex); if (domain->flags & IRQ_DOMAIN_FLAG_DESTROY_GC) irq_domain_remove_generic_chips(domain); pr_debug("Removed domain %s\n", domain->name); irq_domain_free(domain); } EXPORT_SYMBOL_GPL(irq_domain_remove); void irq_domain_update_bus_token(struct irq_domain *domain, enum irq_domain_bus_token bus_token) { char *name; if (domain->bus_token == bus_token) return; mutex_lock(&irq_domain_mutex); domain->bus_token = bus_token; name = kasprintf(GFP_KERNEL, "%s-%d", domain->name, bus_token); if (!name) { mutex_unlock(&irq_domain_mutex); return; } debugfs_remove_domain_dir(domain); if (domain->flags & IRQ_DOMAIN_NAME_ALLOCATED) kfree(domain->name); else domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; domain->name = name; debugfs_add_domain_dir(domain); mutex_unlock(&irq_domain_mutex); } EXPORT_SYMBOL_GPL(irq_domain_update_bus_token); /** * irq_domain_create_simple() - Register an irq_domain and optionally map a range of irqs * @fwnode: firmware node for the interrupt controller * @size: total number of irqs in mapping * @first_irq: first number of irq block assigned to the domain, * pass zero to assign irqs on-the-fly. If first_irq is non-zero, then * pre-map all of the irqs in the domain to virqs starting at first_irq. * @ops: domain callbacks * @host_data: Controller private data pointer * * Allocates an irq_domain, and optionally if first_irq is positive then also * allocate irq_descs and map all of the hwirqs to virqs starting at first_irq. * * This is intended to implement the expected behaviour for most * interrupt controllers. If device tree is used, then first_irq will be 0 and * irqs get mapped dynamically on the fly. However, if the controller requires * static virq assignments (non-DT boot) then it will set that up correctly. */ struct irq_domain *irq_domain_create_simple(struct fwnode_handle *fwnode, unsigned int size, unsigned int first_irq, const struct irq_domain_ops *ops, void *host_data) { struct irq_domain_info info = { .fwnode = fwnode, .size = size, .hwirq_max = size, .ops = ops, .host_data = host_data, }; struct irq_domain *domain; domain = irq_domain_instantiate(&info); if (IS_ERR(domain)) return NULL; if (first_irq > 0) { if (IS_ENABLED(CONFIG_SPARSE_IRQ)) { /* attempt to allocated irq_descs */ int rc = irq_alloc_descs(first_irq, first_irq, size, of_node_to_nid(to_of_node(fwnode))); if (rc < 0) pr_info("Cannot allocate irq_descs @ IRQ%d, assuming pre-allocated\n", first_irq); } irq_domain_associate_many(domain, first_irq, 0, size); } return domain; } EXPORT_SYMBOL_GPL(irq_domain_create_simple); /** * irq_domain_add_legacy() - Allocate and register a legacy revmap irq_domain. * @of_node: pointer to interrupt controller's device tree node. * @size: total number of irqs in legacy mapping * @first_irq: first number of irq block assigned to the domain * @first_hwirq: first hwirq number to use for the translation. Should normally * be '0', but a positive integer can be used if the effective * hwirqs numbering does not begin at zero. * @ops: map/unmap domain callbacks * @host_data: Controller private data pointer * * Note: the map() callback will be called before this function returns * for all legacy interrupts except 0 (which is always the invalid irq for * a legacy controller). */ struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, unsigned int size, unsigned int first_irq, irq_hw_number_t first_hwirq, const struct irq_domain_ops *ops, void *host_data) { return irq_domain_create_legacy(of_node_to_fwnode(of_node), size, first_irq, first_hwirq, ops, host_data); } EXPORT_SYMBOL_GPL(irq_domain_add_legacy); struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode, unsigned int size, unsigned int first_irq, irq_hw_number_t first_hwirq, const struct irq_domain_ops *ops, void *host_data) { struct irq_domain_info info = { .fwnode = fwnode, .size = first_hwirq + size, .hwirq_max = first_hwirq + size, .ops = ops, .host_data = host_data, }; struct irq_domain *domain; domain = irq_domain_instantiate(&info); if (IS_ERR(domain)) return NULL; irq_domain_associate_many(domain, first_irq, first_hwirq, size); return domain; } EXPORT_SYMBOL_GPL(irq_domain_create_legacy); /** * irq_find_matching_fwspec() - Locates a domain for a given fwspec * @fwspec: FW specifier for an interrupt * @bus_token: domain-specific data */ struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec, enum irq_domain_bus_token bus_token) { struct irq_domain *h, *found = NULL; struct fwnode_handle *fwnode = fwspec->fwnode; int rc; /* * We might want to match the legacy controller last since * it might potentially be set to match all interrupts in * the absence of a device node. This isn't a problem so far * yet though... * * bus_token == DOMAIN_BUS_ANY matches any domain, any other * values must generate an exact match for the domain to be * selected. */ mutex_lock(&irq_domain_mutex); list_for_each_entry(h, &irq_domain_list, link) { if (h->ops->select && bus_token != DOMAIN_BUS_ANY) rc = h->ops->select(h, fwspec, bus_token); else if (h->ops->match) rc = h->ops->match(h, to_of_node(fwnode), bus_token); else rc = ((fwnode != NULL) && (h->fwnode == fwnode) && ((bus_token == DOMAIN_BUS_ANY) || (h->bus_token == bus_token))); if (rc) { found = h; break; } } mutex_unlock(&irq_domain_mutex); return found; } EXPORT_SYMBOL_GPL(irq_find_matching_fwspec); /** * irq_set_default_host() - Set a "default" irq domain * @domain: default domain pointer * * For convenience, it's possible to set a "default" domain that will be used * whenever NULL is passed to irq_create_mapping(). It makes life easier for * platforms that want to manipulate a few hard coded interrupt numbers that * aren't properly represented in the device-tree. */ void irq_set_default_host(struct irq_domain *domain) { pr_debug("Default domain set to @0x%p\n", domain); irq_default_domain = domain; } EXPORT_SYMBOL_GPL(irq_set_default_host); /** * irq_get_default_host() - Retrieve the "default" irq domain * * Returns: the default domain, if any. * * Modern code should never use this. This should only be used on * systems that cannot implement a firmware->fwnode mapping (which * both DT and ACPI provide). */ struct irq_domain *irq_get_default_host(void) { return irq_default_domain; } EXPORT_SYMBOL_GPL(irq_get_default_host); static bool irq_domain_is_nomap(struct irq_domain *domain) { return IS_ENABLED(CONFIG_IRQ_DOMAIN_NOMAP) && (domain->flags & IRQ_DOMAIN_FLAG_NO_MAP); } static void irq_domain_clear_mapping(struct irq_domain *domain, irq_hw_number_t hwirq) { lockdep_assert_held(&domain->root->mutex); if (irq_domain_is_nomap(domain)) return; if (hwirq < domain->revmap_size) rcu_assign_pointer(domain->revmap[hwirq], NULL); else radix_tree_delete(&domain->revmap_tree, hwirq); } static void irq_domain_set_mapping(struct irq_domain *domain, irq_hw_number_t hwirq, struct irq_data *irq_data) { /* * This also makes sure that all domains point to the same root when * called from irq_domain_insert_irq() for each domain in a hierarchy. */ lockdep_assert_held(&domain->root->mutex); if (irq_domain_is_nomap(domain)) return; if (hwirq < domain->revmap_size) rcu_assign_pointer(domain->revmap[hwirq], irq_data); else radix_tree_insert(&domain->revmap_tree, hwirq, irq_data); } static void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq) { struct irq_data *irq_data = irq_get_irq_data(irq); irq_hw_number_t hwirq; if (WARN(!irq_data || irq_data->domain != domain, "virq%i doesn't exist; cannot disassociate\n", irq)) return; hwirq = irq_data->hwirq; mutex_lock(&domain->root->mutex); irq_set_status_flags(irq, IRQ_NOREQUEST); /* remove chip and handler */ irq_set_chip_and_handler(irq, NULL, NULL); /* Make sure it's completed */ synchronize_irq(irq); /* Tell the PIC about it */ if (domain->ops->unmap) domain->ops->unmap(domain, irq); smp_mb(); irq_data->domain = NULL; irq_data->hwirq = 0; domain->mapcount--; /* Clear reverse map for this hwirq */ irq_domain_clear_mapping(domain, hwirq); mutex_unlock(&domain->root->mutex); } static int irq_domain_associate_locked(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq) { struct irq_data *irq_data = irq_get_irq_data(virq); int ret; if (WARN(hwirq >= domain->hwirq_max, "error: hwirq 0x%x is too large for %s\n", (int)hwirq, domain->name)) return -EINVAL; if (WARN(!irq_data, "error: virq%i is not allocated", virq)) return -EINVAL; if (WARN(irq_data->domain, "error: virq%i is already associated", virq)) return -EINVAL; irq_data->hwirq = hwirq; irq_data->domain = domain; if (domain->ops->map) { ret = domain->ops->map(domain, virq, hwirq); if (ret != 0) { /* * If map() returns -EPERM, this interrupt is protected * by the firmware or some other service and shall not * be mapped. Don't bother telling the user about it. */ if (ret != -EPERM) { pr_info("%s didn't like hwirq-0x%lx to VIRQ%i mapping (rc=%d)\n", domain->name, hwirq, virq, ret); } irq_data->domain = NULL; irq_data->hwirq = 0; return ret; } } domain->mapcount++; irq_domain_set_mapping(domain, hwirq, irq_data); irq_clear_status_flags(virq, IRQ_NOREQUEST); return 0; } int irq_domain_associate(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq) { int ret; mutex_lock(&domain->root->mutex); ret = irq_domain_associate_locked(domain, virq, hwirq); mutex_unlock(&domain->root->mutex); return ret; } EXPORT_SYMBOL_GPL(irq_domain_associate); void irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base, irq_hw_number_t hwirq_base, int count) { struct device_node *of_node; int i; of_node = irq_domain_get_of_node(domain); pr_debug("%s(%s, irqbase=%i, hwbase=%i, count=%i)\n", __func__, of_node_full_name(of_node), irq_base, (int)hwirq_base, count); for (i = 0; i < count; i++) irq_domain_associate(domain, irq_base + i, hwirq_base + i); } EXPORT_SYMBOL_GPL(irq_domain_associate_many); #ifdef CONFIG_IRQ_DOMAIN_NOMAP /** * irq_create_direct_mapping() - Allocate an irq for direct mapping * @domain: domain to allocate the irq for or NULL for default domain * * This routine is used for irq controllers which can choose the hardware * interrupt numbers they generate. In such a case it's simplest to use * the linux irq as the hardware interrupt number. It still uses the linear * or radix tree to store the mapping, but the irq controller can optimize * the revmap path by using the hwirq directly. */ unsigned int irq_create_direct_mapping(struct irq_domain *domain) { struct device_node *of_node; unsigned int virq; if (domain == NULL) domain = irq_default_domain; of_node = irq_domain_get_of_node(domain); virq = irq_alloc_desc_from(1, of_node_to_nid(of_node)); if (!virq) { pr_debug("create_direct virq allocation failed\n"); return 0; } if (virq >= domain->hwirq_max) { pr_err("ERROR: no free irqs available below %lu maximum\n", domain->hwirq_max); irq_free_desc(virq); return 0; } pr_debug("create_direct obtained virq %d\n", virq); if (irq_domain_associate(domain, virq, virq)) { irq_free_desc(virq); return 0; } return virq; } EXPORT_SYMBOL_GPL(irq_create_direct_mapping); #endif static unsigned int irq_create_mapping_affinity_locked(struct irq_domain *domain, irq_hw_number_t hwirq, const struct irq_affinity_desc *affinity) { struct device_node *of_node = irq_domain_get_of_node(domain); int virq; pr_debug("irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq); /* Allocate a virtual interrupt number */ virq = irq_domain_alloc_descs(-1, 1, hwirq, of_node_to_nid(of_node), affinity); if (virq <= 0) { pr_debug("-> virq allocation failed\n"); return 0; } if (irq_domain_associate_locked(domain, virq, hwirq)) { irq_free_desc(virq); return 0; } pr_debug("irq %lu on domain %s mapped to virtual irq %u\n", hwirq, of_node_full_name(of_node), virq); return virq; } /** * irq_create_mapping_affinity() - Map a hardware interrupt into linux irq space * @domain: domain owning this hardware interrupt or NULL for default domain * @hwirq: hardware irq number in that domain space * @affinity: irq affinity * * Only one mapping per hardware interrupt is permitted. Returns a linux * irq number. * If the sense/trigger is to be specified, set_irq_type() should be called * on the number returned from that call. */ unsigned int irq_create_mapping_affinity(struct irq_domain *domain, irq_hw_number_t hwirq, const struct irq_affinity_desc *affinity) { int virq; /* Look for default domain if necessary */ if (domain == NULL) domain = irq_default_domain; if (domain == NULL) { WARN(1, "%s(, %lx) called with NULL domain\n", __func__, hwirq); return 0; } mutex_lock(&domain->root->mutex); /* Check if mapping already exists */ virq = irq_find_mapping(domain, hwirq); if (virq) { pr_debug("existing mapping on virq %d\n", virq); goto out; } virq = irq_create_mapping_affinity_locked(domain, hwirq, affinity); out: mutex_unlock(&domain->root->mutex); return virq; } EXPORT_SYMBOL_GPL(irq_create_mapping_affinity); static int irq_domain_translate(struct irq_domain *d, struct irq_fwspec *fwspec, irq_hw_number_t *hwirq, unsigned int *type) { #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY if (d->ops->translate) return d->ops->translate(d, fwspec, hwirq, type); #endif if (d->ops->xlate) return d->ops->xlate(d, to_of_node(fwspec->fwnode), fwspec->param, fwspec->param_count, hwirq, type); /* If domain has no translation, then we assume interrupt line */ *hwirq = fwspec->param[0]; return 0; } void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args, unsigned int count, struct irq_fwspec *fwspec) { int i; fwspec->fwnode = of_node_to_fwnode(np); fwspec->param_count = count; for (i = 0; i < count; i++) fwspec->param[i] = args[i]; } EXPORT_SYMBOL_GPL(of_phandle_args_to_fwspec); unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec) { struct irq_domain *domain; struct irq_data *irq_data; irq_hw_number_t hwirq; unsigned int type = IRQ_TYPE_NONE; int virq; if (fwspec->fwnode) { domain = irq_find_matching_fwspec(fwspec, DOMAIN_BUS_WIRED); if (!domain) domain = irq_find_matching_fwspec(fwspec, DOMAIN_BUS_ANY); } else { domain = irq_default_domain; } if (!domain) { pr_warn("no irq domain found for %s !\n", of_node_full_name(to_of_node(fwspec->fwnode))); return 0; } if (irq_domain_translate(domain, fwspec, &hwirq, &type)) return 0; /* * WARN if the irqchip returns a type with bits * outside the sense mask set and clear these bits. */ if (WARN_ON(type & ~IRQ_TYPE_SENSE_MASK)) type &= IRQ_TYPE_SENSE_MASK; mutex_lock(&domain->root->mutex); /* * If we've already configured this interrupt, * don't do it again, or hell will break loose. */ virq = irq_find_mapping(domain, hwirq); if (virq) { /* * If the trigger type is not specified or matches the * current trigger type then we are done so return the * interrupt number. */ if (type == IRQ_TYPE_NONE || type == irq_get_trigger_type(virq)) goto out; /* * If the trigger type has not been set yet, then set * it now and return the interrupt number. */ if (irq_get_trigger_type(virq) == IRQ_TYPE_NONE) { irq_data = irq_get_irq_data(virq); if (!irq_data) { virq = 0; goto out; } irqd_set_trigger_type(irq_data, type); goto out; } pr_warn("type mismatch, failed to map hwirq-%lu for %s!\n", hwirq, of_node_full_name(to_of_node(fwspec->fwnode))); virq = 0; goto out; } if (irq_domain_is_hierarchy(domain)) { if (irq_domain_is_msi_device(domain)) { mutex_unlock(&domain->root->mutex); virq = msi_device_domain_alloc_wired(domain, hwirq, type); mutex_lock(&domain->root->mutex); } else virq = irq_domain_alloc_irqs_locked(domain, -1, 1, NUMA_NO_NODE, fwspec, false, NULL); if (virq <= 0) { virq = 0; goto out; } } else { /* Create mapping */ virq = irq_create_mapping_affinity_locked(domain, hwirq, NULL); if (!virq) goto out; } irq_data = irq_get_irq_data(virq); if (WARN_ON(!irq_data)) { virq = 0; goto out; } /* Store trigger type */ irqd_set_trigger_type(irq_data, type); out: mutex_unlock(&domain->root->mutex); return virq; } EXPORT_SYMBOL_GPL(irq_create_fwspec_mapping); unsigned int irq_create_of_mapping(struct of_phandle_args *irq_data) { struct irq_fwspec fwspec; of_phandle_args_to_fwspec(irq_data->np, irq_data->args, irq_data->args_count, &fwspec); return irq_create_fwspec_mapping(&fwspec); } EXPORT_SYMBOL_GPL(irq_create_of_mapping); /** * irq_dispose_mapping() - Unmap an interrupt * @virq: linux irq number of the interrupt to unmap */ void irq_dispose_mapping(unsigned int virq) { struct irq_data *irq_data; struct irq_domain *domain; irq_data = virq ? irq_get_irq_data(virq) : NULL; if (!irq_data) return; domain = irq_data->domain; if (WARN_ON(domain == NULL)) return; if (irq_domain_is_hierarchy(domain)) { irq_domain_free_one_irq(domain, virq); } else { irq_domain_disassociate(domain, virq); irq_free_desc(virq); } } EXPORT_SYMBOL_GPL(irq_dispose_mapping); /** * __irq_resolve_mapping() - Find a linux irq from a hw irq number. * @domain: domain owning this hardware interrupt * @hwirq: hardware irq number in that domain space * @irq: optional pointer to return the Linux irq if required * * Returns the interrupt descriptor. */ struct irq_desc *__irq_resolve_mapping(struct irq_domain *domain, irq_hw_number_t hwirq, unsigned int *irq) { struct irq_desc *desc = NULL; struct irq_data *data; /* Look for default domain if necessary */ if (domain == NULL) domain = irq_default_domain; if (domain == NULL) return desc; if (irq_domain_is_nomap(domain)) { if (hwirq < domain->hwirq_max) { data = irq_domain_get_irq_data(domain, hwirq); if (data && data->hwirq == hwirq) desc = irq_data_to_desc(data); if (irq && desc) *irq = hwirq; } return desc; } rcu_read_lock(); /* Check if the hwirq is in the linear revmap. */ if (hwirq < domain->revmap_size) data = rcu_dereference(domain->revmap[hwirq]); else data = radix_tree_lookup(&domain->revmap_tree, hwirq); if (likely(data)) { desc = irq_data_to_desc(data); if (irq) *irq = data->irq; } rcu_read_unlock(); return desc; } EXPORT_SYMBOL_GPL(__irq_resolve_mapping); /** * irq_domain_xlate_onecell() - Generic xlate for direct one cell bindings * @d: Interrupt domain involved in the translation * @ctrlr: The device tree node for the device whose interrupt is translated * @intspec: The interrupt specifier data from the device tree * @intsize: The number of entries in @intspec * @out_hwirq: Pointer to storage for the hardware interrupt number * @out_type: Pointer to storage for the interrupt type * * Device Tree IRQ specifier translation function which works with one cell * bindings where the cell value maps directly to the hwirq number. */ int irq_domain_xlate_onecell(struct irq_domain *d, struct device_node *ctrlr, const u32 *intspec, unsigned int intsize, unsigned long *out_hwirq, unsigned int *out_type) { if (WARN_ON(intsize < 1)) return -EINVAL; *out_hwirq = intspec[0]; *out_type = IRQ_TYPE_NONE; return 0; } EXPORT_SYMBOL_GPL(irq_domain_xlate_onecell); /** * irq_domain_xlate_twocell() - Generic xlate for direct two cell bindings * @d: Interrupt domain involved in the translation * @ctrlr: The device tree node for the device whose interrupt is translated * @intspec: The interrupt specifier data from the device tree * @intsize: The number of entries in @intspec * @out_hwirq: Pointer to storage for the hardware interrupt number * @out_type: Pointer to storage for the interrupt type * * Device Tree IRQ specifier translation function which works with two cell * bindings where the cell values map directly to the hwirq number * and linux irq flags. */ int irq_domain_xlate_twocell(struct irq_domain *d, struct device_node *ctrlr, const u32 *intspec, unsigned int intsize, irq_hw_number_t *out_hwirq, unsigned int *out_type) { struct irq_fwspec fwspec; of_phandle_args_to_fwspec(ctrlr, intspec, intsize, &fwspec); return irq_domain_translate_twocell(d, &fwspec, out_hwirq, out_type); } EXPORT_SYMBOL_GPL(irq_domain_xlate_twocell); /** * irq_domain_xlate_onetwocell() - Generic xlate for one or two cell bindings * @d: Interrupt domain involved in the translation * @ctrlr: The device tree node for the device whose interrupt is translated * @intspec: The interrupt specifier data from the device tree * @intsize: The number of entries in @intspec * @out_hwirq: Pointer to storage for the hardware interrupt number * @out_type: Pointer to storage for the interrupt type * * Device Tree IRQ specifier translation function which works with either one * or two cell bindings where the cell values map directly to the hwirq number * and linux irq flags. * * Note: don't use this function unless your interrupt controller explicitly * supports both one and two cell bindings. For the majority of controllers * the _onecell() or _twocell() variants above should be used. */ int irq_domain_xlate_onetwocell(struct irq_domain *d, struct device_node *ctrlr, const u32 *intspec, unsigned int intsize, unsigned long *out_hwirq, unsigned int *out_type) { if (WARN_ON(intsize < 1)) return -EINVAL; *out_hwirq = intspec[0]; if (intsize > 1) *out_type = intspec[1] & IRQ_TYPE_SENSE_MASK; else *out_type = IRQ_TYPE_NONE; return 0; } EXPORT_SYMBOL_GPL(irq_domain_xlate_onetwocell); const struct irq_domain_ops irq_domain_simple_ops = { .xlate = irq_domain_xlate_onetwocell, }; EXPORT_SYMBOL_GPL(irq_domain_simple_ops); /** * irq_domain_translate_onecell() - Generic translate for direct one cell * bindings * @d: Interrupt domain involved in the translation * @fwspec: The firmware interrupt specifier to translate * @out_hwirq: Pointer to storage for the hardware interrupt number * @out_type: Pointer to storage for the interrupt type */ int irq_domain_translate_onecell(struct irq_domain *d, struct irq_fwspec *fwspec, unsigned long *out_hwirq, unsigned int *out_type) { if (WARN_ON(fwspec->param_count < 1)) return -EINVAL; *out_hwirq = fwspec->param[0]; *out_type = IRQ_TYPE_NONE; return 0; } EXPORT_SYMBOL_GPL(irq_domain_translate_onecell); /** * irq_domain_translate_twocell() - Generic translate for direct two cell * bindings * @d: Interrupt domain involved in the translation * @fwspec: The firmware interrupt specifier to translate * @out_hwirq: Pointer to storage for the hardware interrupt number * @out_type: Pointer to storage for the interrupt type * * Device Tree IRQ specifier translation function which works with two cell * bindings where the cell values map directly to the hwirq number * and linux irq flags. */ int irq_domain_translate_twocell(struct irq_domain *d, struct irq_fwspec *fwspec, unsigned long *out_hwirq, unsigned int *out_type) { if (WARN_ON(fwspec->param_count < 2)) return -EINVAL; *out_hwirq = fwspec->param[0]; *out_type = fwspec->param[1] & IRQ_TYPE_SENSE_MASK; return 0; } EXPORT_SYMBOL_GPL(irq_domain_translate_twocell); int irq_domain_alloc_descs(int virq, unsigned int cnt, irq_hw_number_t hwirq, int node, const struct irq_affinity_desc *affinity) { unsigned int hint; if (virq >= 0) { virq = __irq_alloc_descs(virq, virq, cnt, node, THIS_MODULE, affinity); } else { hint = hwirq % nr_irqs; if (hint == 0) hint++; virq = __irq_alloc_descs(-1, hint, cnt, node, THIS_MODULE, affinity); if (virq <= 0 && hint > 1) { virq = __irq_alloc_descs(-1, 1, cnt, node, THIS_MODULE, affinity); } } return virq; } /** * irq_domain_reset_irq_data - Clear hwirq, chip and chip_data in @irq_data * @irq_data: The pointer to irq_data */ void irq_domain_reset_irq_data(struct irq_data *irq_data) { irq_data->hwirq = 0; irq_data->chip = &no_irq_chip; irq_data->chip_data = NULL; } EXPORT_SYMBOL_GPL(irq_domain_reset_irq_data); #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY /** * irq_domain_create_hierarchy - Add a irqdomain into the hierarchy * @parent: Parent irq domain to associate with the new domain * @flags: Irq domain flags associated to the domain * @size: Size of the domain. See below * @fwnode: Optional fwnode of the interrupt controller * @ops: Pointer to the interrupt domain callbacks * @host_data: Controller private data pointer * * If @size is 0 a tree domain is created, otherwise a linear domain. * * If successful the parent is associated to the new domain and the * domain flags are set. * Returns pointer to IRQ domain, or NULL on failure. */ struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent, unsigned int flags, unsigned int size, struct fwnode_handle *fwnode, const struct irq_domain_ops *ops, void *host_data) { struct irq_domain_info info = { .fwnode = fwnode, .size = size, .hwirq_max = size, .ops = ops, .host_data = host_data, .domain_flags = flags, .parent = parent, }; struct irq_domain *d; if (!info.size) info.hwirq_max = ~0U; d = irq_domain_instantiate(&info); return IS_ERR(d) ? NULL : d; } EXPORT_SYMBOL_GPL(irq_domain_create_hierarchy); static void irq_domain_insert_irq(int virq) { struct irq_data *data; for (data = irq_get_irq_data(virq); data; data = data->parent_data) { struct irq_domain *domain = data->domain; domain->mapcount++; irq_domain_set_mapping(domain, data->hwirq, data); } irq_clear_status_flags(virq, IRQ_NOREQUEST); } static void irq_domain_remove_irq(int virq) { struct irq_data *data; irq_set_status_flags(virq, IRQ_NOREQUEST); irq_set_chip_and_handler(virq, NULL, NULL); synchronize_irq(virq); smp_mb(); for (data = irq_get_irq_data(virq); data; data = data->parent_data) { struct irq_domain *domain = data->domain; irq_hw_number_t hwirq = data->hwirq; domain->mapcount--; irq_domain_clear_mapping(domain, hwirq); } } static struct irq_data *irq_domain_insert_irq_data(struct irq_domain *domain, struct irq_data *child) { struct irq_data *irq_data; irq_data = kzalloc_node(sizeof(*irq_data), GFP_KERNEL, irq_data_get_node(child)); if (irq_data) { child->parent_data = irq_data; irq_data->irq = child->irq; irq_data->common = child->common; irq_data->domain = domain; } return irq_data; } static void __irq_domain_free_hierarchy(struct irq_data *irq_data) { struct irq_data *tmp; while (irq_data) { tmp = irq_data; irq_data = irq_data->parent_data; kfree(tmp); } } static void irq_domain_free_irq_data(unsigned int virq, unsigned int nr_irqs) { struct irq_data *irq_data, *tmp; int i; for (i = 0; i < nr_irqs; i++) { irq_data = irq_get_irq_data(virq + i); tmp = irq_data->parent_data; irq_data->parent_data = NULL; irq_data->domain = NULL; __irq_domain_free_hierarchy(tmp); } } /** * irq_domain_disconnect_hierarchy - Mark the first unused level of a hierarchy * @domain: IRQ domain from which the hierarchy is to be disconnected * @virq: IRQ number where the hierarchy is to be trimmed * * Marks the @virq level belonging to @domain as disconnected. * Returns -EINVAL if @virq doesn't have a valid irq_data pointing * to @domain. * * Its only use is to be able to trim levels of hierarchy that do not * have any real meaning for this interrupt, and that the driver marks * as such from its .alloc() callback. */ int irq_domain_disconnect_hierarchy(struct irq_domain *domain, unsigned int virq) { struct irq_data *irqd; irqd = irq_domain_get_irq_data(domain, virq); if (!irqd) return -EINVAL; irqd->chip = ERR_PTR(-ENOTCONN); return 0; } EXPORT_SYMBOL_GPL(irq_domain_disconnect_hierarchy); static int irq_domain_trim_hierarchy(unsigned int virq) { struct irq_data *tail, *irqd, *irq_data; irq_data = irq_get_irq_data(virq); tail = NULL; /* The first entry must have a valid irqchip */ if (!irq_data->chip || IS_ERR(irq_data->chip)) return -EINVAL; /* * Validate that the irq_data chain is sane in the presence of * a hierarchy trimming marker. */ for (irqd = irq_data->parent_data; irqd; irq_data = irqd, irqd = irqd->parent_data) { /* Can't have a valid irqchip after a trim marker */ if (irqd->chip && tail) return -EINVAL; /* Can't have an empty irqchip before a trim marker */ if (!irqd->chip && !tail) return -EINVAL; if (IS_ERR(irqd->chip)) { /* Only -ENOTCONN is a valid trim marker */ if (PTR_ERR(irqd->chip) != -ENOTCONN) return -EINVAL; tail = irq_data; } } /* No trim marker, nothing to do */ if (!tail) return 0; pr_info("IRQ%d: trimming hierarchy from %s\n", virq, tail->parent_data->domain->name); /* Sever the inner part of the hierarchy... */ irqd = tail; tail = tail->parent_data; irqd->parent_data = NULL; __irq_domain_free_hierarchy(tail); return 0; } static int irq_domain_alloc_irq_data(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs) { struct irq_data *irq_data; struct irq_domain *parent; int i; /* The outermost irq_data is embedded in struct irq_desc */ for (i = 0; i < nr_irqs; i++) { irq_data = irq_get_irq_data(virq + i); irq_data->domain = domain; for (parent = domain->parent; parent; parent = parent->parent) { irq_data = irq_domain_insert_irq_data(parent, irq_data); if (!irq_data) { irq_domain_free_irq_data(virq, i + 1); return -ENOMEM; } } } return 0; } /** * irq_domain_get_irq_data - Get irq_data associated with @virq and @domain * @domain: domain to match * @virq: IRQ number to get irq_data */ struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain, unsigned int virq) { struct irq_data *irq_data; for (irq_data = irq_get_irq_data(virq); irq_data; irq_data = irq_data->parent_data) if (irq_data->domain == domain) return irq_data; return NULL; } EXPORT_SYMBOL_GPL(irq_domain_get_irq_data); /** * irq_domain_set_hwirq_and_chip - Set hwirq and irqchip of @virq at @domain * @domain: Interrupt domain to match * @virq: IRQ number * @hwirq: The hwirq number * @chip: The associated interrupt chip * @chip_data: The associated chip data */ int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq, const struct irq_chip *chip, void *chip_data) { struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq); if (!irq_data) return -ENOENT; irq_data->hwirq = hwirq; irq_data->chip = (struct irq_chip *)(chip ? chip : &no_irq_chip); irq_data->chip_data = chip_data; return 0; } EXPORT_SYMBOL_GPL(irq_domain_set_hwirq_and_chip); /** * irq_domain_set_info - Set the complete data for a @virq in @domain * @domain: Interrupt domain to match * @virq: IRQ number * @hwirq: The hardware interrupt number * @chip: The associated interrupt chip * @chip_data: The associated interrupt chip data * @handler: The interrupt flow handler * @handler_data: The interrupt flow handler data * @handler_name: The interrupt handler name */ void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq, const struct irq_chip *chip, void *chip_data, irq_flow_handler_t handler, void *handler_data, const char *handler_name) { irq_domain_set_hwirq_and_chip(domain, virq, hwirq, chip, chip_data); __irq_set_handler(virq, handler, 0, handler_name); irq_set_handler_data(virq, handler_data); } EXPORT_SYMBOL(irq_domain_set_info); /** * irq_domain_free_irqs_common - Clear irq_data and free the parent * @domain: Interrupt domain to match * @virq: IRQ number to start with * @nr_irqs: The number of irqs to free */ void irq_domain_free_irqs_common(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs) { struct irq_data *irq_data; int i; for (i = 0; i < nr_irqs; i++) { irq_data = irq_domain_get_irq_data(domain, virq + i); if (irq_data) irq_domain_reset_irq_data(irq_data); } irq_domain_free_irqs_parent(domain, virq, nr_irqs); } EXPORT_SYMBOL_GPL(irq_domain_free_irqs_common); /** * irq_domain_free_irqs_top - Clear handler and handler data, clear irqdata and free parent * @domain: Interrupt domain to match * @virq: IRQ number to start with * @nr_irqs: The number of irqs to free */ void irq_domain_free_irqs_top(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs) { int i; for (i = 0; i < nr_irqs; i++) { irq_set_handler_data(virq + i, NULL); irq_set_handler(virq + i, NULL); } irq_domain_free_irqs_common(domain, virq, nr_irqs); } static void irq_domain_free_irqs_hierarchy(struct irq_domain *domain, unsigned int irq_base, unsigned int nr_irqs) { unsigned int i; if (!domain->ops->free) return; for (i = 0; i < nr_irqs; i++) { if (irq_domain_get_irq_data(domain, irq_base + i)) domain->ops->free(domain, irq_base + i, 1); } } int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain, unsigned int irq_base, unsigned int nr_irqs, void *arg) { if (!domain->ops->alloc) { pr_debug("domain->ops->alloc() is NULL\n"); return -ENOSYS; } return domain->ops->alloc(domain, irq_base, nr_irqs, arg); } static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base, unsigned int nr_irqs, int node, void *arg, bool realloc, const struct irq_affinity_desc *affinity) { int i, ret, virq; if (realloc && irq_base >= 0) { virq = irq_base; } else { virq = irq_domain_alloc_descs(irq_base, nr_irqs, 0, node, affinity); if (virq < 0) { pr_debug("cannot allocate IRQ(base %d, count %d)\n", irq_base, nr_irqs); return virq; } } if (irq_domain_alloc_irq_data(domain, virq, nr_irqs)) { pr_debug("cannot allocate memory for IRQ%d\n", virq); ret = -ENOMEM; goto out_free_desc; } ret = irq_domain_alloc_irqs_hierarchy(domain, virq, nr_irqs, arg); if (ret < 0) goto out_free_irq_data; for (i = 0; i < nr_irqs; i++) { ret = irq_domain_trim_hierarchy(virq + i); if (ret) goto out_free_irq_data; } for (i = 0; i < nr_irqs; i++) irq_domain_insert_irq(virq + i); return virq; out_free_irq_data: irq_domain_free_irq_data(virq, nr_irqs); out_free_desc: irq_free_descs(virq, nr_irqs); return ret; } /** * __irq_domain_alloc_irqs - Allocate IRQs from domain * @domain: domain to allocate from * @irq_base: allocate specified IRQ number if irq_base >= 0 * @nr_irqs: number of IRQs to allocate * @node: NUMA node id for memory allocation * @arg: domain specific argument * @realloc: IRQ descriptors have already been allocated if true * @affinity: Optional irq affinity mask for multiqueue devices * * Allocate IRQ numbers and initialized all data structures to support * hierarchy IRQ domains. * Parameter @realloc is mainly to support legacy IRQs. * Returns error code or allocated IRQ number * * The whole process to setup an IRQ has been split into two steps. * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ * descriptor and required hardware resources. The second step, * irq_domain_activate_irq(), is to program the hardware with preallocated * resources. In this way, it's easier to rollback when failing to * allocate resources. */ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, unsigned int nr_irqs, int node, void *arg, bool realloc, const struct irq_affinity_desc *affinity) { int ret; if (domain == NULL) { domain = irq_default_domain; if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n")) return -EINVAL; } mutex_lock(&domain->root->mutex); ret = irq_domain_alloc_irqs_locked(domain, irq_base, nr_irqs, node, arg, realloc, affinity); mutex_unlock(&domain->root->mutex); return ret; } EXPORT_SYMBOL_GPL(__irq_domain_alloc_irqs); /* The irq_data was moved, fix the revmap to refer to the new location */ static void irq_domain_fix_revmap(struct irq_data *d) { void __rcu **slot; lockdep_assert_held(&d->domain->root->mutex); if (irq_domain_is_nomap(d->domain)) return; /* Fix up the revmap. */ if (d->hwirq < d->domain->revmap_size) { /* Not using radix tree */ rcu_assign_pointer(d->domain->revmap[d->hwirq], d); } else { slot = radix_tree_lookup_slot(&d->domain->revmap_tree, d->hwirq); if (slot) radix_tree_replace_slot(&d->domain->revmap_tree, slot, d); } } /** * irq_domain_push_irq() - Push a domain in to the top of a hierarchy. * @domain: Domain to push. * @virq: Irq to push the domain in to. * @arg: Passed to the irq_domain_ops alloc() function. * * For an already existing irqdomain hierarchy, as might be obtained * via a call to pci_enable_msix(), add an additional domain to the * head of the processing chain. Must be called before request_irq() * has been called. */ int irq_domain_push_irq(struct irq_domain *domain, int virq, void *arg) { struct irq_data *irq_data = irq_get_irq_data(virq); struct irq_data *parent_irq_data; struct irq_desc *desc; int rv = 0; /* * Check that no action has been set, which indicates the virq * is in a state where this function doesn't have to deal with * races between interrupt handling and maintaining the * hierarchy. This will catch gross misuse. Attempting to * make the check race free would require holding locks across * calls to struct irq_domain_ops->alloc(), which could lead * to deadlock, so we just do a simple check before starting. */ desc = irq_to_desc(virq); if (!desc) return -EINVAL; if (WARN_ON(desc->action)) return -EBUSY; if (domain == NULL) return -EINVAL; if (WARN_ON(!irq_domain_is_hierarchy(domain))) return -EINVAL; if (!irq_data) return -EINVAL; if (domain->parent != irq_data->domain) return -EINVAL; parent_irq_data = kzalloc_node(sizeof(*parent_irq_data), GFP_KERNEL, irq_data_get_node(irq_data)); if (!parent_irq_data) return -ENOMEM; mutex_lock(&domain->root->mutex); /* Copy the original irq_data. */ *parent_irq_data = *irq_data; /* * Overwrite the irq_data, which is embedded in struct irq_desc, with * values for this domain. */ irq_data->parent_data = parent_irq_data; irq_data->domain = domain; irq_data->mask = 0; irq_data->hwirq = 0; irq_data->chip = NULL; irq_data->chip_data = NULL; /* May (probably does) set hwirq, chip, etc. */ rv = irq_domain_alloc_irqs_hierarchy(domain, virq, 1, arg); if (rv) { /* Restore the original irq_data. */ *irq_data = *parent_irq_data; kfree(parent_irq_data); goto error; } irq_domain_fix_revmap(parent_irq_data); irq_domain_set_mapping(domain, irq_data->hwirq, irq_data); error: mutex_unlock(&domain->root->mutex); return rv; } EXPORT_SYMBOL_GPL(irq_domain_push_irq); /** * irq_domain_pop_irq() - Remove a domain from the top of a hierarchy. * @domain: Domain to remove. * @virq: Irq to remove the domain from. * * Undo the effects of a call to irq_domain_push_irq(). Must be * called either before request_irq() or after free_irq(). */ int irq_domain_pop_irq(struct irq_domain *domain, int virq) { struct irq_data *irq_data = irq_get_irq_data(virq); struct irq_data *parent_irq_data; struct irq_data *tmp_irq_data; struct irq_desc *desc; /* * Check that no action is set, which indicates the virq is in * a state where this function doesn't have to deal with races * between interrupt handling and maintaining the hierarchy. * This will catch gross misuse. Attempting to make the check * race free would require holding locks across calls to * struct irq_domain_ops->free(), which could lead to * deadlock, so we just do a simple check before starting. */ desc = irq_to_desc(virq); if (!desc) return -EINVAL; if (WARN_ON(desc->action)) return -EBUSY; if (domain == NULL) return -EINVAL; if (!irq_data) return -EINVAL; tmp_irq_data = irq_domain_get_irq_data(domain, virq); /* We can only "pop" if this domain is at the top of the list */ if (WARN_ON(irq_data != tmp_irq_data)) return -EINVAL; if (WARN_ON(irq_data->domain != domain)) return -EINVAL; parent_irq_data = irq_data->parent_data; if (WARN_ON(!parent_irq_data)) return -EINVAL; mutex_lock(&domain->root->mutex); irq_data->parent_data = NULL; irq_domain_clear_mapping(domain, irq_data->hwirq); irq_domain_free_irqs_hierarchy(domain, virq, 1); /* Restore the original irq_data. */ *irq_data = *parent_irq_data; irq_domain_fix_revmap(irq_data); mutex_unlock(&domain->root->mutex); kfree(parent_irq_data); return 0; } EXPORT_SYMBOL_GPL(irq_domain_pop_irq); /** * irq_domain_free_irqs - Free IRQ number and associated data structures * @virq: base IRQ number * @nr_irqs: number of IRQs to free */ void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs) { struct irq_data *data = irq_get_irq_data(virq); struct irq_domain *domain; int i; if (WARN(!data || !data->domain || !data->domain->ops->free, "NULL pointer, cannot free irq\n")) return; domain = data->domain; mutex_lock(&domain->root->mutex); for (i = 0; i < nr_irqs; i++) irq_domain_remove_irq(virq + i); irq_domain_free_irqs_hierarchy(domain, virq, nr_irqs); mutex_unlock(&domain->root->mutex); irq_domain_free_irq_data(virq, nr_irqs); irq_free_descs(virq, nr_irqs); } static void irq_domain_free_one_irq(struct irq_domain *domain, unsigned int virq) { if (irq_domain_is_msi_device(domain)) msi_device_domain_free_wired(domain, virq); else irq_domain_free_irqs(virq, 1); } /** * irq_domain_alloc_irqs_parent - Allocate interrupts from parent domain * @domain: Domain below which interrupts must be allocated * @irq_base: Base IRQ number * @nr_irqs: Number of IRQs to allocate * @arg: Allocation data (arch/domain specific) */ int irq_domain_alloc_irqs_parent(struct irq_domain *domain, unsigned int irq_base, unsigned int nr_irqs, void *arg) { if (!domain->parent) return -ENOSYS; return irq_domain_alloc_irqs_hierarchy(domain->parent, irq_base, nr_irqs, arg); } EXPORT_SYMBOL_GPL(irq_domain_alloc_irqs_parent); /** * irq_domain_free_irqs_parent - Free interrupts from parent domain * @domain: Domain below which interrupts must be freed * @irq_base: Base IRQ number * @nr_irqs: Number of IRQs to free */ void irq_domain_free_irqs_parent(struct irq_domain *domain, unsigned int irq_base, unsigned int nr_irqs) { if (!domain->parent) return; irq_domain_free_irqs_hierarchy(domain->parent, irq_base, nr_irqs); } EXPORT_SYMBOL_GPL(irq_domain_free_irqs_parent); static void __irq_domain_deactivate_irq(struct irq_data *irq_data) { if (irq_data && irq_data->domain) { struct irq_domain *domain = irq_data->domain; if (domain->ops->deactivate) domain->ops->deactivate(domain, irq_data); if (irq_data->parent_data) __irq_domain_deactivate_irq(irq_data->parent_data); } } static int __irq_domain_activate_irq(struct irq_data *irqd, bool reserve) { int ret = 0; if (irqd && irqd->domain) { struct irq_domain *domain = irqd->domain; if (irqd->parent_data) ret = __irq_domain_activate_irq(irqd->parent_data, reserve); if (!ret && domain->ops->activate) { ret = domain->ops->activate(domain, irqd, reserve); /* Rollback in case of error */ if (ret && irqd->parent_data) __irq_domain_deactivate_irq(irqd->parent_data); } } return ret; } /** * irq_domain_activate_irq - Call domain_ops->activate recursively to activate * interrupt * @irq_data: Outermost irq_data associated with interrupt * @reserve: If set only reserve an interrupt vector instead of assigning one * * This is the second step to call domain_ops->activate to program interrupt * controllers, so the interrupt could actually get delivered. */ int irq_domain_activate_irq(struct irq_data *irq_data, bool reserve) { int ret = 0; if (!irqd_is_activated(irq_data)) ret = __irq_domain_activate_irq(irq_data, reserve); if (!ret) irqd_set_activated(irq_data); return ret; } /** * irq_domain_deactivate_irq - Call domain_ops->deactivate recursively to * deactivate interrupt * @irq_data: outermost irq_data associated with interrupt * * It calls domain_ops->deactivate to program interrupt controllers to disable * interrupt delivery. */ void irq_domain_deactivate_irq(struct irq_data *irq_data) { if (irqd_is_activated(irq_data)) { __irq_domain_deactivate_irq(irq_data); irqd_clr_activated(irq_data); } } static void irq_domain_check_hierarchy(struct irq_domain *domain) { /* Hierarchy irq_domains must implement callback alloc() */ if (domain->ops->alloc) domain->flags |= IRQ_DOMAIN_FLAG_HIERARCHY; } #else /* CONFIG_IRQ_DOMAIN_HIERARCHY */ /** * irq_domain_get_irq_data - Get irq_data associated with @virq and @domain * @domain: domain to match * @virq: IRQ number to get irq_data */ struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain, unsigned int virq) { struct irq_data *irq_data = irq_get_irq_data(virq); return (irq_data && irq_data->domain == domain) ? irq_data : NULL; } EXPORT_SYMBOL_GPL(irq_domain_get_irq_data); /** * irq_domain_set_info - Set the complete data for a @virq in @domain * @domain: Interrupt domain to match * @virq: IRQ number * @hwirq: The hardware interrupt number * @chip: The associated interrupt chip * @chip_data: The associated interrupt chip data * @handler: The interrupt flow handler * @handler_data: The interrupt flow handler data * @handler_name: The interrupt handler name */ void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq, const struct irq_chip *chip, void *chip_data, irq_flow_handler_t handler, void *handler_data, const char *handler_name) { irq_set_chip_and_handler_name(virq, chip, handler, handler_name); irq_set_chip_data(virq, chip_data); irq_set_handler_data(virq, handler_data); } static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base, unsigned int nr_irqs, int node, void *arg, bool realloc, const struct irq_affinity_desc *affinity) { return -EINVAL; } static void irq_domain_check_hierarchy(struct irq_domain *domain) { } static void irq_domain_free_one_irq(struct irq_domain *domain, unsigned int virq) { } #endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ #ifdef CONFIG_GENERIC_IRQ_DEBUGFS #include "internals.h" static struct dentry *domain_dir; static const struct irq_bit_descr irqdomain_flags[] = { BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_HIERARCHY), BIT_MASK_DESCR(IRQ_DOMAIN_NAME_ALLOCATED), BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_IPI_PER_CPU), BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_IPI_SINGLE), BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_MSI), BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_ISOLATED_MSI), BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_NO_MAP), BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_MSI_PARENT), BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_MSI_DEVICE), BIT_MASK_DESCR(IRQ_DOMAIN_FLAG_NONCORE), }; static void irq_domain_debug_show_one(struct seq_file *m, struct irq_domain *d, int ind) { seq_printf(m, "%*sname: %s\n", ind, "", d->name); seq_printf(m, "%*ssize: %u\n", ind + 1, "", d->revmap_size); seq_printf(m, "%*smapped: %u\n", ind + 1, "", d->mapcount); seq_printf(m, "%*sflags: 0x%08x\n", ind +1 , "", d->flags); irq_debug_show_bits(m, ind, d->flags, irqdomain_flags, ARRAY_SIZE(irqdomain_flags)); if (d->ops && d->ops->debug_show) d->ops->debug_show(m, d, NULL, ind + 1); #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY if (!d->parent) return; seq_printf(m, "%*sparent: %s\n", ind + 1, "", d->parent->name); irq_domain_debug_show_one(m, d->parent, ind + 4); #endif } static int irq_domain_debug_show(struct seq_file *m, void *p) { struct irq_domain *d = m->private; /* Default domain? Might be NULL */ if (!d) { if (!irq_default_domain) return 0; d = irq_default_domain; } irq_domain_debug_show_one(m, d, 0); return 0; } DEFINE_SHOW_ATTRIBUTE(irq_domain_debug); static void debugfs_add_domain_dir(struct irq_domain *d) { if (!d->name || !domain_dir) return; debugfs_create_file(d->name, 0444, domain_dir, d, &irq_domain_debug_fops); } static void debugfs_remove_domain_dir(struct irq_domain *d) { debugfs_lookup_and_remove(d->name, domain_dir); } void __init irq_domain_debugfs_init(struct dentry *root) { struct irq_domain *d; domain_dir = debugfs_create_dir("domains", root); debugfs_create_file("default", 0444, domain_dir, NULL, &irq_domain_debug_fops); mutex_lock(&irq_domain_mutex); list_for_each_entry(d, &irq_domain_list, link) debugfs_add_domain_dir(d); mutex_unlock(&irq_domain_mutex); } #endif |
| 8764 8761 8133 8136 8759 8136 8764 4868 4870 4867 3979 3978 1019 3743 3644 91 543 3979 3823 4871 4870 8448 4379 4375 558 4379 8451 155 155 155 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 | // SPDX-License-Identifier: GPL-2.0-or-later /* * lib/plist.c * * Descending-priority-sorted double-linked list * * (C) 2002-2003 Intel Corp * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>. * * 2001-2005 (c) MontaVista Software, Inc. * Daniel Walker <dwalker@mvista.com> * * (C) 2005 Thomas Gleixner <tglx@linutronix.de> * * Simplifications of the original code by * Oleg Nesterov <oleg@tv-sign.ru> * * Based on simple lists (include/linux/list.h). * * This file contains the add / del functions which are considered to * be too large to inline. See include/linux/plist.h for further * information. */ #include <linux/bug.h> #include <linux/plist.h> #ifdef CONFIG_DEBUG_PLIST static struct plist_head test_head; static void plist_check_prev_next(struct list_head *t, struct list_head *p, struct list_head *n) { WARN(n->prev != p || p->next != n, "top: %p, n: %p, p: %p\n" "prev: %p, n: %p, p: %p\n" "next: %p, n: %p, p: %p\n", t, t->next, t->prev, p, p->next, p->prev, n, n->next, n->prev); } static void plist_check_list(struct list_head *top) { struct list_head *prev = top, *next = top->next; plist_check_prev_next(top, prev, next); while (next != top) { WRITE_ONCE(prev, next); WRITE_ONCE(next, prev->next); plist_check_prev_next(top, prev, next); } } static void plist_check_head(struct plist_head *head) { if (!plist_head_empty(head)) plist_check_list(&plist_first(head)->prio_list); plist_check_list(&head->node_list); } #else # define plist_check_head(h) do { } while (0) #endif /** * plist_add - add @node to @head * * @node: &struct plist_node pointer * @head: &struct plist_head pointer */ void plist_add(struct plist_node *node, struct plist_head *head) { struct plist_node *first, *iter, *prev = NULL, *last, *reverse_iter; struct list_head *node_next = &head->node_list; plist_check_head(head); WARN_ON(!plist_node_empty(node)); WARN_ON(!list_empty(&node->prio_list)); if (plist_head_empty(head)) goto ins_node; first = iter = plist_first(head); last = reverse_iter = list_entry(first->prio_list.prev, struct plist_node, prio_list); do { if (node->prio < iter->prio) { node_next = &iter->node_list; break; } else if (node->prio >= reverse_iter->prio) { prev = reverse_iter; iter = list_entry(reverse_iter->prio_list.next, struct plist_node, prio_list); if (likely(reverse_iter != last)) node_next = &iter->node_list; break; } prev = iter; iter = list_entry(iter->prio_list.next, struct plist_node, prio_list); reverse_iter = list_entry(reverse_iter->prio_list.prev, struct plist_node, prio_list); } while (iter != first); if (!prev || prev->prio != node->prio) list_add_tail(&node->prio_list, &iter->prio_list); ins_node: list_add_tail(&node->node_list, node_next); plist_check_head(head); } /** * plist_del - Remove a @node from plist. * * @node: &struct plist_node pointer - entry to be removed * @head: &struct plist_head pointer - list head */ void plist_del(struct plist_node *node, struct plist_head *head) { plist_check_head(head); if (!list_empty(&node->prio_list)) { if (node->node_list.next != &head->node_list) { struct plist_node *next; next = list_entry(node->node_list.next, struct plist_node, node_list); /* add the next plist_node into prio_list */ if (list_empty(&next->prio_list)) list_add(&next->prio_list, &node->prio_list); } list_del_init(&node->prio_list); } list_del_init(&node->node_list); plist_check_head(head); } /** * plist_requeue - Requeue @node at end of same-prio entries. * * This is essentially an optimized plist_del() followed by * plist_add(). It moves an entry already in the plist to * after any other same-priority entries. * * @node: &struct plist_node pointer - entry to be moved * @head: &struct plist_head pointer - list head */ void plist_requeue(struct plist_node *node, struct plist_head *head) { struct plist_node *iter; struct list_head *node_next = &head->node_list; plist_check_head(head); BUG_ON(plist_head_empty(head)); BUG_ON(plist_node_empty(node)); if (node == plist_last(head)) return; iter = plist_next(node); if (node->prio != iter->prio) return; plist_del(node, head); plist_for_each_continue(iter, head) { if (node->prio != iter->prio) { node_next = &iter->node_list; break; } } list_add_tail(&node->node_list, node_next); plist_check_head(head); } #ifdef CONFIG_DEBUG_PLIST #include <linux/sched.h> #include <linux/sched/clock.h> #include <linux/module.h> #include <linux/init.h> static struct plist_node __initdata test_node[241]; static void __init plist_test_check(int nr_expect) { struct plist_node *first, *prio_pos, *node_pos; if (plist_head_empty(&test_head)) { BUG_ON(nr_expect != 0); return; } prio_pos = first = plist_first(&test_head); plist_for_each(node_pos, &test_head) { if (nr_expect-- < 0) break; if (node_pos == first) continue; if (node_pos->prio == prio_pos->prio) { BUG_ON(!list_empty(&node_pos->prio_list)); continue; } BUG_ON(prio_pos->prio > node_pos->prio); BUG_ON(prio_pos->prio_list.next != &node_pos->prio_list); prio_pos = node_pos; } BUG_ON(nr_expect != 0); BUG_ON(prio_pos->prio_list.next != &first->prio_list); } static void __init plist_test_requeue(struct plist_node *node) { plist_requeue(node, &test_head); if (node != plist_last(&test_head)) BUG_ON(node->prio == plist_next(node)->prio); } static int __init plist_test(void) { int nr_expect = 0, i, loop; unsigned int r = local_clock(); printk(KERN_DEBUG "start plist test\n"); plist_head_init(&test_head); for (i = 0; i < ARRAY_SIZE(test_node); i++) plist_node_init(test_node + i, 0); for (loop = 0; loop < 1000; loop++) { r = r * 193939 % 47629; i = r % ARRAY_SIZE(test_node); if (plist_node_empty(test_node + i)) { r = r * 193939 % 47629; test_node[i].prio = r % 99; plist_add(test_node + i, &test_head); nr_expect++; } else { plist_del(test_node + i, &test_head); nr_expect--; } plist_test_check(nr_expect); if (!plist_node_empty(test_node + i)) { plist_test_requeue(test_node + i); plist_test_check(nr_expect); } } for (i = 0; i < ARRAY_SIZE(test_node); i++) { if (plist_node_empty(test_node + i)) continue; plist_del(test_node + i, &test_head); nr_expect--; plist_test_check(nr_expect); } printk(KERN_DEBUG "end plist test\n"); /* Worst case test for plist_add() */ unsigned int test_data[241]; for (i = 0; i < ARRAY_SIZE(test_data); i++) test_data[i] = i; ktime_t start, end, time_elapsed = 0; plist_head_init(&test_head); for (i = 0; i < ARRAY_SIZE(test_node); i++) { plist_node_init(test_node + i, 0); test_node[i].prio = test_data[i]; } for (i = 0; i < ARRAY_SIZE(test_node); i++) { if (plist_node_empty(test_node + i)) { start = ktime_get(); plist_add(test_node + i, &test_head); end = ktime_get(); time_elapsed += (end - start); } } pr_debug("plist_add worst case test time elapsed %lld\n", time_elapsed); return 0; } module_init(plist_test); #endif |
| 11 11 11 11 11 2 2 2 2 2 2 8 8 8 2 2 2 2 2 8 8 8 10 10 7 8 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 11 11 11 11 11 11 2 9 8 2 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 | // SPDX-License-Identifier: GPL-2.0+ /* * module/drivers.c * functions for manipulating drivers * * COMEDI - Linux Control and Measurement Device Interface * Copyright (C) 1997-2000 David A. Schleef <ds@schleef.org> * Copyright (C) 2002 Frank Mori Hess <fmhess@users.sourceforge.net> */ #include <linux/device.h> #include <linux/module.h> #include <linux/errno.h> #include <linux/kernel.h> #include <linux/ioport.h> #include <linux/slab.h> #include <linux/dma-direction.h> #include <linux/interrupt.h> #include <linux/firmware.h> #include <linux/comedi/comedidev.h> #include "comedi_internal.h" struct comedi_driver *comedi_drivers; /* protects access to comedi_drivers */ DEFINE_MUTEX(comedi_drivers_list_lock); /** * comedi_set_hw_dev() - Set hardware device associated with COMEDI device * @dev: COMEDI device. * @hw_dev: Hardware device. * * For automatically configured COMEDI devices (resulting from a call to * comedi_auto_config() or one of its wrappers from the low-level COMEDI * driver), comedi_set_hw_dev() is called automatically by the COMEDI core * to associate the COMEDI device with the hardware device. It can also be * called directly by "legacy" low-level COMEDI drivers that rely on the * %COMEDI_DEVCONFIG ioctl to configure the hardware as long as the hardware * has a &struct device. * * If @dev->hw_dev is NULL, it gets a reference to @hw_dev and sets * @dev->hw_dev, otherwise, it does nothing. Calling it multiple times * with the same hardware device is not considered an error. If it gets * a reference to the hardware device, it will be automatically 'put' when * the device is detached from COMEDI. * * Returns 0 if @dev->hw_dev was NULL or the same as @hw_dev, otherwise * returns -EEXIST. */ int comedi_set_hw_dev(struct comedi_device *dev, struct device *hw_dev) { if (hw_dev == dev->hw_dev) return 0; if (dev->hw_dev) return -EEXIST; dev->hw_dev = get_device(hw_dev); return 0; } EXPORT_SYMBOL_GPL(comedi_set_hw_dev); static void comedi_clear_hw_dev(struct comedi_device *dev) { put_device(dev->hw_dev); dev->hw_dev = NULL; } /** * comedi_alloc_devpriv() - Allocate memory for the device private data * @dev: COMEDI device. * @size: Size of the memory to allocate. * * The allocated memory is zero-filled. @dev->private points to it on * return. The memory will be automatically freed when the COMEDI device is * "detached". * * Returns a pointer to the allocated memory, or NULL on failure. */ void *comedi_alloc_devpriv(struct comedi_device *dev, size_t size) { dev->private = kzalloc(size, GFP_KERNEL); return dev->private; } EXPORT_SYMBOL_GPL(comedi_alloc_devpriv); /** * comedi_alloc_subdevices() - Allocate subdevices for COMEDI device * @dev: COMEDI device. * @num_subdevices: Number of subdevices to allocate. * * Allocates and initializes an array of &struct comedi_subdevice for the * COMEDI device. If successful, sets @dev->subdevices to point to the * first one and @dev->n_subdevices to the number. * * Returns 0 on success, -EINVAL if @num_subdevices is < 1, or -ENOMEM if * failed to allocate the memory. */ int comedi_alloc_subdevices(struct comedi_device *dev, int num_subdevices) { struct comedi_subdevice *s; int i; if (num_subdevices < 1) return -EINVAL; s = kcalloc(num_subdevices, sizeof(*s), GFP_KERNEL); if (!s) return -ENOMEM; dev->subdevices = s; dev->n_subdevices = num_subdevices; for (i = 0; i < num_subdevices; ++i) { s = &dev->subdevices[i]; s->device = dev; s->index = i; s->async_dma_dir = DMA_NONE; spin_lock_init(&s->spin_lock); s->minor = -1; } return 0; } EXPORT_SYMBOL_GPL(comedi_alloc_subdevices); /** * comedi_alloc_subdev_readback() - Allocate memory for the subdevice readback * @s: COMEDI subdevice. * * This is called by low-level COMEDI drivers to allocate an array to record * the last values written to a subdevice's analog output channels (at least * by the %INSN_WRITE instruction), to allow them to be read back by an * %INSN_READ instruction. It also provides a default handler for the * %INSN_READ instruction unless one has already been set. * * On success, @s->readback points to the first element of the array, which * is zero-filled. The low-level driver is responsible for updating its * contents. @s->insn_read will be set to comedi_readback_insn_read() * unless it is already non-NULL. * * Returns 0 on success, -EINVAL if the subdevice has no channels, or * -ENOMEM on allocation failure. */ int comedi_alloc_subdev_readback(struct comedi_subdevice *s) { if (!s->n_chan) return -EINVAL; s->readback = kcalloc(s->n_chan, sizeof(*s->readback), GFP_KERNEL); if (!s->readback) return -ENOMEM; if (!s->insn_read) s->insn_read = comedi_readback_insn_read; return 0; } EXPORT_SYMBOL_GPL(comedi_alloc_subdev_readback); static void comedi_device_detach_cleanup(struct comedi_device *dev) { int i; struct comedi_subdevice *s; lockdep_assert_held(&dev->attach_lock); lockdep_assert_held(&dev->mutex); if (dev->subdevices) { for (i = 0; i < dev->n_subdevices; i++) { s = &dev->subdevices[i]; if (comedi_can_auto_free_spriv(s)) kfree(s->private); comedi_free_subdevice_minor(s); if (s->async) { comedi_buf_alloc(dev, s, 0); kfree(s->async); } kfree(s->readback); } kfree(dev->subdevices); dev->subdevices = NULL; dev->n_subdevices = 0; } kfree(dev->private); if (!IS_ERR(dev->pacer)) kfree(dev->pacer); dev->private = NULL; dev->pacer = NULL; dev->driver = NULL; dev->board_name = NULL; dev->board_ptr = NULL; dev->mmio = NULL; dev->iobase = 0; dev->iolen = 0; dev->ioenabled = false; dev->irq = 0; dev->read_subdev = NULL; dev->write_subdev = NULL; dev->open = NULL; dev->close = NULL; comedi_clear_hw_dev(dev); } void comedi_device_detach(struct comedi_device *dev) { lockdep_assert_held(&dev->mutex); comedi_device_cancel_all(dev); down_write(&dev->attach_lock); dev->attached = false; dev->detach_count++; if (dev->driver) dev->driver->detach(dev); comedi_device_detach_cleanup(dev); up_write(&dev->attach_lock); } static int poll_invalid(struct comedi_device *dev, struct comedi_subdevice *s) { return -EINVAL; } static int insn_device_inval(struct comedi_device *dev, struct comedi_insn *insn, unsigned int *data) { return -EINVAL; } static unsigned int get_zero_valid_routes(struct comedi_device *dev, unsigned int n_pairs, unsigned int *pair_data) { return 0; } int insn_inval(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { return -EINVAL; } /** * comedi_readback_insn_read() - A generic (*insn_read) for subdevice readback. * @dev: COMEDI device. * @s: COMEDI subdevice. * @insn: COMEDI instruction. * @data: Pointer to return the readback data. * * Handles the %INSN_READ instruction for subdevices that use the readback * array allocated by comedi_alloc_subdev_readback(). It may be used * directly as the subdevice's handler (@s->insn_read) or called via a * wrapper. * * @insn->n is normally 1, which will read a single value. If higher, the * same element of the readback array will be read multiple times. * * Returns @insn->n on success, or -EINVAL if @s->readback is NULL. */ int comedi_readback_insn_read(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { unsigned int chan = CR_CHAN(insn->chanspec); int i; if (!s->readback) return -EINVAL; for (i = 0; i < insn->n; i++) data[i] = s->readback[chan]; return insn->n; } EXPORT_SYMBOL_GPL(comedi_readback_insn_read); /** * comedi_timeout() - Busy-wait for a driver condition to occur * @dev: COMEDI device. * @s: COMEDI subdevice. * @insn: COMEDI instruction. * @cb: Callback to check for the condition. * @context: Private context from the driver. * * Busy-waits for up to a second (%COMEDI_TIMEOUT_MS) for the condition or * some error (other than -EBUSY) to occur. The parameters @dev, @s, @insn, * and @context are passed to the callback function, which returns -EBUSY to * continue waiting or some other value to stop waiting (generally 0 if the * condition occurred, or some error value). * * Returns -ETIMEDOUT if timed out, otherwise the return value from the * callback function. */ int comedi_timeout(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, int (*cb)(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned long context), unsigned long context) { unsigned long timeout = jiffies + msecs_to_jiffies(COMEDI_TIMEOUT_MS); int ret; while (time_before(jiffies, timeout)) { ret = cb(dev, s, insn, context); if (ret != -EBUSY) return ret; /* success (0) or non EBUSY errno */ cpu_relax(); } return -ETIMEDOUT; } EXPORT_SYMBOL_GPL(comedi_timeout); /** * comedi_dio_insn_config() - Boilerplate (*insn_config) for DIO subdevices * @dev: COMEDI device. * @s: COMEDI subdevice. * @insn: COMEDI instruction. * @data: Instruction parameters and return data. * @mask: io_bits mask for grouped channels, or 0 for single channel. * * If @mask is 0, it is replaced with a single-bit mask corresponding to the * channel number specified by @insn->chanspec. Otherwise, @mask * corresponds to a group of channels (which should include the specified * channel) that are always configured together as inputs or outputs. * * Partially handles the %INSN_CONFIG_DIO_INPUT, %INSN_CONFIG_DIO_OUTPUTS, * and %INSN_CONFIG_DIO_QUERY instructions. The first two update * @s->io_bits to record the directions of the masked channels. The last * one sets @data[1] to the current direction of the group of channels * (%COMEDI_INPUT) or %COMEDI_OUTPUT) as recorded in @s->io_bits. * * The caller is responsible for updating the DIO direction in the hardware * registers if this function returns 0. * * Returns 0 for a %INSN_CONFIG_DIO_INPUT or %INSN_CONFIG_DIO_OUTPUT * instruction, @insn->n (> 0) for a %INSN_CONFIG_DIO_QUERY instruction, or * -EINVAL for some other instruction. */ int comedi_dio_insn_config(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data, unsigned int mask) { unsigned int chan_mask = 1 << CR_CHAN(insn->chanspec); if (!mask) mask = chan_mask; switch (data[0]) { case INSN_CONFIG_DIO_INPUT: s->io_bits &= ~mask; break; case INSN_CONFIG_DIO_OUTPUT: s->io_bits |= mask; break; case INSN_CONFIG_DIO_QUERY: data[1] = (s->io_bits & mask) ? COMEDI_OUTPUT : COMEDI_INPUT; return insn->n; default: return -EINVAL; } return 0; } EXPORT_SYMBOL_GPL(comedi_dio_insn_config); /** * comedi_dio_update_state() - Update the internal state of DIO subdevices * @s: COMEDI subdevice. * @data: The channel mask and bits to update. * * Updates @s->state which holds the internal state of the outputs for DIO * or DO subdevices (up to 32 channels). @data[0] contains a bit-mask of * the channels to be updated. @data[1] contains a bit-mask of those * channels to be set to '1'. The caller is responsible for updating the * outputs in hardware according to @s->state. As a minimum, the channels * in the returned bit-mask need to be updated. * * Returns @mask with non-existent channels removed. */ unsigned int comedi_dio_update_state(struct comedi_subdevice *s, unsigned int *data) { unsigned int chanmask = (s->n_chan < 32) ? ((1 << s->n_chan) - 1) : 0xffffffff; unsigned int mask = data[0] & chanmask; unsigned int bits = data[1]; if (mask) { s->state &= ~mask; s->state |= (bits & mask); } return mask; } EXPORT_SYMBOL_GPL(comedi_dio_update_state); /** * comedi_bytes_per_scan_cmd() - Get length of asynchronous command "scan" in * bytes * @s: COMEDI subdevice. * @cmd: COMEDI command. * * Determines the overall scan length according to the subdevice type and the * number of channels in the scan for the specified command. * * For digital input, output or input/output subdevices, samples for * multiple channels are assumed to be packed into one or more unsigned * short or unsigned int values according to the subdevice's %SDF_LSAMPL * flag. For other types of subdevice, samples are assumed to occupy a * whole unsigned short or unsigned int according to the %SDF_LSAMPL flag. * * Returns the overall scan length in bytes. */ unsigned int comedi_bytes_per_scan_cmd(struct comedi_subdevice *s, struct comedi_cmd *cmd) { unsigned int num_samples; unsigned int bits_per_sample; switch (s->type) { case COMEDI_SUBD_DI: case COMEDI_SUBD_DO: case COMEDI_SUBD_DIO: bits_per_sample = 8 * comedi_bytes_per_sample(s); num_samples = DIV_ROUND_UP(cmd->scan_end_arg, bits_per_sample); break; default: num_samples = cmd->scan_end_arg; break; } return comedi_samples_to_bytes(s, num_samples); } EXPORT_SYMBOL_GPL(comedi_bytes_per_scan_cmd); /** * comedi_bytes_per_scan() - Get length of asynchronous command "scan" in bytes * @s: COMEDI subdevice. * * Determines the overall scan length according to the subdevice type and the * number of channels in the scan for the current command. * * For digital input, output or input/output subdevices, samples for * multiple channels are assumed to be packed into one or more unsigned * short or unsigned int values according to the subdevice's %SDF_LSAMPL * flag. For other types of subdevice, samples are assumed to occupy a * whole unsigned short or unsigned int according to the %SDF_LSAMPL flag. * * Returns the overall scan length in bytes. */ unsigned int comedi_bytes_per_scan(struct comedi_subdevice *s) { struct comedi_cmd *cmd = &s->async->cmd; return comedi_bytes_per_scan_cmd(s, cmd); } EXPORT_SYMBOL_GPL(comedi_bytes_per_scan); static unsigned int __comedi_nscans_left(struct comedi_subdevice *s, unsigned int nscans) { struct comedi_async *async = s->async; struct comedi_cmd *cmd = &async->cmd; if (cmd->stop_src == TRIG_COUNT) { unsigned int scans_left = 0; if (async->scans_done < cmd->stop_arg) scans_left = cmd->stop_arg - async->scans_done; if (nscans > scans_left) nscans = scans_left; } return nscans; } /** * comedi_nscans_left() - Return the number of scans left in the command * @s: COMEDI subdevice. * @nscans: The expected number of scans or 0 for all available scans. * * If @nscans is 0, it is set to the number of scans available in the * async buffer. * * If the async command has a stop_src of %TRIG_COUNT, the @nscans will be * checked against the number of scans remaining to complete the command. * * The return value will then be either the expected number of scans or the * number of scans remaining to complete the command, whichever is fewer. */ unsigned int comedi_nscans_left(struct comedi_subdevice *s, unsigned int nscans) { if (nscans == 0) { unsigned int nbytes = comedi_buf_read_n_available(s); nscans = nbytes / comedi_bytes_per_scan(s); } return __comedi_nscans_left(s, nscans); } EXPORT_SYMBOL_GPL(comedi_nscans_left); /** * comedi_nsamples_left() - Return the number of samples left in the command * @s: COMEDI subdevice. * @nsamples: The expected number of samples. * * Returns the number of samples remaining to complete the command, or the * specified expected number of samples (@nsamples), whichever is fewer. */ unsigned int comedi_nsamples_left(struct comedi_subdevice *s, unsigned int nsamples) { struct comedi_async *async = s->async; struct comedi_cmd *cmd = &async->cmd; unsigned long long scans_left; unsigned long long samples_left; if (cmd->stop_src != TRIG_COUNT) return nsamples; scans_left = __comedi_nscans_left(s, cmd->stop_arg); if (!scans_left) return 0; samples_left = scans_left * cmd->scan_end_arg - comedi_bytes_to_samples(s, async->scan_progress); if (samples_left < nsamples) return samples_left; return nsamples; } EXPORT_SYMBOL_GPL(comedi_nsamples_left); /** * comedi_inc_scan_progress() - Update scan progress in asynchronous command * @s: COMEDI subdevice. * @num_bytes: Amount of data in bytes to increment scan progress. * * Increments the scan progress by the number of bytes specified by @num_bytes. * If the scan progress reaches or exceeds the scan length in bytes, reduce * it modulo the scan length in bytes and set the "end of scan" asynchronous * event flag (%COMEDI_CB_EOS) to be processed later. */ void comedi_inc_scan_progress(struct comedi_subdevice *s, unsigned int num_bytes) { struct comedi_async *async = s->async; struct comedi_cmd *cmd = &async->cmd; unsigned int scan_length = comedi_bytes_per_scan(s); /* track the 'cur_chan' for non-SDF_PACKED subdevices */ if (!(s->subdev_flags & SDF_PACKED)) { async->cur_chan += comedi_bytes_to_samples(s, num_bytes); async->cur_chan %= cmd->chanlist_len; } async->scan_progress += num_bytes; if (async->scan_progress >= scan_length) { unsigned int nscans = async->scan_progress / scan_length; if (async->scans_done < (UINT_MAX - nscans)) async->scans_done += nscans; else async->scans_done = UINT_MAX; async->scan_progress %= scan_length; async->events |= COMEDI_CB_EOS; } } EXPORT_SYMBOL_GPL(comedi_inc_scan_progress); /** * comedi_handle_events() - Handle events and possibly stop acquisition * @dev: COMEDI device. * @s: COMEDI subdevice. * * Handles outstanding asynchronous acquisition event flags associated * with the subdevice. Call the subdevice's @s->cancel() handler if the * "end of acquisition", "error" or "overflow" event flags are set in order * to stop the acquisition at the driver level. * * Calls comedi_event() to further process the event flags, which may mark * the asynchronous command as no longer running, possibly terminated with * an error, and may wake up tasks. * * Return a bit-mask of the handled events. */ unsigned int comedi_handle_events(struct comedi_device *dev, struct comedi_subdevice *s) { unsigned int events = s->async->events; if (events == 0) return events; if ((events & COMEDI_CB_CANCEL_MASK) && s->cancel) s->cancel(dev, s); comedi_event(dev, s); return events; } EXPORT_SYMBOL_GPL(comedi_handle_events); static int insn_rw_emulate_bits(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { struct comedi_insn _insn; unsigned int chan = CR_CHAN(insn->chanspec); unsigned int base_chan = (chan < 32) ? 0 : chan; unsigned int _data[2]; int ret; memset(_data, 0, sizeof(_data)); memset(&_insn, 0, sizeof(_insn)); _insn.insn = INSN_BITS; _insn.chanspec = base_chan; _insn.n = 2; _insn.subdev = insn->subdev; if (insn->insn == INSN_WRITE) { if (!(s->subdev_flags & SDF_WRITABLE)) return -EINVAL; _data[0] = 1 << (chan - base_chan); /* mask */ _data[1] = data[0] ? (1 << (chan - base_chan)) : 0; /* bits */ } ret = s->insn_bits(dev, s, &_insn, _data); if (ret < 0) return ret; if (insn->insn == INSN_READ) data[0] = (_data[1] >> (chan - base_chan)) & 1; return 1; } static int __comedi_device_postconfig_async(struct comedi_device *dev, struct comedi_subdevice *s) { struct comedi_async *async; unsigned int buf_size; int ret; lockdep_assert_held(&dev->mutex); if ((s->subdev_flags & (SDF_CMD_READ | SDF_CMD_WRITE)) == 0) { dev_warn(dev->class_dev, "async subdevices must support SDF_CMD_READ or SDF_CMD_WRITE\n"); return -EINVAL; } if (!s->do_cmdtest) { dev_warn(dev->class_dev, "async subdevices must have a do_cmdtest() function\n"); return -EINVAL; } if (!s->cancel) dev_warn(dev->class_dev, "async subdevices should have a cancel() function\n"); async = kzalloc(sizeof(*async), GFP_KERNEL); if (!async) return -ENOMEM; init_waitqueue_head(&async->wait_head); s->async = async; async->max_bufsize = comedi_default_buf_maxsize_kb * 1024; buf_size = comedi_default_buf_size_kb * 1024; if (buf_size > async->max_bufsize) buf_size = async->max_bufsize; if (comedi_buf_alloc(dev, s, buf_size) < 0) { dev_warn(dev->class_dev, "Buffer allocation failed\n"); return -ENOMEM; } if (s->buf_change) { ret = s->buf_change(dev, s); if (ret < 0) return ret; } comedi_alloc_subdevice_minor(s); return 0; } static int __comedi_device_postconfig(struct comedi_device *dev) { struct comedi_subdevice *s; int ret; int i; lockdep_assert_held(&dev->mutex); if (!dev->insn_device_config) dev->insn_device_config = insn_device_inval; if (!dev->get_valid_routes) dev->get_valid_routes = get_zero_valid_routes; for (i = 0; i < dev->n_subdevices; i++) { s = &dev->subdevices[i]; if (s->type == COMEDI_SUBD_UNUSED) continue; if (s->type == COMEDI_SUBD_DO) { if (s->n_chan < 32) s->io_bits = (1 << s->n_chan) - 1; else s->io_bits = 0xffffffff; } if (s->len_chanlist == 0) s->len_chanlist = 1; if (s->do_cmd) { ret = __comedi_device_postconfig_async(dev, s); if (ret) return ret; } if (!s->range_table && !s->range_table_list) s->range_table = &range_unknown; if (!s->insn_read && s->insn_bits) s->insn_read = insn_rw_emulate_bits; if (!s->insn_write && s->insn_bits) s->insn_write = insn_rw_emulate_bits; if (!s->insn_read) s->insn_read = insn_inval; if (!s->insn_write) s->insn_write = insn_inval; if (!s->insn_bits) s->insn_bits = insn_inval; if (!s->insn_config) s->insn_config = insn_inval; if (!s->poll) s->poll = poll_invalid; } return 0; } /* do a little post-config cleanup */ static int comedi_device_postconfig(struct comedi_device *dev) { int ret; lockdep_assert_held(&dev->mutex); ret = __comedi_device_postconfig(dev); if (ret < 0) return ret; down_write(&dev->attach_lock); dev->attached = true; up_write(&dev->attach_lock); return 0; } /* * Generic recognize function for drivers that register their supported * board names. * * 'driv->board_name' points to a 'const char *' member within the * zeroth element of an array of some private board information * structure, say 'struct foo_board' containing a member 'const char * *board_name' that is initialized to point to a board name string that * is one of the candidates matched against this function's 'name' * parameter. * * 'driv->offset' is the size of the private board information * structure, say 'sizeof(struct foo_board)', and 'driv->num_names' is * the length of the array of private board information structures. * * If one of the board names in the array of private board information * structures matches the name supplied to this function, the function * returns a pointer to the pointer to the board name, otherwise it * returns NULL. The return value ends up in the 'board_ptr' member of * a 'struct comedi_device' that the low-level comedi driver's * 'attach()' hook can convert to a point to a particular element of its * array of private board information structures by subtracting the * offset of the member that points to the board name. (No subtraction * is required if the board name pointer is the first member of the * private board information structure, which is generally the case.) */ static void *comedi_recognize(struct comedi_driver *driv, const char *name) { char **name_ptr = (char **)driv->board_name; int i; for (i = 0; i < driv->num_names; i++) { if (strcmp(*name_ptr, name) == 0) return name_ptr; name_ptr = (void *)name_ptr + driv->offset; } return NULL; } static void comedi_report_boards(struct comedi_driver *driv) { unsigned int i; const char *const *name_ptr; pr_info("comedi: valid board names for %s driver are:\n", driv->driver_name); name_ptr = driv->board_name; for (i = 0; i < driv->num_names; i++) { pr_info(" %s\n", *name_ptr); name_ptr = (const char **)((char *)name_ptr + driv->offset); } if (driv->num_names == 0) pr_info(" %s\n", driv->driver_name); } /** * comedi_load_firmware() - Request and load firmware for a device * @dev: COMEDI device. * @device: Hardware device. * @name: The name of the firmware image. * @cb: Callback to the upload the firmware image. * @context: Private context from the driver. * * Sends a firmware request for the hardware device and waits for it. Calls * the callback function to upload the firmware to the device, them releases * the firmware. * * Returns 0 on success, -EINVAL if @cb is NULL, or a negative error number * from the firmware request or the callback function. */ int comedi_load_firmware(struct comedi_device *dev, struct device *device, const char *name, int (*cb)(struct comedi_device *dev, const u8 *data, size_t size, unsigned long context), unsigned long context) { const struct firmware *fw; int ret; if (!cb) return -EINVAL; ret = request_firmware(&fw, name, device); if (ret == 0) { ret = cb(dev, fw->data, fw->size, context); release_firmware(fw); } return min(ret, 0); } EXPORT_SYMBOL_GPL(comedi_load_firmware); /** * __comedi_request_region() - Request an I/O region for a legacy driver * @dev: COMEDI device. * @start: Base address of the I/O region. * @len: Length of the I/O region. * * Requests the specified I/O port region which must start at a non-zero * address. * * Returns 0 on success, -EINVAL if @start is 0, or -EIO if the request * fails. */ int __comedi_request_region(struct comedi_device *dev, unsigned long start, unsigned long len) { if (!start) { dev_warn(dev->class_dev, "%s: a I/O base address must be specified\n", dev->board_name); return -EINVAL; } if (!request_region(start, len, dev->board_name)) { dev_warn(dev->class_dev, "%s: I/O port conflict (%#lx,%lu)\n", dev->board_name, start, len); return -EIO; } return 0; } EXPORT_SYMBOL_GPL(__comedi_request_region); /** * comedi_request_region() - Request an I/O region for a legacy driver * @dev: COMEDI device. * @start: Base address of the I/O region. * @len: Length of the I/O region. * * Requests the specified I/O port region which must start at a non-zero * address. * * On success, @dev->iobase is set to the base address of the region and * @dev->iolen is set to its length. * * Returns 0 on success, -EINVAL if @start is 0, or -EIO if the request * fails. */ int comedi_request_region(struct comedi_device *dev, unsigned long start, unsigned long len) { int ret; ret = __comedi_request_region(dev, start, len); if (ret == 0) { dev->iobase = start; dev->iolen = len; } return ret; } EXPORT_SYMBOL_GPL(comedi_request_region); /** * comedi_legacy_detach() - A generic (*detach) function for legacy drivers * @dev: COMEDI device. * * This is a simple, generic 'detach' handler for legacy COMEDI devices that * just use a single I/O port region and possibly an IRQ and that don't need * any special clean-up for their private device or subdevice storage. It * can also be called by a driver-specific 'detach' handler. * * If @dev->irq is non-zero, the IRQ will be freed. If @dev->iobase and * @dev->iolen are both non-zero, the I/O port region will be released. */ void comedi_legacy_detach(struct comedi_device *dev) { if (dev->irq) { free_irq(dev->irq, dev); dev->irq = 0; } if (dev->iobase && dev->iolen) { release_region(dev->iobase, dev->iolen); dev->iobase = 0; dev->iolen = 0; } } EXPORT_SYMBOL_GPL(comedi_legacy_detach); int comedi_device_attach(struct comedi_device *dev, struct comedi_devconfig *it) { struct comedi_driver *driv; int ret; lockdep_assert_held(&dev->mutex); if (dev->attached) return -EBUSY; mutex_lock(&comedi_drivers_list_lock); for (driv = comedi_drivers; driv; driv = driv->next) { if (!try_module_get(driv->module)) continue; if (driv->num_names) { dev->board_ptr = comedi_recognize(driv, it->board_name); if (dev->board_ptr) break; } else if (strcmp(driv->driver_name, it->board_name) == 0) { break; } module_put(driv->module); } if (!driv) { /* recognize has failed if we get here */ /* report valid board names before returning error */ for (driv = comedi_drivers; driv; driv = driv->next) { if (!try_module_get(driv->module)) continue; comedi_report_boards(driv); module_put(driv->module); } ret = -EIO; goto out; } if (!driv->attach) { /* driver does not support manual configuration */ dev_warn(dev->class_dev, "driver '%s' does not support attach using comedi_config\n", driv->driver_name); module_put(driv->module); ret = -EIO; goto out; } dev->driver = driv; dev->board_name = dev->board_ptr ? *(const char **)dev->board_ptr : dev->driver->driver_name; ret = driv->attach(dev, it); if (ret >= 0) ret = comedi_device_postconfig(dev); if (ret < 0) { comedi_device_detach(dev); module_put(driv->module); } /* On success, the driver module count has been incremented. */ out: mutex_unlock(&comedi_drivers_list_lock); return ret; } /** * comedi_auto_config() - Create a COMEDI device for a hardware device * @hardware_device: Hardware device. * @driver: COMEDI low-level driver for the hardware device. * @context: Driver context for the auto_attach handler. * * Allocates a new COMEDI device for the hardware device and calls the * low-level driver's 'auto_attach' handler to set-up the hardware and * allocate the COMEDI subdevices. Additional "post-configuration" setting * up is performed on successful return from the 'auto_attach' handler. * If the 'auto_attach' handler fails, the low-level driver's 'detach' * handler will be called as part of the clean-up. * * This is usually called from a wrapper function in a bus-specific COMEDI * module, which in turn is usually called from a bus device 'probe' * function in the low-level driver. * * Returns 0 on success, -EINVAL if the parameters are invalid or the * post-configuration determines the driver has set the COMEDI device up * incorrectly, -ENOMEM if failed to allocate memory, -EBUSY if run out of * COMEDI minor device numbers, or some negative error number returned by * the driver's 'auto_attach' handler. */ int comedi_auto_config(struct device *hardware_device, struct comedi_driver *driver, unsigned long context) { struct comedi_device *dev; int ret; if (!hardware_device) { pr_warn("BUG! %s called with NULL hardware_device\n", __func__); return -EINVAL; } if (!driver) { dev_warn(hardware_device, "BUG! %s called with NULL comedi driver\n", __func__); return -EINVAL; } if (!driver->auto_attach) { dev_warn(hardware_device, "BUG! comedi driver '%s' has no auto_attach handler\n", driver->driver_name); return -EINVAL; } dev = comedi_alloc_board_minor(hardware_device); if (IS_ERR(dev)) { dev_warn(hardware_device, "driver '%s' could not create device.\n", driver->driver_name); return PTR_ERR(dev); } /* Note: comedi_alloc_board_minor() locked dev->mutex. */ lockdep_assert_held(&dev->mutex); dev->driver = driver; dev->board_name = dev->driver->driver_name; ret = driver->auto_attach(dev, context); if (ret >= 0) ret = comedi_device_postconfig(dev); if (ret < 0) { dev_warn(hardware_device, "driver '%s' failed to auto-configure device.\n", driver->driver_name); mutex_unlock(&dev->mutex); comedi_release_hardware_device(hardware_device); } else { /* * class_dev should be set properly here * after a successful auto config */ dev_info(dev->class_dev, "driver '%s' has successfully auto-configured '%s'.\n", driver->driver_name, dev->board_name); mutex_unlock(&dev->mutex); } return ret; } EXPORT_SYMBOL_GPL(comedi_auto_config); /** * comedi_auto_unconfig() - Unconfigure auto-allocated COMEDI device * @hardware_device: Hardware device previously passed to * comedi_auto_config(). * * Cleans up and eventually destroys the COMEDI device allocated by * comedi_auto_config() for the same hardware device. As part of this * clean-up, the low-level COMEDI driver's 'detach' handler will be called. * (The COMEDI device itself will persist in an unattached state if it is * still open, until it is released, and any mmapped buffers will persist * until they are munmapped.) * * This is usually called from a wrapper module in a bus-specific COMEDI * module, which in turn is usually set as the bus device 'remove' function * in the low-level COMEDI driver. */ void comedi_auto_unconfig(struct device *hardware_device) { if (!hardware_device) return; comedi_release_hardware_device(hardware_device); } EXPORT_SYMBOL_GPL(comedi_auto_unconfig); /** * comedi_driver_register() - Register a low-level COMEDI driver * @driver: Low-level COMEDI driver. * * The low-level COMEDI driver is added to the list of registered COMEDI * drivers. This is used by the handler for the "/proc/comedi" file and is * also used by the handler for the %COMEDI_DEVCONFIG ioctl to configure * "legacy" COMEDI devices (for those low-level drivers that support it). * * Returns 0. */ int comedi_driver_register(struct comedi_driver *driver) { mutex_lock(&comedi_drivers_list_lock); driver->next = comedi_drivers; comedi_drivers = driver; mutex_unlock(&comedi_drivers_list_lock); return 0; } EXPORT_SYMBOL_GPL(comedi_driver_register); /** * comedi_driver_unregister() - Unregister a low-level COMEDI driver * @driver: Low-level COMEDI driver. * * The low-level COMEDI driver is removed from the list of registered COMEDI * drivers. Detaches any COMEDI devices attached to the driver, which will * result in the low-level driver's 'detach' handler being called for those * devices before this function returns. */ void comedi_driver_unregister(struct comedi_driver *driver) { struct comedi_driver *prev; int i; /* unlink the driver */ mutex_lock(&comedi_drivers_list_lock); if (comedi_drivers == driver) { comedi_drivers = driver->next; } else { for (prev = comedi_drivers; prev->next; prev = prev->next) { if (prev->next == driver) { prev->next = driver->next; break; } } } mutex_unlock(&comedi_drivers_list_lock); /* check for devices using this driver */ for (i = 0; i < COMEDI_NUM_BOARD_MINORS; i++) { struct comedi_device *dev = comedi_dev_get_from_minor(i); if (!dev) continue; mutex_lock(&dev->mutex); if (dev->attached && dev->driver == driver) { if (dev->use_count) dev_warn(dev->class_dev, "BUG! detaching device with use_count=%d\n", dev->use_count); comedi_device_detach(dev); } mutex_unlock(&dev->mutex); comedi_dev_put(dev); } } EXPORT_SYMBOL_GPL(comedi_driver_unregister); |
| 7 7 7 20 1 20 19 20 20 1 1 19 19 19 18 7 29 29 29 29 29 29 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 | /* * Route Plug-In * Copyright (c) 2000 by Abramo Bagnara <abramo@alsa-project.org> * * * This library is free software; you can redistribute it and/or modify * it under the terms of the GNU Library General Public License as * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #include <linux/time.h> #include <sound/core.h> #include <sound/pcm.h> #include "pcm_plugin.h" static void zero_areas(struct snd_pcm_plugin_channel *dvp, int ndsts, snd_pcm_uframes_t frames, snd_pcm_format_t format) { int dst = 0; for (; dst < ndsts; ++dst) { if (dvp->wanted) snd_pcm_area_silence(&dvp->area, 0, frames, format); dvp->enabled = 0; dvp++; } } static inline void copy_area(const struct snd_pcm_plugin_channel *src_channel, struct snd_pcm_plugin_channel *dst_channel, snd_pcm_uframes_t frames, snd_pcm_format_t format) { dst_channel->enabled = 1; snd_pcm_area_copy(&src_channel->area, 0, &dst_channel->area, 0, frames, format); } static snd_pcm_sframes_t route_transfer(struct snd_pcm_plugin *plugin, const struct snd_pcm_plugin_channel *src_channels, struct snd_pcm_plugin_channel *dst_channels, snd_pcm_uframes_t frames) { int nsrcs, ndsts, dst; struct snd_pcm_plugin_channel *dvp; snd_pcm_format_t format; if (snd_BUG_ON(!plugin || !src_channels || !dst_channels)) return -ENXIO; if (frames == 0) return 0; if (frames > dst_channels[0].frames) frames = dst_channels[0].frames; nsrcs = plugin->src_format.channels; ndsts = plugin->dst_format.channels; format = plugin->dst_format.format; dvp = dst_channels; if (nsrcs <= 1) { /* expand to all channels */ for (dst = 0; dst < ndsts; ++dst) { copy_area(src_channels, dvp, frames, format); dvp++; } return frames; } for (dst = 0; dst < ndsts && dst < nsrcs; ++dst) { copy_area(src_channels, dvp, frames, format); dvp++; src_channels++; } if (dst < ndsts) zero_areas(dvp, ndsts - dst, frames, format); return frames; } int snd_pcm_plugin_build_route(struct snd_pcm_substream *plug, struct snd_pcm_plugin_format *src_format, struct snd_pcm_plugin_format *dst_format, struct snd_pcm_plugin **r_plugin) { struct snd_pcm_plugin *plugin; int err; if (snd_BUG_ON(!r_plugin)) return -ENXIO; *r_plugin = NULL; if (snd_BUG_ON(src_format->rate != dst_format->rate)) return -ENXIO; if (snd_BUG_ON(src_format->format != dst_format->format)) return -ENXIO; err = snd_pcm_plugin_build(plug, "route conversion", src_format, dst_format, 0, &plugin); if (err < 0) return err; plugin->transfer = route_transfer; *r_plugin = plugin; return 0; } |
| 9 8 1 9 1 1 1 1 1 1 1 1 1 9 10 2 10 3 10 2 27 10 25 6 8 21 1 1 19 1 1 18 1 1 18 27 14 14 2 14 1 1 14 13 60 60 60 39 47 4 4 2 19 1 18 17 17 6 11 6 47 23 3 4 1 19 31 31 29 31 4 27 25 5 2 2 1 1 1 1 1 1 1 1 1 1 8 8 8 7 7 7 4 2 7 2 2 2 2 2 1 1 1 2 12 2 12 10 5 5 10 1 9 9 9 9 1 14 14 1 14 14 1 13 7 13 1 13 13 7 13 1 1 1 1 13 12 13 13 13 12 7 5 1 5 12 13 27 10 27 24 7 7 27 27 27 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 | // SPDX-License-Identifier: GPL-2.0-only /* * This file contains vfs inode ops for the 9P2000 protocol. * * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/pagemap.h> #include <linux/stat.h> #include <linux/string.h> #include <linux/namei.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/xattr.h> #include <linux/posix_acl.h> #include <net/9p/9p.h> #include <net/9p/client.h> #include "v9fs.h" #include "v9fs_vfs.h" #include "fid.h" #include "cache.h" #include "xattr.h" #include "acl.h" static const struct inode_operations v9fs_dir_inode_operations; static const struct inode_operations v9fs_dir_inode_operations_dotu; static const struct inode_operations v9fs_file_inode_operations; static const struct inode_operations v9fs_symlink_inode_operations; /** * unixmode2p9mode - convert unix mode bits to plan 9 * @v9ses: v9fs session information * @mode: mode to convert * */ static u32 unixmode2p9mode(struct v9fs_session_info *v9ses, umode_t mode) { int res; res = mode & 0777; if (S_ISDIR(mode)) res |= P9_DMDIR; if (v9fs_proto_dotu(v9ses)) { if (v9ses->nodev == 0) { if (S_ISSOCK(mode)) res |= P9_DMSOCKET; if (S_ISFIFO(mode)) res |= P9_DMNAMEDPIPE; if (S_ISBLK(mode)) res |= P9_DMDEVICE; if (S_ISCHR(mode)) res |= P9_DMDEVICE; } if ((mode & S_ISUID) == S_ISUID) res |= P9_DMSETUID; if ((mode & S_ISGID) == S_ISGID) res |= P9_DMSETGID; if ((mode & S_ISVTX) == S_ISVTX) res |= P9_DMSETVTX; } return res; } /** * p9mode2perm- convert plan9 mode bits to unix permission bits * @v9ses: v9fs session information * @stat: p9_wstat from which mode need to be derived * */ static int p9mode2perm(struct v9fs_session_info *v9ses, struct p9_wstat *stat) { int res; int mode = stat->mode; res = mode & 0777; /* S_IRWXUGO */ if (v9fs_proto_dotu(v9ses)) { if ((mode & P9_DMSETUID) == P9_DMSETUID) res |= S_ISUID; if ((mode & P9_DMSETGID) == P9_DMSETGID) res |= S_ISGID; if ((mode & P9_DMSETVTX) == P9_DMSETVTX) res |= S_ISVTX; } return res; } /** * p9mode2unixmode- convert plan9 mode bits to unix mode bits * @v9ses: v9fs session information * @stat: p9_wstat from which mode need to be derived * @rdev: major number, minor number in case of device files. * */ static umode_t p9mode2unixmode(struct v9fs_session_info *v9ses, struct p9_wstat *stat, dev_t *rdev) { int res, r; u32 mode = stat->mode; *rdev = 0; res = p9mode2perm(v9ses, stat); if ((mode & P9_DMDIR) == P9_DMDIR) res |= S_IFDIR; else if ((mode & P9_DMSYMLINK) && (v9fs_proto_dotu(v9ses))) res |= S_IFLNK; else if ((mode & P9_DMSOCKET) && (v9fs_proto_dotu(v9ses)) && (v9ses->nodev == 0)) res |= S_IFSOCK; else if ((mode & P9_DMNAMEDPIPE) && (v9fs_proto_dotu(v9ses)) && (v9ses->nodev == 0)) res |= S_IFIFO; else if ((mode & P9_DMDEVICE) && (v9fs_proto_dotu(v9ses)) && (v9ses->nodev == 0)) { char type = 0; int major = -1, minor = -1; r = sscanf(stat->extension, "%c %i %i", &type, &major, &minor); if (r != 3) { p9_debug(P9_DEBUG_ERROR, "invalid device string, umode will be bogus: %s\n", stat->extension); return res; } switch (type) { case 'c': res |= S_IFCHR; break; case 'b': res |= S_IFBLK; break; default: p9_debug(P9_DEBUG_ERROR, "Unknown special type %c %s\n", type, stat->extension); } *rdev = MKDEV(major, minor); } else res |= S_IFREG; return res; } /** * v9fs_uflags2omode- convert posix open flags to plan 9 mode bits * @uflags: flags to convert * @extended: if .u extensions are active */ int v9fs_uflags2omode(int uflags, int extended) { int ret; switch (uflags&3) { default: case O_RDONLY: ret = P9_OREAD; break; case O_WRONLY: ret = P9_OWRITE; break; case O_RDWR: ret = P9_ORDWR; break; } if (uflags & O_TRUNC) ret |= P9_OTRUNC; if (extended) { if (uflags & O_EXCL) ret |= P9_OEXCL; if (uflags & O_APPEND) ret |= P9_OAPPEND; } return ret; } /** * v9fs_blank_wstat - helper function to setup a 9P stat structure * @wstat: structure to initialize * */ void v9fs_blank_wstat(struct p9_wstat *wstat) { wstat->type = ~0; wstat->dev = ~0; wstat->qid.type = ~0; wstat->qid.version = ~0; *((long long *)&wstat->qid.path) = ~0; wstat->mode = ~0; wstat->atime = ~0; wstat->mtime = ~0; wstat->length = ~0; wstat->name = NULL; wstat->uid = NULL; wstat->gid = NULL; wstat->muid = NULL; wstat->n_uid = INVALID_UID; wstat->n_gid = INVALID_GID; wstat->n_muid = INVALID_UID; wstat->extension = NULL; } /** * v9fs_alloc_inode - helper function to allocate an inode * @sb: The superblock to allocate the inode from */ struct inode *v9fs_alloc_inode(struct super_block *sb) { struct v9fs_inode *v9inode; v9inode = alloc_inode_sb(sb, v9fs_inode_cache, GFP_KERNEL); if (!v9inode) return NULL; v9inode->cache_validity = 0; mutex_init(&v9inode->v_mutex); return &v9inode->netfs.inode; } /** * v9fs_free_inode - destroy an inode * @inode: The inode to be freed */ void v9fs_free_inode(struct inode *inode) { kmem_cache_free(v9fs_inode_cache, V9FS_I(inode)); } /* * Set parameters for the netfs library */ void v9fs_set_netfs_context(struct inode *inode) { struct v9fs_inode *v9inode = V9FS_I(inode); netfs_inode_init(&v9inode->netfs, &v9fs_req_ops, true); } int v9fs_init_inode(struct v9fs_session_info *v9ses, struct inode *inode, struct p9_qid *qid, umode_t mode, dev_t rdev) { int err = 0; struct v9fs_inode *v9inode = V9FS_I(inode); memcpy(&v9inode->qid, qid, sizeof(struct p9_qid)); inode_init_owner(&nop_mnt_idmap, inode, NULL, mode); inode->i_blocks = 0; inode->i_rdev = rdev; simple_inode_init_ts(inode); inode->i_mapping->a_ops = &v9fs_addr_operations; inode->i_private = NULL; switch (mode & S_IFMT) { case S_IFIFO: case S_IFBLK: case S_IFCHR: case S_IFSOCK: if (v9fs_proto_dotl(v9ses)) { inode->i_op = &v9fs_file_inode_operations_dotl; } else if (v9fs_proto_dotu(v9ses)) { inode->i_op = &v9fs_file_inode_operations; } else { p9_debug(P9_DEBUG_ERROR, "special files without extended mode\n"); err = -EINVAL; goto error; } init_special_inode(inode, inode->i_mode, inode->i_rdev); break; case S_IFREG: if (v9fs_proto_dotl(v9ses)) { inode->i_op = &v9fs_file_inode_operations_dotl; inode->i_fop = &v9fs_file_operations_dotl; } else { inode->i_op = &v9fs_file_inode_operations; inode->i_fop = &v9fs_file_operations; } break; case S_IFLNK: if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)) { p9_debug(P9_DEBUG_ERROR, "extended modes used with legacy protocol\n"); err = -EINVAL; goto error; } if (v9fs_proto_dotl(v9ses)) inode->i_op = &v9fs_symlink_inode_operations_dotl; else inode->i_op = &v9fs_symlink_inode_operations; break; case S_IFDIR: inc_nlink(inode); if (v9fs_proto_dotl(v9ses)) inode->i_op = &v9fs_dir_inode_operations_dotl; else if (v9fs_proto_dotu(v9ses)) inode->i_op = &v9fs_dir_inode_operations_dotu; else inode->i_op = &v9fs_dir_inode_operations; if (v9fs_proto_dotl(v9ses)) inode->i_fop = &v9fs_dir_operations_dotl; else inode->i_fop = &v9fs_dir_operations; break; default: p9_debug(P9_DEBUG_ERROR, "BAD mode 0x%hx S_IFMT 0x%x\n", mode, mode & S_IFMT); err = -EINVAL; goto error; } error: return err; } /** * v9fs_evict_inode - Remove an inode from the inode cache * @inode: inode to release * */ void v9fs_evict_inode(struct inode *inode) { struct v9fs_inode __maybe_unused *v9inode = V9FS_I(inode); __le32 __maybe_unused version; if (!is_bad_inode(inode)) { netfs_wait_for_outstanding_io(inode); truncate_inode_pages_final(&inode->i_data); version = cpu_to_le32(v9inode->qid.version); netfs_clear_inode_writeback(inode, &version); clear_inode(inode); filemap_fdatawrite(&inode->i_data); #ifdef CONFIG_9P_FSCACHE if (v9fs_inode_cookie(v9inode)) fscache_relinquish_cookie(v9fs_inode_cookie(v9inode), false); #endif } else clear_inode(inode); } struct inode * v9fs_fid_iget(struct super_block *sb, struct p9_fid *fid, bool new) { dev_t rdev; int retval; umode_t umode; struct inode *inode; struct p9_wstat *st; struct v9fs_session_info *v9ses = sb->s_fs_info; inode = iget_locked(sb, QID2INO(&fid->qid)); if (unlikely(!inode)) return ERR_PTR(-ENOMEM); if (!(inode->i_state & I_NEW)) { if (!new) { goto done; } else { p9_debug(P9_DEBUG_VFS, "WARNING: Inode collision %ld\n", inode->i_ino); iput(inode); remove_inode_hash(inode); inode = iget_locked(sb, QID2INO(&fid->qid)); WARN_ON(!(inode->i_state & I_NEW)); } } /* * initialize the inode with the stat info * FIXME!! we may need support for stale inodes * later. */ st = p9_client_stat(fid); if (IS_ERR(st)) { retval = PTR_ERR(st); goto error; } umode = p9mode2unixmode(v9ses, st, &rdev); retval = v9fs_init_inode(v9ses, inode, &fid->qid, umode, rdev); v9fs_stat2inode(st, inode, sb, 0); p9stat_free(st); kfree(st); if (retval) goto error; v9fs_set_netfs_context(inode); v9fs_cache_inode_get_cookie(inode); unlock_new_inode(inode); done: return inode; error: iget_failed(inode); return ERR_PTR(retval); } /** * v9fs_at_to_dotl_flags- convert Linux specific AT flags to * plan 9 AT flag. * @flags: flags to convert */ static int v9fs_at_to_dotl_flags(int flags) { int rflags = 0; if (flags & AT_REMOVEDIR) rflags |= P9_DOTL_AT_REMOVEDIR; return rflags; } /** * v9fs_dec_count - helper functon to drop i_nlink. * * If a directory had nlink <= 2 (including . and ..), then we should not drop * the link count, which indicates the underlying exported fs doesn't maintain * nlink accurately. e.g. * - overlayfs sets nlink to 1 for merged dir * - ext4 (with dir_nlink feature enabled) sets nlink to 1 if a dir has more * than EXT4_LINK_MAX (65000) links. * * @inode: inode whose nlink is being dropped */ static void v9fs_dec_count(struct inode *inode) { if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2) { if (inode->i_nlink) { drop_nlink(inode); } else { p9_debug(P9_DEBUG_VFS, "WARNING: unexpected i_nlink zero %d inode %ld\n", inode->i_nlink, inode->i_ino); } } } /** * v9fs_remove - helper function to remove files and directories * @dir: directory inode that is being deleted * @dentry: dentry that is being deleted * @flags: removing a directory * */ static int v9fs_remove(struct inode *dir, struct dentry *dentry, int flags) { struct inode *inode; int retval = -EOPNOTSUPP; struct p9_fid *v9fid, *dfid; struct v9fs_session_info *v9ses; p9_debug(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %x\n", dir, dentry, flags); v9ses = v9fs_inode2v9ses(dir); inode = d_inode(dentry); dfid = v9fs_parent_fid(dentry); if (IS_ERR(dfid)) { retval = PTR_ERR(dfid); p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", retval); return retval; } if (v9fs_proto_dotl(v9ses)) retval = p9_client_unlinkat(dfid, dentry->d_name.name, v9fs_at_to_dotl_flags(flags)); p9_fid_put(dfid); if (retval == -EOPNOTSUPP) { /* Try the one based on path */ v9fid = v9fs_fid_clone(dentry); if (IS_ERR(v9fid)) return PTR_ERR(v9fid); retval = p9_client_remove(v9fid); } if (!retval) { /* * directories on unlink should have zero * link count */ if (flags & AT_REMOVEDIR) { clear_nlink(inode); v9fs_dec_count(dir); } else v9fs_dec_count(inode); if (inode->i_nlink <= 0) /* no more refs unhash it */ remove_inode_hash(inode); v9fs_invalidate_inode_attr(inode); v9fs_invalidate_inode_attr(dir); /* invalidate all fids associated with dentry */ /* NOTE: This will not include open fids */ dentry->d_op->d_release(dentry); } return retval; } /** * v9fs_create - Create a file * @v9ses: session information * @dir: directory that dentry is being created in * @dentry: dentry that is being created * @extension: 9p2000.u extension string to support devices, etc. * @perm: create permissions * @mode: open mode * */ static struct p9_fid * v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, struct dentry *dentry, char *extension, u32 perm, u8 mode) { int err; const unsigned char *name; struct p9_fid *dfid, *ofid = NULL, *fid = NULL; struct inode *inode; p9_debug(P9_DEBUG_VFS, "name %pd\n", dentry); name = dentry->d_name.name; dfid = v9fs_parent_fid(dentry); if (IS_ERR(dfid)) { err = PTR_ERR(dfid); p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err); return ERR_PTR(err); } /* clone a fid to use for creation */ ofid = clone_fid(dfid); if (IS_ERR(ofid)) { err = PTR_ERR(ofid); p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); goto error; } err = p9_client_fcreate(ofid, name, perm, mode, extension); if (err < 0) { p9_debug(P9_DEBUG_VFS, "p9_client_fcreate failed %d\n", err); goto error; } if (!(perm & P9_DMLINK)) { /* now walk from the parent so we can get unopened fid */ fid = p9_client_walk(dfid, 1, &name, 1); if (IS_ERR(fid)) { err = PTR_ERR(fid); p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); goto error; } /* * instantiate inode and assign the unopened fid to the dentry */ inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb, true); if (IS_ERR(inode)) { err = PTR_ERR(inode); p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", err); goto error; } v9fs_fid_add(dentry, &fid); d_instantiate(dentry, inode); } p9_fid_put(dfid); return ofid; error: p9_fid_put(dfid); p9_fid_put(ofid); p9_fid_put(fid); return ERR_PTR(err); } /** * v9fs_vfs_create - VFS hook to create a regular file * @idmap: idmap of the mount * @dir: The parent directory * @dentry: The name of file to be created * @mode: The UNIX file mode to set * @excl: True if the file must not yet exist * * open(.., O_CREAT) is handled in v9fs_vfs_atomic_open(). This is only called * for mknod(2). * */ static int v9fs_vfs_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir); u32 perm = unixmode2p9mode(v9ses, mode); struct p9_fid *fid; /* P9_OEXCL? */ fid = v9fs_create(v9ses, dir, dentry, NULL, perm, P9_ORDWR); if (IS_ERR(fid)) return PTR_ERR(fid); v9fs_invalidate_inode_attr(dir); p9_fid_put(fid); return 0; } /** * v9fs_vfs_mkdir - VFS mkdir hook to create a directory * @idmap: idmap of the mount * @dir: inode that is being unlinked * @dentry: dentry that is being unlinked * @mode: mode for new directory * */ static int v9fs_vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { int err; u32 perm; struct p9_fid *fid; struct v9fs_session_info *v9ses; p9_debug(P9_DEBUG_VFS, "name %pd\n", dentry); err = 0; v9ses = v9fs_inode2v9ses(dir); perm = unixmode2p9mode(v9ses, mode | S_IFDIR); fid = v9fs_create(v9ses, dir, dentry, NULL, perm, P9_OREAD); if (IS_ERR(fid)) { err = PTR_ERR(fid); fid = NULL; } else { inc_nlink(dir); v9fs_invalidate_inode_attr(dir); } if (fid) p9_fid_put(fid); return err; } /** * v9fs_vfs_lookup - VFS lookup hook to "walk" to a new inode * @dir: inode that is being walked from * @dentry: dentry that is being walked to? * @flags: lookup flags (unused) * */ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct dentry *res; struct v9fs_session_info *v9ses; struct p9_fid *dfid, *fid; struct inode *inode; const unsigned char *name; p9_debug(P9_DEBUG_VFS, "dir: %p dentry: (%pd) %p flags: %x\n", dir, dentry, dentry, flags); if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); v9ses = v9fs_inode2v9ses(dir); /* We can walk d_parent because we hold the dir->i_mutex */ dfid = v9fs_parent_fid(dentry); if (IS_ERR(dfid)) return ERR_CAST(dfid); /* * Make sure we don't use a wrong inode due to parallel * unlink. For cached mode create calls request for new * inode. But with cache disabled, lookup should do this. */ name = dentry->d_name.name; fid = p9_client_walk(dfid, 1, &name, 1); p9_fid_put(dfid); if (fid == ERR_PTR(-ENOENT)) inode = NULL; else if (IS_ERR(fid)) inode = ERR_CAST(fid); else inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb, false); /* * If we had a rename on the server and a parallel lookup * for the new name, then make sure we instantiate with * the new name. ie look up for a/b, while on server somebody * moved b under k and client parallely did a lookup for * k/b. */ res = d_splice_alias(inode, dentry); if (!IS_ERR(fid)) { if (!res) v9fs_fid_add(dentry, &fid); else if (!IS_ERR(res)) v9fs_fid_add(res, &fid); else p9_fid_put(fid); } return res; } static int v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, struct file *file, unsigned int flags, umode_t mode) { int err; u32 perm; struct v9fs_inode __maybe_unused *v9inode; struct v9fs_session_info *v9ses; struct p9_fid *fid; struct dentry *res = NULL; struct inode *inode; int p9_omode; if (d_in_lookup(dentry)) { res = v9fs_vfs_lookup(dir, dentry, 0); if (IS_ERR(res)) return PTR_ERR(res); if (res) dentry = res; } /* Only creates */ if (!(flags & O_CREAT) || d_really_is_positive(dentry)) return finish_no_open(file, res); v9ses = v9fs_inode2v9ses(dir); perm = unixmode2p9mode(v9ses, mode); p9_omode = v9fs_uflags2omode(flags, v9fs_proto_dotu(v9ses)); if ((v9ses->cache & CACHE_WRITEBACK) && (p9_omode & P9_OWRITE)) { p9_omode = (p9_omode & ~P9_OWRITE) | P9_ORDWR; p9_debug(P9_DEBUG_CACHE, "write-only file with writeback enabled, creating w/ O_RDWR\n"); } fid = v9fs_create(v9ses, dir, dentry, NULL, perm, p9_omode); if (IS_ERR(fid)) { err = PTR_ERR(fid); goto error; } v9fs_invalidate_inode_attr(dir); inode = d_inode(dentry); v9inode = V9FS_I(inode); err = finish_open(file, dentry, generic_file_open); if (err) goto error; file->private_data = fid; #ifdef CONFIG_9P_FSCACHE if (v9ses->cache & CACHE_FSCACHE) fscache_use_cookie(v9fs_inode_cookie(v9inode), file->f_mode & FMODE_WRITE); #endif v9fs_fid_add_modes(fid, v9ses->flags, v9ses->cache, file->f_flags); v9fs_open_fid_add(inode, &fid); file->f_mode |= FMODE_CREATED; out: dput(res); return err; error: p9_fid_put(fid); goto out; } /** * v9fs_vfs_unlink - VFS unlink hook to delete an inode * @i: inode that is being unlinked * @d: dentry that is being unlinked * */ int v9fs_vfs_unlink(struct inode *i, struct dentry *d) { return v9fs_remove(i, d, 0); } /** * v9fs_vfs_rmdir - VFS unlink hook to delete a directory * @i: inode that is being unlinked * @d: dentry that is being unlinked * */ int v9fs_vfs_rmdir(struct inode *i, struct dentry *d) { return v9fs_remove(i, d, AT_REMOVEDIR); } /** * v9fs_vfs_rename - VFS hook to rename an inode * @idmap: The idmap of the mount * @old_dir: old dir inode * @old_dentry: old dentry * @new_dir: new dir inode * @new_dentry: new dentry * @flags: RENAME_* flags * */ int v9fs_vfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { int retval; struct inode *old_inode; struct inode *new_inode; struct v9fs_session_info *v9ses; struct p9_fid *oldfid = NULL, *dfid = NULL; struct p9_fid *olddirfid = NULL; struct p9_fid *newdirfid = NULL; struct p9_wstat wstat; if (flags) return -EINVAL; p9_debug(P9_DEBUG_VFS, "\n"); old_inode = d_inode(old_dentry); new_inode = d_inode(new_dentry); v9ses = v9fs_inode2v9ses(old_inode); oldfid = v9fs_fid_lookup(old_dentry); if (IS_ERR(oldfid)) return PTR_ERR(oldfid); dfid = v9fs_parent_fid(old_dentry); olddirfid = clone_fid(dfid); p9_fid_put(dfid); dfid = NULL; if (IS_ERR(olddirfid)) { retval = PTR_ERR(olddirfid); goto error; } dfid = v9fs_parent_fid(new_dentry); newdirfid = clone_fid(dfid); p9_fid_put(dfid); dfid = NULL; if (IS_ERR(newdirfid)) { retval = PTR_ERR(newdirfid); goto error; } down_write(&v9ses->rename_sem); if (v9fs_proto_dotl(v9ses)) { retval = p9_client_renameat(olddirfid, old_dentry->d_name.name, newdirfid, new_dentry->d_name.name); if (retval == -EOPNOTSUPP) retval = p9_client_rename(oldfid, newdirfid, new_dentry->d_name.name); if (retval != -EOPNOTSUPP) goto error_locked; } if (old_dentry->d_parent != new_dentry->d_parent) { /* * 9P .u can only handle file rename in the same directory */ p9_debug(P9_DEBUG_ERROR, "old dir and new dir are different\n"); retval = -EXDEV; goto error_locked; } v9fs_blank_wstat(&wstat); wstat.muid = v9ses->uname; wstat.name = new_dentry->d_name.name; retval = p9_client_wstat(oldfid, &wstat); error_locked: if (!retval) { if (new_inode) { if (S_ISDIR(new_inode->i_mode)) clear_nlink(new_inode); else v9fs_dec_count(new_inode); } if (S_ISDIR(old_inode->i_mode)) { if (!new_inode) inc_nlink(new_dir); v9fs_dec_count(old_dir); } v9fs_invalidate_inode_attr(old_inode); v9fs_invalidate_inode_attr(old_dir); v9fs_invalidate_inode_attr(new_dir); /* successful rename */ d_move(old_dentry, new_dentry); } up_write(&v9ses->rename_sem); error: p9_fid_put(newdirfid); p9_fid_put(olddirfid); p9_fid_put(oldfid); return retval; } /** * v9fs_vfs_getattr - retrieve file metadata * @idmap: idmap of the mount * @path: Object to query * @stat: metadata structure to populate * @request_mask: Mask of STATX_xxx flags indicating the caller's interests * @flags: AT_STATX_xxx setting * */ static int v9fs_vfs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { struct dentry *dentry = path->dentry; struct inode *inode = d_inode(dentry); struct v9fs_session_info *v9ses; struct p9_fid *fid; struct p9_wstat *st; p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry); v9ses = v9fs_dentry2v9ses(dentry); if (v9ses->cache & (CACHE_META|CACHE_LOOSE)) { generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); return 0; } else if (v9ses->cache & CACHE_WRITEBACK) { if (S_ISREG(inode->i_mode)) { int retval = filemap_fdatawrite(inode->i_mapping); if (retval) p9_debug(P9_DEBUG_ERROR, "flushing writeback during getattr returned %d\n", retval); } } fid = v9fs_fid_lookup(dentry); if (IS_ERR(fid)) return PTR_ERR(fid); st = p9_client_stat(fid); p9_fid_put(fid); if (IS_ERR(st)) return PTR_ERR(st); v9fs_stat2inode(st, d_inode(dentry), dentry->d_sb, 0); generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(dentry), stat); p9stat_free(st); kfree(st); return 0; } /** * v9fs_vfs_setattr - set file metadata * @idmap: idmap of the mount * @dentry: file whose metadata to set * @iattr: metadata assignment structure * */ static int v9fs_vfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { int retval, use_dentry = 0; struct inode *inode = d_inode(dentry); struct v9fs_session_info *v9ses; struct p9_fid *fid = NULL; struct p9_wstat wstat; p9_debug(P9_DEBUG_VFS, "\n"); retval = setattr_prepare(&nop_mnt_idmap, dentry, iattr); if (retval) return retval; v9ses = v9fs_dentry2v9ses(dentry); if (iattr->ia_valid & ATTR_FILE) { fid = iattr->ia_file->private_data; WARN_ON(!fid); } if (!fid) { fid = v9fs_fid_lookup(dentry); use_dentry = 1; } if (IS_ERR(fid)) return PTR_ERR(fid); v9fs_blank_wstat(&wstat); if (iattr->ia_valid & ATTR_MODE) wstat.mode = unixmode2p9mode(v9ses, iattr->ia_mode); if (iattr->ia_valid & ATTR_MTIME) wstat.mtime = iattr->ia_mtime.tv_sec; if (iattr->ia_valid & ATTR_ATIME) wstat.atime = iattr->ia_atime.tv_sec; if (iattr->ia_valid & ATTR_SIZE) wstat.length = iattr->ia_size; if (v9fs_proto_dotu(v9ses)) { if (iattr->ia_valid & ATTR_UID) wstat.n_uid = iattr->ia_uid; if (iattr->ia_valid & ATTR_GID) wstat.n_gid = iattr->ia_gid; } /* Write all dirty data */ if (d_is_reg(dentry)) { retval = filemap_fdatawrite(inode->i_mapping); if (retval) p9_debug(P9_DEBUG_ERROR, "flushing writeback during setattr returned %d\n", retval); } retval = p9_client_wstat(fid, &wstat); if (use_dentry) p9_fid_put(fid); if (retval < 0) return retval; if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size != i_size_read(inode)) { truncate_setsize(inode, iattr->ia_size); netfs_resize_file(netfs_inode(inode), iattr->ia_size, true); #ifdef CONFIG_9P_FSCACHE if (v9ses->cache & CACHE_FSCACHE) { struct v9fs_inode *v9inode = V9FS_I(inode); fscache_resize_cookie(v9fs_inode_cookie(v9inode), iattr->ia_size); } #endif } v9fs_invalidate_inode_attr(inode); setattr_copy(&nop_mnt_idmap, inode, iattr); mark_inode_dirty(inode); return 0; } /** * v9fs_stat2inode - populate an inode structure with mistat info * @stat: Plan 9 metadata (mistat) structure * @inode: inode to populate * @sb: superblock of filesystem * @flags: control flags (e.g. V9FS_STAT2INODE_KEEP_ISIZE) * */ void v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode, struct super_block *sb, unsigned int flags) { umode_t mode; struct v9fs_session_info *v9ses = sb->s_fs_info; struct v9fs_inode *v9inode = V9FS_I(inode); inode_set_atime(inode, stat->atime, 0); inode_set_mtime(inode, stat->mtime, 0); inode_set_ctime(inode, stat->mtime, 0); inode->i_uid = v9ses->dfltuid; inode->i_gid = v9ses->dfltgid; if (v9fs_proto_dotu(v9ses)) { inode->i_uid = stat->n_uid; inode->i_gid = stat->n_gid; } if ((S_ISREG(inode->i_mode)) || (S_ISDIR(inode->i_mode))) { if (v9fs_proto_dotu(v9ses)) { unsigned int i_nlink; /* * Hadlink support got added later to the .u extension. * So there can be a server out there that doesn't * support this even with .u extension. That would * just leave us with stat->extension being an empty * string, though. */ /* HARDLINKCOUNT %u */ if (sscanf(stat->extension, " HARDLINKCOUNT %u", &i_nlink) == 1) set_nlink(inode, i_nlink); } } mode = p9mode2perm(v9ses, stat); mode |= inode->i_mode & ~S_IALLUGO; inode->i_mode = mode; v9inode->netfs.remote_i_size = stat->length; if (!(flags & V9FS_STAT2INODE_KEEP_ISIZE)) v9fs_i_size_write(inode, stat->length); /* not real number of blocks, but 512 byte ones ... */ inode->i_blocks = (stat->length + 512 - 1) >> 9; v9inode->cache_validity &= ~V9FS_INO_INVALID_ATTR; } /** * v9fs_vfs_get_link - follow a symlink path * @dentry: dentry for symlink * @inode: inode for symlink * @done: delayed call for when we are done with the return value */ static const char *v9fs_vfs_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { struct v9fs_session_info *v9ses; struct p9_fid *fid; struct p9_wstat *st; char *res; if (!dentry) return ERR_PTR(-ECHILD); v9ses = v9fs_dentry2v9ses(dentry); if (!v9fs_proto_dotu(v9ses)) return ERR_PTR(-EBADF); p9_debug(P9_DEBUG_VFS, "%pd\n", dentry); fid = v9fs_fid_lookup(dentry); if (IS_ERR(fid)) return ERR_CAST(fid); st = p9_client_stat(fid); p9_fid_put(fid); if (IS_ERR(st)) return ERR_CAST(st); if (!(st->mode & P9_DMSYMLINK)) { p9stat_free(st); kfree(st); return ERR_PTR(-EINVAL); } res = st->extension; st->extension = NULL; if (strlen(res) >= PATH_MAX) res[PATH_MAX - 1] = '\0'; p9stat_free(st); kfree(st); set_delayed_call(done, kfree_link, res); return res; } /** * v9fs_vfs_mkspecial - create a special file * @dir: inode to create special file in * @dentry: dentry to create * @perm: mode to create special file * @extension: 9p2000.u format extension string representing special file * */ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry, u32 perm, const char *extension) { struct p9_fid *fid; struct v9fs_session_info *v9ses; v9ses = v9fs_inode2v9ses(dir); if (!v9fs_proto_dotu(v9ses)) { p9_debug(P9_DEBUG_ERROR, "not extended\n"); return -EPERM; } fid = v9fs_create(v9ses, dir, dentry, (char *) extension, perm, P9_OREAD); if (IS_ERR(fid)) return PTR_ERR(fid); v9fs_invalidate_inode_attr(dir); p9_fid_put(fid); return 0; } /** * v9fs_vfs_symlink - helper function to create symlinks * @idmap: idmap of the mount * @dir: directory inode containing symlink * @dentry: dentry for symlink * @symname: symlink data * * See Also: 9P2000.u RFC for more information * */ static int v9fs_vfs_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) { p9_debug(P9_DEBUG_VFS, " %lu,%pd,%s\n", dir->i_ino, dentry, symname); return v9fs_vfs_mkspecial(dir, dentry, P9_DMSYMLINK, symname); } #define U32_MAX_DIGITS 10 /** * v9fs_vfs_link - create a hardlink * @old_dentry: dentry for file to link to * @dir: inode destination for new link * @dentry: dentry for link * */ static int v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { int retval; char name[1 + U32_MAX_DIGITS + 2]; /* sign + number + \n + \0 */ struct p9_fid *oldfid; p9_debug(P9_DEBUG_VFS, " %lu,%pd,%pd\n", dir->i_ino, dentry, old_dentry); oldfid = v9fs_fid_clone(old_dentry); if (IS_ERR(oldfid)) return PTR_ERR(oldfid); sprintf(name, "%d\n", oldfid->fid); retval = v9fs_vfs_mkspecial(dir, dentry, P9_DMLINK, name); if (!retval) { v9fs_refresh_inode(oldfid, d_inode(old_dentry)); v9fs_invalidate_inode_attr(dir); } p9_fid_put(oldfid); return retval; } /** * v9fs_vfs_mknod - create a special file * @idmap: idmap of the mount * @dir: inode destination for new link * @dentry: dentry for file * @mode: mode for creation * @rdev: device associated with special file * */ static int v9fs_vfs_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir); int retval; char name[2 + U32_MAX_DIGITS + 1 + U32_MAX_DIGITS + 1]; u32 perm; p9_debug(P9_DEBUG_VFS, " %lu,%pd mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino, dentry, mode, MAJOR(rdev), MINOR(rdev)); /* build extension */ if (S_ISBLK(mode)) sprintf(name, "b %u %u", MAJOR(rdev), MINOR(rdev)); else if (S_ISCHR(mode)) sprintf(name, "c %u %u", MAJOR(rdev), MINOR(rdev)); else *name = 0; perm = unixmode2p9mode(v9ses, mode); retval = v9fs_vfs_mkspecial(dir, dentry, perm, name); return retval; } int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode) { int umode; dev_t rdev; struct p9_wstat *st; struct v9fs_session_info *v9ses; unsigned int flags; v9ses = v9fs_inode2v9ses(inode); st = p9_client_stat(fid); if (IS_ERR(st)) return PTR_ERR(st); /* * Don't update inode if the file type is different */ umode = p9mode2unixmode(v9ses, st, &rdev); if (inode_wrong_type(inode, umode)) goto out; /* * We don't want to refresh inode->i_size, * because we may have cached data */ flags = (v9ses->cache & CACHE_LOOSE) ? V9FS_STAT2INODE_KEEP_ISIZE : 0; v9fs_stat2inode(st, inode, inode->i_sb, flags); out: p9stat_free(st); kfree(st); return 0; } static const struct inode_operations v9fs_dir_inode_operations_dotu = { .create = v9fs_vfs_create, .lookup = v9fs_vfs_lookup, .atomic_open = v9fs_vfs_atomic_open, .symlink = v9fs_vfs_symlink, .link = v9fs_vfs_link, .unlink = v9fs_vfs_unlink, .mkdir = v9fs_vfs_mkdir, .rmdir = v9fs_vfs_rmdir, .mknod = v9fs_vfs_mknod, .rename = v9fs_vfs_rename, .getattr = v9fs_vfs_getattr, .setattr = v9fs_vfs_setattr, }; static const struct inode_operations v9fs_dir_inode_operations = { .create = v9fs_vfs_create, .lookup = v9fs_vfs_lookup, .atomic_open = v9fs_vfs_atomic_open, .unlink = v9fs_vfs_unlink, .mkdir = v9fs_vfs_mkdir, .rmdir = v9fs_vfs_rmdir, .mknod = v9fs_vfs_mknod, .rename = v9fs_vfs_rename, .getattr = v9fs_vfs_getattr, .setattr = v9fs_vfs_setattr, }; static const struct inode_operations v9fs_file_inode_operations = { .getattr = v9fs_vfs_getattr, .setattr = v9fs_vfs_setattr, }; static const struct inode_operations v9fs_symlink_inode_operations = { .get_link = v9fs_vfs_get_link, .getattr = v9fs_vfs_getattr, .setattr = v9fs_vfs_setattr, }; |
| 9 9 9 9 9 7 1 8 8 8 8 8 8 6 3 3 6 6 3 6 6 6 6 6 6 6 2 2 2 2 2 6 6 6 7 7 7 6 6 8 8 8 8 1 1 4 4 4 8 8 8 8 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 | // SPDX-License-Identifier: GPL-2.0-only /* Object lifetime handling and tracing. * * Copyright (C) 2022 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/slab.h> #include <linux/mempool.h> #include <linux/delay.h> #include "internal.h" /* * Allocate an I/O request and initialise it. */ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping, struct file *file, loff_t start, size_t len, enum netfs_io_origin origin) { static atomic_t debug_ids; struct inode *inode = file ? file_inode(file) : mapping->host; struct netfs_inode *ctx = netfs_inode(inode); struct netfs_io_request *rreq; mempool_t *mempool = ctx->ops->request_pool ?: &netfs_request_pool; struct kmem_cache *cache = mempool->pool_data; int ret; for (;;) { rreq = mempool_alloc(mempool, GFP_KERNEL); if (rreq) break; msleep(10); } memset(rreq, 0, kmem_cache_size(cache)); rreq->start = start; rreq->len = len; rreq->upper_len = len; rreq->origin = origin; rreq->netfs_ops = ctx->ops; rreq->mapping = mapping; rreq->inode = inode; rreq->i_size = i_size_read(inode); rreq->debug_id = atomic_inc_return(&debug_ids); rreq->wsize = INT_MAX; spin_lock_init(&rreq->lock); INIT_LIST_HEAD(&rreq->io_streams[0].subrequests); INIT_LIST_HEAD(&rreq->io_streams[1].subrequests); INIT_LIST_HEAD(&rreq->subrequests); INIT_WORK(&rreq->work, NULL); refcount_set(&rreq->ref, 1); __set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags); if (file && file->f_flags & O_NONBLOCK) __set_bit(NETFS_RREQ_NONBLOCK, &rreq->flags); if (rreq->netfs_ops->init_request) { ret = rreq->netfs_ops->init_request(rreq, file); if (ret < 0) { mempool_free(rreq, rreq->netfs_ops->request_pool ?: &netfs_request_pool); return ERR_PTR(ret); } } atomic_inc(&ctx->io_count); trace_netfs_rreq_ref(rreq->debug_id, 1, netfs_rreq_trace_new); netfs_proc_add_rreq(rreq); netfs_stat(&netfs_n_rh_rreq); return rreq; } void netfs_get_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace what) { int r; __refcount_inc(&rreq->ref, &r); trace_netfs_rreq_ref(rreq->debug_id, r + 1, what); } void netfs_clear_subrequests(struct netfs_io_request *rreq, bool was_async) { struct netfs_io_subrequest *subreq; struct netfs_io_stream *stream; int s; while (!list_empty(&rreq->subrequests)) { subreq = list_first_entry(&rreq->subrequests, struct netfs_io_subrequest, rreq_link); list_del(&subreq->rreq_link); netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_clear); } for (s = 0; s < ARRAY_SIZE(rreq->io_streams); s++) { stream = &rreq->io_streams[s]; while (!list_empty(&stream->subrequests)) { subreq = list_first_entry(&stream->subrequests, struct netfs_io_subrequest, rreq_link); list_del(&subreq->rreq_link); netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_clear); } } } static void netfs_free_request_rcu(struct rcu_head *rcu) { struct netfs_io_request *rreq = container_of(rcu, struct netfs_io_request, rcu); mempool_free(rreq, rreq->netfs_ops->request_pool ?: &netfs_request_pool); netfs_stat_d(&netfs_n_rh_rreq); } static void netfs_free_request(struct work_struct *work) { struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work); struct netfs_inode *ictx = netfs_inode(rreq->inode); unsigned int i; trace_netfs_rreq(rreq, netfs_rreq_trace_free); netfs_proc_del_rreq(rreq); netfs_clear_subrequests(rreq, false); if (rreq->netfs_ops->free_request) rreq->netfs_ops->free_request(rreq); if (rreq->cache_resources.ops) rreq->cache_resources.ops->end_operation(&rreq->cache_resources); if (rreq->direct_bv) { for (i = 0; i < rreq->direct_bv_count; i++) { if (rreq->direct_bv[i].bv_page) { if (rreq->direct_bv_unpin) unpin_user_page(rreq->direct_bv[i].bv_page); } } kvfree(rreq->direct_bv); } if (atomic_dec_and_test(&ictx->io_count)) wake_up_var(&ictx->io_count); call_rcu(&rreq->rcu, netfs_free_request_rcu); } void netfs_put_request(struct netfs_io_request *rreq, bool was_async, enum netfs_rreq_ref_trace what) { unsigned int debug_id; bool dead; int r; if (rreq) { debug_id = rreq->debug_id; dead = __refcount_dec_and_test(&rreq->ref, &r); trace_netfs_rreq_ref(debug_id, r - 1, what); if (dead) { if (was_async) { rreq->work.func = netfs_free_request; if (!queue_work(system_unbound_wq, &rreq->work)) BUG(); } else { netfs_free_request(&rreq->work); } } } } /* * Allocate and partially initialise an I/O request structure. */ struct netfs_io_subrequest *netfs_alloc_subrequest(struct netfs_io_request *rreq) { struct netfs_io_subrequest *subreq; mempool_t *mempool = rreq->netfs_ops->subrequest_pool ?: &netfs_subrequest_pool; struct kmem_cache *cache = mempool->pool_data; for (;;) { subreq = mempool_alloc(rreq->netfs_ops->subrequest_pool ?: &netfs_subrequest_pool, GFP_KERNEL); if (subreq) break; msleep(10); } memset(subreq, 0, kmem_cache_size(cache)); INIT_WORK(&subreq->work, NULL); INIT_LIST_HEAD(&subreq->rreq_link); refcount_set(&subreq->ref, 2); subreq->rreq = rreq; subreq->debug_index = atomic_inc_return(&rreq->subreq_counter); netfs_get_request(rreq, netfs_rreq_trace_get_subreq); netfs_stat(&netfs_n_rh_sreq); return subreq; } void netfs_get_subrequest(struct netfs_io_subrequest *subreq, enum netfs_sreq_ref_trace what) { int r; __refcount_inc(&subreq->ref, &r); trace_netfs_sreq_ref(subreq->rreq->debug_id, subreq->debug_index, r + 1, what); } static void netfs_free_subrequest(struct netfs_io_subrequest *subreq, bool was_async) { struct netfs_io_request *rreq = subreq->rreq; trace_netfs_sreq(subreq, netfs_sreq_trace_free); if (rreq->netfs_ops->free_subrequest) rreq->netfs_ops->free_subrequest(subreq); mempool_free(subreq, rreq->netfs_ops->subrequest_pool ?: &netfs_subrequest_pool); netfs_stat_d(&netfs_n_rh_sreq); netfs_put_request(rreq, was_async, netfs_rreq_trace_put_subreq); } void netfs_put_subrequest(struct netfs_io_subrequest *subreq, bool was_async, enum netfs_sreq_ref_trace what) { unsigned int debug_index = subreq->debug_index; unsigned int debug_id = subreq->rreq->debug_id; bool dead; int r; dead = __refcount_dec_and_test(&subreq->ref, &r); trace_netfs_sreq_ref(debug_id, debug_index, r - 1, what); if (dead) netfs_free_subrequest(subreq, was_async); } |
| 318 119 318 317 317 303 304 305 304 304 102 304 101 230 122 230 303 304 305 262 152 152 152 152 152 5 305 328 305 83 83 327 81 81 81 81 81 81 81 81 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 | /* * Copyright (C) 2014 Red Hat * Author: Rob Clark <robdclark@gmail.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #include <drm/drm_atomic.h> #include <drm/drm_crtc.h> #include <drm/drm_device.h> #include <drm/drm_modeset_lock.h> #include <drm/drm_print.h> /** * DOC: kms locking * * As KMS moves toward more fine grained locking, and atomic ioctl where * userspace can indirectly control locking order, it becomes necessary * to use &ww_mutex and acquire-contexts to avoid deadlocks. But because * the locking is more distributed around the driver code, we want a bit * of extra utility/tracking out of our acquire-ctx. This is provided * by &struct drm_modeset_lock and &struct drm_modeset_acquire_ctx. * * For basic principles of &ww_mutex, see: Documentation/locking/ww-mutex-design.rst * * The basic usage pattern is to:: * * drm_modeset_acquire_init(ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE) * retry: * foreach (lock in random_ordered_set_of_locks) { * ret = drm_modeset_lock(lock, ctx) * if (ret == -EDEADLK) { * ret = drm_modeset_backoff(ctx); * if (!ret) * goto retry; * } * if (ret) * goto out; * } * ... do stuff ... * out: * drm_modeset_drop_locks(ctx); * drm_modeset_acquire_fini(ctx); * * For convenience this control flow is implemented in * DRM_MODESET_LOCK_ALL_BEGIN() and DRM_MODESET_LOCK_ALL_END() for the case * where all modeset locks need to be taken through drm_modeset_lock_all_ctx(). * * If all that is needed is a single modeset lock, then the &struct * drm_modeset_acquire_ctx is not needed and the locking can be simplified * by passing a NULL instead of ctx in the drm_modeset_lock() call or * calling drm_modeset_lock_single_interruptible(). To unlock afterwards * call drm_modeset_unlock(). * * On top of these per-object locks using &ww_mutex there's also an overall * &drm_mode_config.mutex, for protecting everything else. Mostly this means * probe state of connectors, and preventing hotplug add/removal of connectors. * * Finally there's a bunch of dedicated locks to protect drm core internal * lists and lookup data structures. */ static DEFINE_WW_CLASS(crtc_ww_class); #if IS_ENABLED(CONFIG_DRM_DEBUG_MODESET_LOCK) static noinline depot_stack_handle_t __drm_stack_depot_save(void) { unsigned long entries[8]; unsigned int n; n = stack_trace_save(entries, ARRAY_SIZE(entries), 1); return stack_depot_save(entries, n, GFP_NOWAIT | __GFP_NOWARN); } static void __drm_stack_depot_print(depot_stack_handle_t stack_depot) { struct drm_printer p = drm_dbg_printer(NULL, DRM_UT_KMS, "drm_modeset_lock"); unsigned long *entries; unsigned int nr_entries; char *buf; buf = kmalloc(PAGE_SIZE, GFP_NOWAIT | __GFP_NOWARN); if (!buf) return; nr_entries = stack_depot_fetch(stack_depot, &entries); stack_trace_snprint(buf, PAGE_SIZE, entries, nr_entries, 2); drm_printf(&p, "attempting to lock a contended lock without backoff:\n%s", buf); kfree(buf); } static void __drm_stack_depot_init(void) { stack_depot_init(); } #else /* CONFIG_DRM_DEBUG_MODESET_LOCK */ static depot_stack_handle_t __drm_stack_depot_save(void) { return 0; } static void __drm_stack_depot_print(depot_stack_handle_t stack_depot) { } static void __drm_stack_depot_init(void) { } #endif /* CONFIG_DRM_DEBUG_MODESET_LOCK */ /** * drm_modeset_lock_all - take all modeset locks * @dev: DRM device * * This function takes all modeset locks, suitable where a more fine-grained * scheme isn't (yet) implemented. Locks must be dropped by calling the * drm_modeset_unlock_all() function. * * This function is deprecated. It allocates a lock acquisition context and * stores it in &drm_device.mode_config. This facilitate conversion of * existing code because it removes the need to manually deal with the * acquisition context, but it is also brittle because the context is global * and care must be taken not to nest calls. New code should use the * drm_modeset_lock_all_ctx() function and pass in the context explicitly. */ void drm_modeset_lock_all(struct drm_device *dev) { struct drm_mode_config *config = &dev->mode_config; struct drm_modeset_acquire_ctx *ctx; int ret; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL | __GFP_NOFAIL); if (WARN_ON(!ctx)) return; mutex_lock(&config->mutex); drm_modeset_acquire_init(ctx, 0); retry: ret = drm_modeset_lock_all_ctx(dev, ctx); if (ret < 0) { if (ret == -EDEADLK) { drm_modeset_backoff(ctx); goto retry; } drm_modeset_acquire_fini(ctx); kfree(ctx); return; } ww_acquire_done(&ctx->ww_ctx); WARN_ON(config->acquire_ctx); /* * We hold the locks now, so it is safe to stash the acquisition * context for drm_modeset_unlock_all(). */ config->acquire_ctx = ctx; drm_warn_on_modeset_not_all_locked(dev); } EXPORT_SYMBOL(drm_modeset_lock_all); /** * drm_modeset_unlock_all - drop all modeset locks * @dev: DRM device * * This function drops all modeset locks taken by a previous call to the * drm_modeset_lock_all() function. * * This function is deprecated. It uses the lock acquisition context stored * in &drm_device.mode_config. This facilitates conversion of existing * code because it removes the need to manually deal with the acquisition * context, but it is also brittle because the context is global and care must * be taken not to nest calls. New code should pass the acquisition context * directly to the drm_modeset_drop_locks() function. */ void drm_modeset_unlock_all(struct drm_device *dev) { struct drm_mode_config *config = &dev->mode_config; struct drm_modeset_acquire_ctx *ctx = config->acquire_ctx; if (WARN_ON(!ctx)) return; config->acquire_ctx = NULL; drm_modeset_drop_locks(ctx); drm_modeset_acquire_fini(ctx); kfree(ctx); mutex_unlock(&dev->mode_config.mutex); } EXPORT_SYMBOL(drm_modeset_unlock_all); /** * drm_warn_on_modeset_not_all_locked - check that all modeset locks are locked * @dev: device * * Useful as a debug assert. */ void drm_warn_on_modeset_not_all_locked(struct drm_device *dev) { struct drm_crtc *crtc; /* Locking is currently fubar in the panic handler. */ if (oops_in_progress) return; drm_for_each_crtc(crtc, dev) WARN_ON(!drm_modeset_is_locked(&crtc->mutex)); WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex)); WARN_ON(!mutex_is_locked(&dev->mode_config.mutex)); } EXPORT_SYMBOL(drm_warn_on_modeset_not_all_locked); /** * drm_modeset_acquire_init - initialize acquire context * @ctx: the acquire context * @flags: 0 or %DRM_MODESET_ACQUIRE_INTERRUPTIBLE * * When passing %DRM_MODESET_ACQUIRE_INTERRUPTIBLE to @flags, * all calls to drm_modeset_lock() will perform an interruptible * wait. */ void drm_modeset_acquire_init(struct drm_modeset_acquire_ctx *ctx, uint32_t flags) { memset(ctx, 0, sizeof(*ctx)); ww_acquire_init(&ctx->ww_ctx, &crtc_ww_class); INIT_LIST_HEAD(&ctx->locked); if (flags & DRM_MODESET_ACQUIRE_INTERRUPTIBLE) ctx->interruptible = true; } EXPORT_SYMBOL(drm_modeset_acquire_init); /** * drm_modeset_acquire_fini - cleanup acquire context * @ctx: the acquire context */ void drm_modeset_acquire_fini(struct drm_modeset_acquire_ctx *ctx) { ww_acquire_fini(&ctx->ww_ctx); } EXPORT_SYMBOL(drm_modeset_acquire_fini); /** * drm_modeset_drop_locks - drop all locks * @ctx: the acquire context * * Drop all locks currently held against this acquire context. */ void drm_modeset_drop_locks(struct drm_modeset_acquire_ctx *ctx) { if (WARN_ON(ctx->contended)) __drm_stack_depot_print(ctx->stack_depot); while (!list_empty(&ctx->locked)) { struct drm_modeset_lock *lock; lock = list_first_entry(&ctx->locked, struct drm_modeset_lock, head); drm_modeset_unlock(lock); } } EXPORT_SYMBOL(drm_modeset_drop_locks); static inline int modeset_lock(struct drm_modeset_lock *lock, struct drm_modeset_acquire_ctx *ctx, bool interruptible, bool slow) { int ret; if (WARN_ON(ctx->contended)) __drm_stack_depot_print(ctx->stack_depot); if (ctx->trylock_only) { lockdep_assert_held(&ctx->ww_ctx); if (!ww_mutex_trylock(&lock->mutex, NULL)) return -EBUSY; else return 0; } else if (interruptible && slow) { ret = ww_mutex_lock_slow_interruptible(&lock->mutex, &ctx->ww_ctx); } else if (interruptible) { ret = ww_mutex_lock_interruptible(&lock->mutex, &ctx->ww_ctx); } else if (slow) { ww_mutex_lock_slow(&lock->mutex, &ctx->ww_ctx); ret = 0; } else { ret = ww_mutex_lock(&lock->mutex, &ctx->ww_ctx); } if (!ret) { WARN_ON(!list_empty(&lock->head)); list_add(&lock->head, &ctx->locked); } else if (ret == -EALREADY) { /* we already hold the lock.. this is fine. For atomic * we will need to be able to drm_modeset_lock() things * without having to keep track of what is already locked * or not. */ ret = 0; } else if (ret == -EDEADLK) { ctx->contended = lock; ctx->stack_depot = __drm_stack_depot_save(); } return ret; } /** * drm_modeset_backoff - deadlock avoidance backoff * @ctx: the acquire context * * If deadlock is detected (ie. drm_modeset_lock() returns -EDEADLK), * you must call this function to drop all currently held locks and * block until the contended lock becomes available. * * This function returns 0 on success, or -ERESTARTSYS if this context * is initialized with %DRM_MODESET_ACQUIRE_INTERRUPTIBLE and the * wait has been interrupted. */ int drm_modeset_backoff(struct drm_modeset_acquire_ctx *ctx) { struct drm_modeset_lock *contended = ctx->contended; ctx->contended = NULL; ctx->stack_depot = 0; if (WARN_ON(!contended)) return 0; drm_modeset_drop_locks(ctx); return modeset_lock(contended, ctx, ctx->interruptible, true); } EXPORT_SYMBOL(drm_modeset_backoff); /** * drm_modeset_lock_init - initialize lock * @lock: lock to init */ void drm_modeset_lock_init(struct drm_modeset_lock *lock) { ww_mutex_init(&lock->mutex, &crtc_ww_class); INIT_LIST_HEAD(&lock->head); __drm_stack_depot_init(); } EXPORT_SYMBOL(drm_modeset_lock_init); /** * drm_modeset_lock - take modeset lock * @lock: lock to take * @ctx: acquire ctx * * If @ctx is not NULL, then its ww acquire context is used and the * lock will be tracked by the context and can be released by calling * drm_modeset_drop_locks(). If -EDEADLK is returned, this means a * deadlock scenario has been detected and it is an error to attempt * to take any more locks without first calling drm_modeset_backoff(). * * If the @ctx is not NULL and initialized with * %DRM_MODESET_ACQUIRE_INTERRUPTIBLE, this function will fail with * -ERESTARTSYS when interrupted. * * If @ctx is NULL then the function call behaves like a normal, * uninterruptible non-nesting mutex_lock() call. */ int drm_modeset_lock(struct drm_modeset_lock *lock, struct drm_modeset_acquire_ctx *ctx) { if (ctx) return modeset_lock(lock, ctx, ctx->interruptible, false); ww_mutex_lock(&lock->mutex, NULL); return 0; } EXPORT_SYMBOL(drm_modeset_lock); /** * drm_modeset_lock_single_interruptible - take a single modeset lock * @lock: lock to take * * This function behaves as drm_modeset_lock() with a NULL context, * but performs interruptible waits. * * This function returns 0 on success, or -ERESTARTSYS when interrupted. */ int drm_modeset_lock_single_interruptible(struct drm_modeset_lock *lock) { return ww_mutex_lock_interruptible(&lock->mutex, NULL); } EXPORT_SYMBOL(drm_modeset_lock_single_interruptible); /** * drm_modeset_unlock - drop modeset lock * @lock: lock to release */ void drm_modeset_unlock(struct drm_modeset_lock *lock) { list_del_init(&lock->head); ww_mutex_unlock(&lock->mutex); } EXPORT_SYMBOL(drm_modeset_unlock); /** * drm_modeset_lock_all_ctx - take all modeset locks * @dev: DRM device * @ctx: lock acquisition context * * This function takes all modeset locks, suitable where a more fine-grained * scheme isn't (yet) implemented. * * Unlike drm_modeset_lock_all(), it doesn't take the &drm_mode_config.mutex * since that lock isn't required for modeset state changes. Callers which * need to grab that lock too need to do so outside of the acquire context * @ctx. * * Locks acquired with this function should be released by calling the * drm_modeset_drop_locks() function on @ctx. * * See also: DRM_MODESET_LOCK_ALL_BEGIN() and DRM_MODESET_LOCK_ALL_END() * * Returns: 0 on success or a negative error-code on failure. */ int drm_modeset_lock_all_ctx(struct drm_device *dev, struct drm_modeset_acquire_ctx *ctx) { struct drm_private_obj *privobj; struct drm_crtc *crtc; struct drm_plane *plane; int ret; ret = drm_modeset_lock(&dev->mode_config.connection_mutex, ctx); if (ret) return ret; drm_for_each_crtc(crtc, dev) { ret = drm_modeset_lock(&crtc->mutex, ctx); if (ret) return ret; } drm_for_each_plane(plane, dev) { ret = drm_modeset_lock(&plane->mutex, ctx); if (ret) return ret; } drm_for_each_privobj(privobj, dev) { ret = drm_modeset_lock(&privobj->lock, ctx); if (ret) return ret; } return 0; } EXPORT_SYMBOL(drm_modeset_lock_all_ctx); |
| 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* Asymmetric Public-key cryptography key type interface * * See Documentation/crypto/asymmetric-keys.rst * * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #ifndef _KEYS_ASYMMETRIC_TYPE_H #define _KEYS_ASYMMETRIC_TYPE_H #include <linux/key-type.h> #include <linux/verification.h> extern struct key_type key_type_asymmetric; /* * The key payload is four words. The asymmetric-type key uses them as * follows: */ enum asymmetric_payload_bits { asym_crypto, /* The data representing the key */ asym_subtype, /* Pointer to an asymmetric_key_subtype struct */ asym_key_ids, /* Pointer to an asymmetric_key_ids struct */ asym_auth /* The key's authorisation (signature, parent key ID) */ }; /* * Identifiers for an asymmetric key ID. We have three ways of looking up a * key derived from an X.509 certificate: * * (1) Serial Number & Issuer. Non-optional. This is the only valid way to * map a PKCS#7 signature to an X.509 certificate. * * (2) Issuer & Subject Unique IDs. Optional. These were the original way to * match X.509 certificates, but have fallen into disuse in favour of (3). * * (3) Auth & Subject Key Identifiers. Optional. SKIDs are only provided on * CA keys that are intended to sign other keys, so don't appear in end * user certificates unless forced. * * We could also support an PGP key identifier, which is just a SHA1 sum of the * public key and certain parameters, but since we don't support PGP keys at * the moment, we shall ignore those. * * What we actually do is provide a place where binary identifiers can be * stashed and then compare against them when checking for an id match. */ struct asymmetric_key_id { unsigned short len; unsigned char data[]; }; struct asymmetric_key_ids { void *id[3]; }; extern bool asymmetric_key_id_same(const struct asymmetric_key_id *kid1, const struct asymmetric_key_id *kid2); extern bool asymmetric_key_id_partial(const struct asymmetric_key_id *kid1, const struct asymmetric_key_id *kid2); extern struct asymmetric_key_id *asymmetric_key_generate_id(const void *val_1, size_t len_1, const void *val_2, size_t len_2); static inline const struct asymmetric_key_ids *asymmetric_key_ids(const struct key *key) { return key->payload.data[asym_key_ids]; } static inline const struct public_key *asymmetric_key_public_key(const struct key *key) { return key->payload.data[asym_crypto]; } extern struct key *find_asymmetric_key(struct key *keyring, const struct asymmetric_key_id *id_0, const struct asymmetric_key_id *id_1, const struct asymmetric_key_id *id_2, bool partial); int x509_load_certificate_list(const u8 cert_list[], const unsigned long list_size, const struct key *keyring); /* * The payload is at the discretion of the subtype. */ #endif /* _KEYS_ASYMMETRIC_TYPE_H */ |
| 21741 22183 2354 2351 2354 2354 2353 1384 1384 1296 1298 2351 2352 7512 7512 7489 7512 7512 7497 4118 4116 4706 4700 7497 7496 21784 21717 21783 21780 21727 21734 21729 10587 10604 2316 249 249 153 156 8 8142 18 289 7 11293 8220 8203 8211 11305 7460 222 9 22 1 222 222 222 222 184 19 19 19 19 19 19 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 | // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2021, Google LLC. * Pasha Tatashin <pasha.tatashin@soleen.com> */ #include <linux/kstrtox.h> #include <linux/mm.h> #include <linux/page_table_check.h> #include <linux/swap.h> #include <linux/swapops.h> #undef pr_fmt #define pr_fmt(fmt) "page_table_check: " fmt struct page_table_check { atomic_t anon_map_count; atomic_t file_map_count; }; static bool __page_table_check_enabled __initdata = IS_ENABLED(CONFIG_PAGE_TABLE_CHECK_ENFORCED); DEFINE_STATIC_KEY_TRUE(page_table_check_disabled); EXPORT_SYMBOL(page_table_check_disabled); static int __init early_page_table_check_param(char *buf) { return kstrtobool(buf, &__page_table_check_enabled); } early_param("page_table_check", early_page_table_check_param); static bool __init need_page_table_check(void) { return __page_table_check_enabled; } static void __init init_page_table_check(void) { if (!__page_table_check_enabled) return; static_branch_disable(&page_table_check_disabled); } struct page_ext_operations page_table_check_ops = { .size = sizeof(struct page_table_check), .need = need_page_table_check, .init = init_page_table_check, .need_shared_flags = false, }; static struct page_table_check *get_page_table_check(struct page_ext *page_ext) { BUG_ON(!page_ext); return page_ext_data(page_ext, &page_table_check_ops); } /* * An entry is removed from the page table, decrement the counters for that page * verify that it is of correct type and counters do not become negative. */ static void page_table_check_clear(unsigned long pfn, unsigned long pgcnt) { struct page_ext *page_ext; struct page *page; unsigned long i; bool anon; if (!pfn_valid(pfn)) return; page = pfn_to_page(pfn); page_ext = page_ext_get(page); if (!page_ext) return; BUG_ON(PageSlab(page)); anon = PageAnon(page); for (i = 0; i < pgcnt; i++) { struct page_table_check *ptc = get_page_table_check(page_ext); if (anon) { BUG_ON(atomic_read(&ptc->file_map_count)); BUG_ON(atomic_dec_return(&ptc->anon_map_count) < 0); } else { BUG_ON(atomic_read(&ptc->anon_map_count)); BUG_ON(atomic_dec_return(&ptc->file_map_count) < 0); } page_ext = page_ext_next(page_ext); } page_ext_put(page_ext); } /* * A new entry is added to the page table, increment the counters for that page * verify that it is of correct type and is not being mapped with a different * type to a different process. */ static void page_table_check_set(unsigned long pfn, unsigned long pgcnt, bool rw) { struct page_ext *page_ext; struct page *page; unsigned long i; bool anon; if (!pfn_valid(pfn)) return; page = pfn_to_page(pfn); page_ext = page_ext_get(page); if (!page_ext) return; BUG_ON(PageSlab(page)); anon = PageAnon(page); for (i = 0; i < pgcnt; i++) { struct page_table_check *ptc = get_page_table_check(page_ext); if (anon) { BUG_ON(atomic_read(&ptc->file_map_count)); BUG_ON(atomic_inc_return(&ptc->anon_map_count) > 1 && rw); } else { BUG_ON(atomic_read(&ptc->anon_map_count)); BUG_ON(atomic_inc_return(&ptc->file_map_count) < 0); } page_ext = page_ext_next(page_ext); } page_ext_put(page_ext); } /* * page is on free list, or is being allocated, verify that counters are zeroes * crash if they are not. */ void __page_table_check_zero(struct page *page, unsigned int order) { struct page_ext *page_ext; unsigned long i; BUG_ON(PageSlab(page)); page_ext = page_ext_get(page); if (!page_ext) return; for (i = 0; i < (1ul << order); i++) { struct page_table_check *ptc = get_page_table_check(page_ext); BUG_ON(atomic_read(&ptc->anon_map_count)); BUG_ON(atomic_read(&ptc->file_map_count)); page_ext = page_ext_next(page_ext); } page_ext_put(page_ext); } void __page_table_check_pte_clear(struct mm_struct *mm, pte_t pte) { if (&init_mm == mm) return; if (pte_user_accessible_page(pte)) { page_table_check_clear(pte_pfn(pte), PAGE_SIZE >> PAGE_SHIFT); } } EXPORT_SYMBOL(__page_table_check_pte_clear); void __page_table_check_pmd_clear(struct mm_struct *mm, pmd_t pmd) { if (&init_mm == mm) return; if (pmd_user_accessible_page(pmd)) { page_table_check_clear(pmd_pfn(pmd), PMD_SIZE >> PAGE_SHIFT); } } EXPORT_SYMBOL(__page_table_check_pmd_clear); void __page_table_check_pud_clear(struct mm_struct *mm, pud_t pud) { if (&init_mm == mm) return; if (pud_user_accessible_page(pud)) { page_table_check_clear(pud_pfn(pud), PUD_SIZE >> PAGE_SHIFT); } } EXPORT_SYMBOL(__page_table_check_pud_clear); /* Whether the swap entry cached writable information */ static inline bool swap_cached_writable(swp_entry_t entry) { return is_writable_device_exclusive_entry(entry) || is_writable_device_private_entry(entry) || is_writable_migration_entry(entry); } static inline void page_table_check_pte_flags(pte_t pte) { if (pte_present(pte) && pte_uffd_wp(pte)) WARN_ON_ONCE(pte_write(pte)); else if (is_swap_pte(pte) && pte_swp_uffd_wp(pte)) WARN_ON_ONCE(swap_cached_writable(pte_to_swp_entry(pte))); } void __page_table_check_ptes_set(struct mm_struct *mm, pte_t *ptep, pte_t pte, unsigned int nr) { unsigned int i; if (&init_mm == mm) return; page_table_check_pte_flags(pte); for (i = 0; i < nr; i++) __page_table_check_pte_clear(mm, ptep_get(ptep + i)); if (pte_user_accessible_page(pte)) page_table_check_set(pte_pfn(pte), nr, pte_write(pte)); } EXPORT_SYMBOL(__page_table_check_ptes_set); static inline void page_table_check_pmd_flags(pmd_t pmd) { if (pmd_present(pmd) && pmd_uffd_wp(pmd)) WARN_ON_ONCE(pmd_write(pmd)); else if (is_swap_pmd(pmd) && pmd_swp_uffd_wp(pmd)) WARN_ON_ONCE(swap_cached_writable(pmd_to_swp_entry(pmd))); } void __page_table_check_pmd_set(struct mm_struct *mm, pmd_t *pmdp, pmd_t pmd) { if (&init_mm == mm) return; page_table_check_pmd_flags(pmd); __page_table_check_pmd_clear(mm, *pmdp); if (pmd_user_accessible_page(pmd)) { page_table_check_set(pmd_pfn(pmd), PMD_SIZE >> PAGE_SHIFT, pmd_write(pmd)); } } EXPORT_SYMBOL(__page_table_check_pmd_set); void __page_table_check_pud_set(struct mm_struct *mm, pud_t *pudp, pud_t pud) { if (&init_mm == mm) return; __page_table_check_pud_clear(mm, *pudp); if (pud_user_accessible_page(pud)) { page_table_check_set(pud_pfn(pud), PUD_SIZE >> PAGE_SHIFT, pud_write(pud)); } } EXPORT_SYMBOL(__page_table_check_pud_set); void __page_table_check_pte_clear_range(struct mm_struct *mm, unsigned long addr, pmd_t pmd) { if (&init_mm == mm) return; if (!pmd_bad(pmd) && !pmd_leaf(pmd)) { pte_t *ptep = pte_offset_map(&pmd, addr); unsigned long i; if (WARN_ON(!ptep)) return; for (i = 0; i < PTRS_PER_PTE; i++) { __page_table_check_pte_clear(mm, ptep_get(ptep)); addr += PAGE_SIZE; ptep++; } pte_unmap(ptep - PTRS_PER_PTE); } } |
| 47 47 47 3 3 2 2 1 1 1 3 10 6 6 6 6 5 5 5 5 5 5 3 3 40 40 40 40 3 40 11 40 38 40 2 1 2 3 3 1 3 1 3 3 3 3 3 9 5 1 4 4 4 4 4 1 1 9 1 1 4 4 4 9 1 49 48 48 48 3 2 1 1 48 21 48 46 6 7 1 3 3 45 4 4 4 4 4 41 3 39 41 41 41 2 41 39 41 41 40 48 1 1 3 38 68 68 68 68 68 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 | // SPDX-License-Identifier: GPL-2.0-or-later /* * DCCP over IPv6 * Linux INET6 implementation * * Based on net/dccp6/ipv6.c * * Arnaldo Carvalho de Melo <acme@ghostprotocols.net> */ #include <linux/module.h> #include <linux/random.h> #include <linux/slab.h> #include <linux/xfrm.h> #include <linux/string.h> #include <net/addrconf.h> #include <net/inet_common.h> #include <net/inet_hashtables.h> #include <net/inet_sock.h> #include <net/inet6_connection_sock.h> #include <net/inet6_hashtables.h> #include <net/ip6_route.h> #include <net/ipv6.h> #include <net/protocol.h> #include <net/transp_v6.h> #include <net/ip6_checksum.h> #include <net/xfrm.h> #include <net/secure_seq.h> #include <net/netns/generic.h> #include <net/sock.h> #include <net/rstreason.h> #include "dccp.h" #include "ipv6.h" #include "feat.h" struct dccp_v6_pernet { struct sock *v6_ctl_sk; }; static unsigned int dccp_v6_pernet_id __read_mostly; /* The per-net v6_ctl_sk is used for sending RSTs and ACKs */ static const struct inet_connection_sock_af_ops dccp_ipv6_mapped; static const struct inet_connection_sock_af_ops dccp_ipv6_af_ops; /* add pseudo-header to DCCP checksum stored in skb->csum */ static inline __sum16 dccp_v6_csum_finish(struct sk_buff *skb, const struct in6_addr *saddr, const struct in6_addr *daddr) { return csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_DCCP, skb->csum); } static inline void dccp_v6_send_check(struct sock *sk, struct sk_buff *skb) { struct ipv6_pinfo *np = inet6_sk(sk); struct dccp_hdr *dh = dccp_hdr(skb); dccp_csum_outgoing(skb); dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &sk->sk_v6_daddr); } static inline __u64 dccp_v6_init_sequence(struct sk_buff *skb) { return secure_dccpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32, ipv6_hdr(skb)->saddr.s6_addr32, dccp_hdr(skb)->dccph_dport, dccp_hdr(skb)->dccph_sport ); } static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { const struct ipv6hdr *hdr; const struct dccp_hdr *dh; struct dccp_sock *dp; struct ipv6_pinfo *np; struct sock *sk; int err; __u64 seq; struct net *net = dev_net(skb->dev); if (!pskb_may_pull(skb, offset + sizeof(*dh))) return -EINVAL; dh = (struct dccp_hdr *)(skb->data + offset); if (!pskb_may_pull(skb, offset + __dccp_basic_hdr_len(dh))) return -EINVAL; hdr = (const struct ipv6hdr *)skb->data; dh = (struct dccp_hdr *)(skb->data + offset); sk = __inet6_lookup_established(net, &dccp_hashinfo, &hdr->daddr, dh->dccph_dport, &hdr->saddr, ntohs(dh->dccph_sport), inet6_iif(skb), 0); if (!sk) { __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return -ENOENT; } if (sk->sk_state == DCCP_TIME_WAIT) { inet_twsk_put(inet_twsk(sk)); return 0; } seq = dccp_hdr_seq(dh); if (sk->sk_state == DCCP_NEW_SYN_RECV) { dccp_req_err(sk, seq); return 0; } bh_lock_sock(sk); if (sock_owned_by_user(sk)) __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == DCCP_CLOSED) goto out; dp = dccp_sk(sk); if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) && !between48(seq, dp->dccps_awl, dp->dccps_awh)) { __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } np = inet6_sk(sk); if (type == NDISC_REDIRECT) { if (!sock_owned_by_user(sk)) { struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); if (dst) dst->ops->redirect(dst, sk, skb); } goto out; } if (type == ICMPV6_PKT_TOOBIG) { struct dst_entry *dst = NULL; if (!ip6_sk_accept_pmtu(sk)) goto out; if (sock_owned_by_user(sk)) goto out; if ((1 << sk->sk_state) & (DCCPF_LISTEN | DCCPF_CLOSED)) goto out; dst = inet6_csk_update_pmtu(sk, ntohl(info)); if (!dst) goto out; if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) dccp_sync_mss(sk, dst_mtu(dst)); goto out; } icmpv6_err_convert(type, code, &err); /* Might be for an request_sock */ switch (sk->sk_state) { case DCCP_REQUESTING: case DCCP_RESPOND: /* Cannot happen. It can, it SYNs are crossed. --ANK */ if (!sock_owned_by_user(sk)) { __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS); sk->sk_err = err; /* * Wake people up to see the error * (see connect in sock.c) */ sk_error_report(sk); dccp_done(sk); } else { WRITE_ONCE(sk->sk_err_soft, err); } goto out; } if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) { sk->sk_err = err; sk_error_report(sk); } else { WRITE_ONCE(sk->sk_err_soft, err); } out: bh_unlock_sock(sk); sock_put(sk); return 0; } static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req) { struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; struct in6_addr *final_p, final; struct flowi6 fl6; int err = -1; struct dst_entry *dst; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_DCCP; fl6.daddr = ireq->ir_v6_rmt_addr; fl6.saddr = ireq->ir_v6_loc_addr; fl6.flowlabel = 0; fl6.flowi6_oif = ireq->ir_iif; fl6.fl6_dport = ireq->ir_rmt_port; fl6.fl6_sport = htons(ireq->ir_num); security_req_classify_flow(req, flowi6_to_flowi_common(&fl6)); rcu_read_lock(); final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final); rcu_read_unlock(); dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p); if (IS_ERR(dst)) { err = PTR_ERR(dst); dst = NULL; goto done; } skb = dccp_make_response(sk, dst, req); if (skb != NULL) { struct dccp_hdr *dh = dccp_hdr(skb); struct ipv6_txoptions *opt; dh->dccph_checksum = dccp_v6_csum_finish(skb, &ireq->ir_v6_loc_addr, &ireq->ir_v6_rmt_addr); fl6.daddr = ireq->ir_v6_rmt_addr; rcu_read_lock(); opt = ireq->ipv6_opt; if (!opt) opt = rcu_dereference(np->opt); err = ip6_xmit(sk, skb, &fl6, READ_ONCE(sk->sk_mark), opt, np->tclass, READ_ONCE(sk->sk_priority)); rcu_read_unlock(); err = net_xmit_eval(err); } done: dst_release(dst); return err; } static void dccp_v6_reqsk_destructor(struct request_sock *req) { dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg); kfree(inet_rsk(req)->ipv6_opt); kfree_skb(inet_rsk(req)->pktopts); } static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb, enum sk_rst_reason reason) { const struct ipv6hdr *rxip6h; struct sk_buff *skb; struct flowi6 fl6; struct net *net = dev_net(skb_dst(rxskb)->dev); struct dccp_v6_pernet *pn; struct sock *ctl_sk; struct dst_entry *dst; if (dccp_hdr(rxskb)->dccph_type == DCCP_PKT_RESET) return; if (!ipv6_unicast_destination(rxskb)) return; pn = net_generic(net, dccp_v6_pernet_id); ctl_sk = pn->v6_ctl_sk; skb = dccp_ctl_make_reset(ctl_sk, rxskb); if (skb == NULL) return; rxip6h = ipv6_hdr(rxskb); dccp_hdr(skb)->dccph_checksum = dccp_v6_csum_finish(skb, &rxip6h->saddr, &rxip6h->daddr); memset(&fl6, 0, sizeof(fl6)); fl6.daddr = rxip6h->saddr; fl6.saddr = rxip6h->daddr; fl6.flowi6_proto = IPPROTO_DCCP; fl6.flowi6_oif = inet6_iif(rxskb); fl6.fl6_dport = dccp_hdr(skb)->dccph_dport; fl6.fl6_sport = dccp_hdr(skb)->dccph_sport; security_skb_classify_flow(rxskb, flowi6_to_flowi_common(&fl6)); /* sk = NULL, but it is safe for now. RST socket required. */ dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL); if (!IS_ERR(dst)) { skb_dst_set(skb, dst); ip6_xmit(ctl_sk, skb, &fl6, 0, NULL, 0, 0); DCCP_INC_STATS(DCCP_MIB_OUTSEGS); DCCP_INC_STATS(DCCP_MIB_OUTRSTS); return; } kfree_skb(skb); } static struct request_sock_ops dccp6_request_sock_ops = { .family = AF_INET6, .obj_size = sizeof(struct dccp6_request_sock), .rtx_syn_ack = dccp_v6_send_response, .send_ack = dccp_reqsk_send_ack, .destructor = dccp_v6_reqsk_destructor, .send_reset = dccp_v6_ctl_send_reset, .syn_ack_timeout = dccp_syn_ack_timeout, }; static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { struct request_sock *req; struct dccp_request_sock *dreq; struct inet_request_sock *ireq; struct ipv6_pinfo *np = inet6_sk(sk); const __be32 service = dccp_hdr_request(skb)->dccph_req_service; struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); if (skb->protocol == htons(ETH_P_IP)) return dccp_v4_conn_request(sk, skb); if (!ipv6_unicast_destination(skb)) return 0; /* discard, don't send a reset here */ if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) { __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS); return 0; } if (dccp_bad_service_code(sk, service)) { dcb->dccpd_reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; goto drop; } /* * There are no SYN attacks on IPv6, yet... */ dcb->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; if (inet_csk_reqsk_queue_is_full(sk)) goto drop; if (sk_acceptq_is_full(sk)) goto drop; req = inet_reqsk_alloc(&dccp6_request_sock_ops, sk, true); if (req == NULL) goto drop; if (dccp_reqsk_init(req, dccp_sk(sk), skb)) goto drop_and_free; dreq = dccp_rsk(req); if (dccp_parse_options(sk, dreq, skb)) goto drop_and_free; ireq = inet_rsk(req); ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; ireq->ireq_family = AF_INET6; ireq->ir_mark = inet_request_mark(sk, skb); if (security_inet_conn_request(sk, skb, req)) goto drop_and_free; if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { refcount_inc(&skb->users); ireq->pktopts = skb; } ireq->ir_iif = READ_ONCE(sk->sk_bound_dev_if); /* So that link locals have meaning */ if (!ireq->ir_iif && ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) ireq->ir_iif = inet6_iif(skb); /* * Step 3: Process LISTEN state * * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie * * Setting S.SWL/S.SWH to is deferred to dccp_create_openreq_child(). */ dreq->dreq_isr = dcb->dccpd_seq; dreq->dreq_gsr = dreq->dreq_isr; dreq->dreq_iss = dccp_v6_init_sequence(skb); dreq->dreq_gss = dreq->dreq_iss; dreq->dreq_service = service; if (dccp_v6_send_response(sk, req)) goto drop_and_free; if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT))) reqsk_free(req); else reqsk_put(req); return 0; drop_and_free: reqsk_free(req); drop: __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS); return -1; } static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst, struct request_sock *req_unhash, bool *own_req) { struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *newnp; const struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_txoptions *opt; struct inet_sock *newinet; struct dccp6_sock *newdp6; struct sock *newsk; if (skb->protocol == htons(ETH_P_IP)) { /* * v6 mapped */ newsk = dccp_v4_request_recv_sock(sk, skb, req, dst, req_unhash, own_req); if (newsk == NULL) return NULL; newdp6 = (struct dccp6_sock *)newsk; newinet = inet_sk(newsk); newinet->pinet6 = &newdp6->inet6; newnp = inet6_sk(newsk); memcpy(newnp, np, sizeof(struct ipv6_pinfo)); newnp->saddr = newsk->sk_v6_rcv_saddr; inet_csk(newsk)->icsk_af_ops = &dccp_ipv6_mapped; newsk->sk_backlog_rcv = dccp_v4_do_rcv; newnp->pktoptions = NULL; newnp->opt = NULL; newnp->ipv6_mc_list = NULL; newnp->ipv6_ac_list = NULL; newnp->ipv6_fl_list = NULL; newnp->mcast_oif = inet_iif(skb); newnp->mcast_hops = ip_hdr(skb)->ttl; /* * No need to charge this sock to the relevant IPv6 refcnt debug socks count * here, dccp_create_openreq_child now does this for us, see the comment in * that function for the gory details. -acme */ /* It is tricky place. Until this moment IPv4 tcp worked with IPv6 icsk.icsk_af_ops. Sync it now. */ dccp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie); return newsk; } if (sk_acceptq_is_full(sk)) goto out_overflow; if (!dst) { struct flowi6 fl6; dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_DCCP); if (!dst) goto out; } newsk = dccp_create_openreq_child(sk, req, skb); if (newsk == NULL) goto out_nonewsk; /* * No need to charge this sock to the relevant IPv6 refcnt debug socks * count here, dccp_create_openreq_child now does this for us, see the * comment in that function for the gory details. -acme */ ip6_dst_store(newsk, dst, NULL, NULL); newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM | NETIF_F_TSO); newdp6 = (struct dccp6_sock *)newsk; newinet = inet_sk(newsk); newinet->pinet6 = &newdp6->inet6; newnp = inet6_sk(newsk); memcpy(newnp, np, sizeof(struct ipv6_pinfo)); newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr; newnp->saddr = ireq->ir_v6_loc_addr; newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; newsk->sk_bound_dev_if = ireq->ir_iif; /* Now IPv6 options... First: no IPv4 options. */ newinet->inet_opt = NULL; /* Clone RX bits */ newnp->rxopt.all = np->rxopt.all; newnp->ipv6_mc_list = NULL; newnp->ipv6_ac_list = NULL; newnp->ipv6_fl_list = NULL; newnp->pktoptions = NULL; newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; /* * Clone native IPv6 options from listening socket (if any) * * Yes, keeping reference count would be much more clever, but we make * one more one thing there: reattach optmem to newsk. */ opt = ireq->ipv6_opt; if (!opt) opt = rcu_dereference(np->opt); if (opt) { opt = ipv6_dup_options(newsk, opt); RCU_INIT_POINTER(newnp->opt, opt); } inet_csk(newsk)->icsk_ext_hdr_len = 0; if (opt) inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen + opt->opt_flen; dccp_sync_mss(newsk, dst_mtu(dst)); newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; newinet->inet_rcv_saddr = LOOPBACK4_IPV6; if (__inet_inherit_port(sk, newsk) < 0) { inet_csk_prepare_forced_close(newsk); dccp_done(newsk); goto out; } *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), NULL); /* Clone pktoptions received with SYN, if we own the req */ if (*own_req && ireq->pktopts) { newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk); consume_skb(ireq->pktopts); ireq->pktopts = NULL; } return newsk; out_overflow: __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); out_nonewsk: dst_release(dst); out: __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS); return NULL; } /* The socket must have it's spinlock held when we get * here. * * We have a potential double-lock case here, so even when * doing backlog processing we use the BH locking scheme. * This is because we cannot sleep with the original spinlock * held. */ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) { struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *opt_skb = NULL; /* Imagine: socket is IPv6. IPv4 packet arrives, goes to IPv4 receive handler and backlogged. From backlog it always goes here. Kerboom... Fortunately, dccp_rcv_established and rcv_established handle them correctly, but it is not case with dccp_v6_hnd_req and dccp_v6_ctl_send_reset(). --ANK */ if (skb->protocol == htons(ETH_P_IP)) return dccp_v4_do_rcv(sk, skb); if (sk_filter(sk, skb)) goto discard; /* * socket locking is here for SMP purposes as backlog rcv is currently * called with bh processing disabled. */ /* Do Stevens' IPV6_PKTOPTIONS. Yes, guys, it is the only place in our code, where we may make it not affecting IPv4. The rest of code is protocol independent, and I do not like idea to uglify IPv4. Actually, all the idea behind IPV6_PKTOPTIONS looks not very well thought. For now we latch options, received in the last packet, enqueued by tcp. Feel free to propose better solution. --ANK (980728) */ if (np->rxopt.all) opt_skb = skb_clone_and_charge_r(skb, sk); if (sk->sk_state == DCCP_OPEN) { /* Fast path */ if (dccp_rcv_established(sk, skb, dccp_hdr(skb), skb->len)) goto reset; if (opt_skb) goto ipv6_pktoptions; return 0; } /* * Step 3: Process LISTEN state * If S.state == LISTEN, * If P.type == Request or P contains a valid Init Cookie option, * (* Must scan the packet's options to check for Init * Cookies. Only Init Cookies are processed here, * however; other options are processed in Step 8. This * scan need only be performed if the endpoint uses Init * Cookies *) * (* Generate a new socket and switch to that socket *) * Set S := new socket for this port pair * S.state = RESPOND * Choose S.ISS (initial seqno) or set from Init Cookies * Initialize S.GAR := S.ISS * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies * Continue with S.state == RESPOND * (* A Response packet will be generated in Step 11 *) * Otherwise, * Generate Reset(No Connection) unless P.type == Reset * Drop packet and return * * NOTE: the check for the packet types is done in * dccp_rcv_state_process */ if (dccp_rcv_state_process(sk, skb, dccp_hdr(skb), skb->len)) goto reset; if (opt_skb) goto ipv6_pktoptions; return 0; reset: dccp_v6_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); discard: if (opt_skb != NULL) __kfree_skb(opt_skb); kfree_skb(skb); return 0; /* Handling IPV6_PKTOPTIONS skb the similar * way it's done for net/ipv6/tcp_ipv6.c */ ipv6_pktoptions: if (!((1 << sk->sk_state) & (DCCPF_CLOSED | DCCPF_LISTEN))) { if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) WRITE_ONCE(np->mcast_oif, inet6_iif(opt_skb)); if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) WRITE_ONCE(np->mcast_hops, ipv6_hdr(opt_skb)->hop_limit); if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); if (inet6_test_bit(REPFLOW, sk)) np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); if (ipv6_opt_accepted(sk, opt_skb, &DCCP_SKB_CB(opt_skb)->header.h6)) { memmove(IP6CB(opt_skb), &DCCP_SKB_CB(opt_skb)->header.h6, sizeof(struct inet6_skb_parm)); opt_skb = xchg(&np->pktoptions, opt_skb); } else { __kfree_skb(opt_skb); opt_skb = xchg(&np->pktoptions, NULL); } } kfree_skb(opt_skb); return 0; } static int dccp_v6_rcv(struct sk_buff *skb) { const struct dccp_hdr *dh; bool refcounted; struct sock *sk; int min_cov; /* Step 1: Check header basics */ if (dccp_invalid_packet(skb)) goto discard_it; /* Step 1: If header checksum is incorrect, drop packet and return. */ if (dccp_v6_csum_finish(skb, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr)) { DCCP_WARN("dropped packet with invalid checksum\n"); goto discard_it; } dh = dccp_hdr(skb); DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(dh); DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type; if (dccp_packet_without_ack(skb)) DCCP_SKB_CB(skb)->dccpd_ack_seq = DCCP_PKT_WITHOUT_ACK_SEQ; else DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb); lookup: sk = __inet6_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh), dh->dccph_sport, dh->dccph_dport, inet6_iif(skb), 0, &refcounted); if (!sk) { dccp_pr_debug("failed to look up flow ID in table and " "get corresponding socket\n"); goto no_dccp_socket; } /* * Step 2: * ... or S.state == TIMEWAIT, * Generate Reset(No Connection) unless P.type == Reset * Drop packet and return */ if (sk->sk_state == DCCP_TIME_WAIT) { dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: do_time_wait\n"); inet_twsk_put(inet_twsk(sk)); goto no_dccp_socket; } if (sk->sk_state == DCCP_NEW_SYN_RECV) { struct request_sock *req = inet_reqsk(sk); struct sock *nsk; sk = req->rsk_listener; if (unlikely(sk->sk_state != DCCP_LISTEN)) { inet_csk_reqsk_queue_drop_and_put(sk, req); goto lookup; } sock_hold(sk); refcounted = true; nsk = dccp_check_req(sk, skb, req); if (!nsk) { reqsk_put(req); goto discard_and_relse; } if (nsk == sk) { reqsk_put(req); } else if (dccp_child_process(sk, nsk, skb)) { dccp_v6_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); goto discard_and_relse; } else { sock_put(sk); return 0; } } /* * RFC 4340, sec. 9.2.1: Minimum Checksum Coverage * o if MinCsCov = 0, only packets with CsCov = 0 are accepted * o if MinCsCov > 0, also accept packets with CsCov >= MinCsCov */ min_cov = dccp_sk(sk)->dccps_pcrlen; if (dh->dccph_cscov && (min_cov == 0 || dh->dccph_cscov < min_cov)) { dccp_pr_debug("Packet CsCov %d does not satisfy MinCsCov %d\n", dh->dccph_cscov, min_cov); /* FIXME: send Data Dropped option (see also dccp_v4_rcv) */ goto discard_and_relse; } if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; nf_reset_ct(skb); return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4, refcounted) ? -1 : 0; no_dccp_socket: if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard_it; /* * Step 2: * If no socket ... * Generate Reset(No Connection) unless P.type == Reset * Drop packet and return */ if (dh->dccph_type != DCCP_PKT_RESET) { DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; dccp_v6_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED); } discard_it: kfree_skb(skb); return 0; discard_and_relse: if (refcounted) sock_put(sk); goto discard_it; } static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *)uaddr; struct inet_connection_sock *icsk = inet_csk(sk); struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct dccp_sock *dp = dccp_sk(sk); struct in6_addr *saddr = NULL, *final_p, final; struct ipv6_txoptions *opt; struct flowi6 fl6; struct dst_entry *dst; int addr_type; int err; dp->dccps_role = DCCP_ROLE_CLIENT; if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; if (usin->sin6_family != AF_INET6) return -EAFNOSUPPORT; memset(&fl6, 0, sizeof(fl6)); if (inet6_test_bit(SNDFLOW, sk)) { fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK; IP6_ECN_flow_init(fl6.flowlabel); if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) { struct ip6_flowlabel *flowlabel; flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (IS_ERR(flowlabel)) return -EINVAL; fl6_sock_release(flowlabel); } } /* * connect() to INADDR_ANY means loopback (BSD'ism). */ if (ipv6_addr_any(&usin->sin6_addr)) usin->sin6_addr.s6_addr[15] = 1; addr_type = ipv6_addr_type(&usin->sin6_addr); if (addr_type & IPV6_ADDR_MULTICAST) return -ENETUNREACH; if (addr_type & IPV6_ADDR_LINKLOCAL) { if (addr_len >= sizeof(struct sockaddr_in6) && usin->sin6_scope_id) { /* If interface is set while binding, indices * must coincide. */ if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != usin->sin6_scope_id) return -EINVAL; sk->sk_bound_dev_if = usin->sin6_scope_id; } /* Connect to link-local address requires an interface */ if (!sk->sk_bound_dev_if) return -EINVAL; } sk->sk_v6_daddr = usin->sin6_addr; np->flow_label = fl6.flowlabel; /* * DCCP over IPv4 */ if (addr_type == IPV6_ADDR_MAPPED) { u32 exthdrlen = icsk->icsk_ext_hdr_len; struct sockaddr_in sin; net_dbg_ratelimited("connect: ipv4 mapped\n"); if (ipv6_only_sock(sk)) return -ENETUNREACH; sin.sin_family = AF_INET; sin.sin_port = usin->sin6_port; sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; icsk->icsk_af_ops = &dccp_ipv6_mapped; sk->sk_backlog_rcv = dccp_v4_do_rcv; err = dccp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); if (err) { icsk->icsk_ext_hdr_len = exthdrlen; icsk->icsk_af_ops = &dccp_ipv6_af_ops; sk->sk_backlog_rcv = dccp_v6_do_rcv; goto failure; } np->saddr = sk->sk_v6_rcv_saddr; return err; } if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) saddr = &sk->sk_v6_rcv_saddr; fl6.flowi6_proto = IPPROTO_DCCP; fl6.daddr = sk->sk_v6_daddr; fl6.saddr = saddr ? *saddr : np->saddr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.fl6_dport = usin->sin6_port; fl6.fl6_sport = inet->inet_sport; security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); final_p = fl6_update_dst(&fl6, opt, &final); dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto failure; } if (saddr == NULL) { saddr = &fl6.saddr; err = inet_bhash2_update_saddr(sk, saddr, AF_INET6); if (err) goto failure; } /* set the source address */ np->saddr = *saddr; inet->inet_rcv_saddr = LOOPBACK4_IPV6; ip6_dst_store(sk, dst, NULL, NULL); icsk->icsk_ext_hdr_len = 0; if (opt) icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen; inet->inet_dport = usin->sin6_port; dccp_set_state(sk, DCCP_REQUESTING); err = inet6_hash_connect(&dccp_death_row, sk); if (err) goto late_failure; dp->dccps_iss = secure_dccpv6_sequence_number(np->saddr.s6_addr32, sk->sk_v6_daddr.s6_addr32, inet->inet_sport, inet->inet_dport); err = dccp_connect(sk); if (err) goto late_failure; return 0; late_failure: dccp_set_state(sk, DCCP_CLOSED); inet_bhash2_reset_saddr(sk); __sk_dst_reset(sk); failure: inet->inet_dport = 0; sk->sk_route_caps = 0; return err; } static const struct inet_connection_sock_af_ops dccp_ipv6_af_ops = { .queue_xmit = inet6_csk_xmit, .send_check = dccp_v6_send_check, .rebuild_header = inet6_sk_rebuild_header, .conn_request = dccp_v6_conn_request, .syn_recv_sock = dccp_v6_request_recv_sock, .net_header_len = sizeof(struct ipv6hdr), .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, .addr2sockaddr = inet6_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in6), }; /* * DCCP over IPv4 via INET6 API */ static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = { .queue_xmit = ip_queue_xmit, .send_check = dccp_v4_send_check, .rebuild_header = inet_sk_rebuild_header, .conn_request = dccp_v6_conn_request, .syn_recv_sock = dccp_v6_request_recv_sock, .net_header_len = sizeof(struct iphdr), .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, .addr2sockaddr = inet6_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in6), }; static void dccp_v6_sk_destruct(struct sock *sk) { dccp_destruct_common(sk); inet6_sock_destruct(sk); } /* NOTE: A lot of things set to zero explicitly by call to * sk_alloc() so need not be done here. */ static int dccp_v6_init_sock(struct sock *sk) { static __u8 dccp_v6_ctl_sock_initialized; int err = dccp_init_sock(sk, dccp_v6_ctl_sock_initialized); if (err == 0) { if (unlikely(!dccp_v6_ctl_sock_initialized)) dccp_v6_ctl_sock_initialized = 1; inet_csk(sk)->icsk_af_ops = &dccp_ipv6_af_ops; sk->sk_destruct = dccp_v6_sk_destruct; } return err; } static struct timewait_sock_ops dccp6_timewait_sock_ops = { .twsk_obj_size = sizeof(struct dccp6_timewait_sock), }; static struct proto dccp_v6_prot = { .name = "DCCPv6", .owner = THIS_MODULE, .close = dccp_close, .connect = dccp_v6_connect, .disconnect = dccp_disconnect, .ioctl = dccp_ioctl, .init = dccp_v6_init_sock, .setsockopt = dccp_setsockopt, .getsockopt = dccp_getsockopt, .sendmsg = dccp_sendmsg, .recvmsg = dccp_recvmsg, .backlog_rcv = dccp_v6_do_rcv, .hash = inet6_hash, .unhash = inet_unhash, .accept = inet_csk_accept, .get_port = inet_csk_get_port, .shutdown = dccp_shutdown, .destroy = dccp_destroy_sock, .orphan_count = &dccp_orphan_count, .max_header = MAX_DCCP_HEADER, .obj_size = sizeof(struct dccp6_sock), .ipv6_pinfo_offset = offsetof(struct dccp6_sock, inet6), .slab_flags = SLAB_TYPESAFE_BY_RCU, .rsk_prot = &dccp6_request_sock_ops, .twsk_prot = &dccp6_timewait_sock_ops, .h.hashinfo = &dccp_hashinfo, }; static const struct inet6_protocol dccp_v6_protocol = { .handler = dccp_v6_rcv, .err_handler = dccp_v6_err, .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL, }; static const struct proto_ops inet6_dccp_ops = { .family = PF_INET6, .owner = THIS_MODULE, .release = inet6_release, .bind = inet6_bind, .connect = inet_stream_connect, .socketpair = sock_no_socketpair, .accept = inet_accept, .getname = inet6_getname, .poll = dccp_poll, .ioctl = inet6_ioctl, .gettstamp = sock_gettstamp, .listen = inet_dccp_listen, .shutdown = inet_shutdown, .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, .sendmsg = inet_sendmsg, .recvmsg = sock_common_recvmsg, .mmap = sock_no_mmap, #ifdef CONFIG_COMPAT .compat_ioctl = inet6_compat_ioctl, #endif }; static struct inet_protosw dccp_v6_protosw = { .type = SOCK_DCCP, .protocol = IPPROTO_DCCP, .prot = &dccp_v6_prot, .ops = &inet6_dccp_ops, .flags = INET_PROTOSW_ICSK, }; static int __net_init dccp_v6_init_net(struct net *net) { struct dccp_v6_pernet *pn = net_generic(net, dccp_v6_pernet_id); if (dccp_hashinfo.bhash == NULL) return -ESOCKTNOSUPPORT; return inet_ctl_sock_create(&pn->v6_ctl_sk, PF_INET6, SOCK_DCCP, IPPROTO_DCCP, net); } static void __net_exit dccp_v6_exit_net(struct net *net) { struct dccp_v6_pernet *pn = net_generic(net, dccp_v6_pernet_id); inet_ctl_sock_destroy(pn->v6_ctl_sk); } static struct pernet_operations dccp_v6_ops = { .init = dccp_v6_init_net, .exit = dccp_v6_exit_net, .id = &dccp_v6_pernet_id, .size = sizeof(struct dccp_v6_pernet), }; static int __init dccp_v6_init(void) { int err = proto_register(&dccp_v6_prot, 1); if (err) goto out; inet6_register_protosw(&dccp_v6_protosw); err = register_pernet_subsys(&dccp_v6_ops); if (err) goto out_destroy_ctl_sock; err = inet6_add_protocol(&dccp_v6_protocol, IPPROTO_DCCP); if (err) goto out_unregister_proto; out: return err; out_unregister_proto: unregister_pernet_subsys(&dccp_v6_ops); out_destroy_ctl_sock: inet6_unregister_protosw(&dccp_v6_protosw); proto_unregister(&dccp_v6_prot); goto out; } static void __exit dccp_v6_exit(void) { inet6_del_protocol(&dccp_v6_protocol, IPPROTO_DCCP); unregister_pernet_subsys(&dccp_v6_ops); inet6_unregister_protosw(&dccp_v6_protosw); proto_unregister(&dccp_v6_prot); } module_init(dccp_v6_init); module_exit(dccp_v6_exit); /* * __stringify doesn't likes enums, so use SOCK_DCCP (6) and IPPROTO_DCCP (33) * values directly, Also cover the case where the protocol is not specified, * i.e. net-pf-PF_INET6-proto-0-type-SOCK_DCCP */ MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 33, 6); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 0, 6); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>"); MODULE_DESCRIPTION("DCCPv6 - Datagram Congestion Controlled Protocol"); |
| 10 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2018 HUAWEI, Inc. * https://www.huawei.com/ */ #include "internal.h" struct z_erofs_gbuf { spinlock_t lock; void *ptr; struct page **pages; unsigned int nrpages; }; static struct z_erofs_gbuf *z_erofs_gbufpool, *z_erofs_rsvbuf; static unsigned int z_erofs_gbuf_count, z_erofs_gbuf_nrpages, z_erofs_rsv_nrpages; module_param_named(global_buffers, z_erofs_gbuf_count, uint, 0444); module_param_named(reserved_pages, z_erofs_rsv_nrpages, uint, 0444); static atomic_long_t erofs_global_shrink_cnt; /* for all mounted instances */ /* protected by 'erofs_sb_list_lock' */ static unsigned int shrinker_run_no; /* protects the mounted 'erofs_sb_list' */ static DEFINE_SPINLOCK(erofs_sb_list_lock); static LIST_HEAD(erofs_sb_list); static struct shrinker *erofs_shrinker_info; static unsigned int z_erofs_gbuf_id(void) { return raw_smp_processor_id() % z_erofs_gbuf_count; } void *z_erofs_get_gbuf(unsigned int requiredpages) __acquires(gbuf->lock) { struct z_erofs_gbuf *gbuf; migrate_disable(); gbuf = &z_erofs_gbufpool[z_erofs_gbuf_id()]; spin_lock(&gbuf->lock); /* check if the buffer is too small */ if (requiredpages > gbuf->nrpages) { spin_unlock(&gbuf->lock); migrate_enable(); /* (for sparse checker) pretend gbuf->lock is still taken */ __acquire(gbuf->lock); return NULL; } return gbuf->ptr; } void z_erofs_put_gbuf(void *ptr) __releases(gbuf->lock) { struct z_erofs_gbuf *gbuf; gbuf = &z_erofs_gbufpool[z_erofs_gbuf_id()]; DBG_BUGON(gbuf->ptr != ptr); spin_unlock(&gbuf->lock); migrate_enable(); } int z_erofs_gbuf_growsize(unsigned int nrpages) { static DEFINE_MUTEX(gbuf_resize_mutex); struct page **tmp_pages = NULL; struct z_erofs_gbuf *gbuf; void *ptr, *old_ptr; int last, i, j; mutex_lock(&gbuf_resize_mutex); /* avoid shrinking gbufs, since no idea how many fses rely on */ if (nrpages <= z_erofs_gbuf_nrpages) { mutex_unlock(&gbuf_resize_mutex); return 0; } for (i = 0; i < z_erofs_gbuf_count; ++i) { gbuf = &z_erofs_gbufpool[i]; tmp_pages = kcalloc(nrpages, sizeof(*tmp_pages), GFP_KERNEL); if (!tmp_pages) goto out; for (j = 0; j < gbuf->nrpages; ++j) tmp_pages[j] = gbuf->pages[j]; do { last = j; j = alloc_pages_bulk_array(GFP_KERNEL, nrpages, tmp_pages); if (last == j) goto out; } while (j != nrpages); ptr = vmap(tmp_pages, nrpages, VM_MAP, PAGE_KERNEL); if (!ptr) goto out; spin_lock(&gbuf->lock); kfree(gbuf->pages); gbuf->pages = tmp_pages; old_ptr = gbuf->ptr; gbuf->ptr = ptr; gbuf->nrpages = nrpages; spin_unlock(&gbuf->lock); if (old_ptr) vunmap(old_ptr); } z_erofs_gbuf_nrpages = nrpages; out: if (i < z_erofs_gbuf_count && tmp_pages) { for (j = 0; j < nrpages; ++j) if (tmp_pages[j] && tmp_pages[j] != gbuf->pages[j]) __free_page(tmp_pages[j]); kfree(tmp_pages); } mutex_unlock(&gbuf_resize_mutex); return i < z_erofs_gbuf_count ? -ENOMEM : 0; } int __init z_erofs_gbuf_init(void) { unsigned int i, total = num_possible_cpus(); if (z_erofs_gbuf_count) total = min(z_erofs_gbuf_count, total); z_erofs_gbuf_count = total; /* The last (special) global buffer is the reserved buffer */ total += !!z_erofs_rsv_nrpages; z_erofs_gbufpool = kcalloc(total, sizeof(*z_erofs_gbufpool), GFP_KERNEL); if (!z_erofs_gbufpool) return -ENOMEM; if (z_erofs_rsv_nrpages) { z_erofs_rsvbuf = &z_erofs_gbufpool[total - 1]; z_erofs_rsvbuf->pages = kcalloc(z_erofs_rsv_nrpages, sizeof(*z_erofs_rsvbuf->pages), GFP_KERNEL); if (!z_erofs_rsvbuf->pages) { z_erofs_rsvbuf = NULL; z_erofs_rsv_nrpages = 0; } } for (i = 0; i < total; ++i) spin_lock_init(&z_erofs_gbufpool[i].lock); return 0; } void z_erofs_gbuf_exit(void) { int i, j; for (i = 0; i < z_erofs_gbuf_count + (!!z_erofs_rsvbuf); ++i) { struct z_erofs_gbuf *gbuf = &z_erofs_gbufpool[i]; if (gbuf->ptr) { vunmap(gbuf->ptr); gbuf->ptr = NULL; } if (!gbuf->pages) continue; for (j = 0; j < gbuf->nrpages; ++j) if (gbuf->pages[j]) put_page(gbuf->pages[j]); kfree(gbuf->pages); gbuf->pages = NULL; } kfree(z_erofs_gbufpool); } struct page *__erofs_allocpage(struct page **pagepool, gfp_t gfp, bool tryrsv) { struct page *page = *pagepool; if (page) { *pagepool = (struct page *)page_private(page); } else if (tryrsv && z_erofs_rsvbuf && z_erofs_rsvbuf->nrpages) { spin_lock(&z_erofs_rsvbuf->lock); if (z_erofs_rsvbuf->nrpages) page = z_erofs_rsvbuf->pages[--z_erofs_rsvbuf->nrpages]; spin_unlock(&z_erofs_rsvbuf->lock); } if (!page) page = alloc_page(gfp); DBG_BUGON(page && page_ref_count(page) != 1); return page; } void erofs_release_pages(struct page **pagepool) { while (*pagepool) { struct page *page = *pagepool; *pagepool = (struct page *)page_private(page); /* try to fill reserved global pool first */ if (z_erofs_rsvbuf && z_erofs_rsvbuf->nrpages < z_erofs_rsv_nrpages) { spin_lock(&z_erofs_rsvbuf->lock); if (z_erofs_rsvbuf->nrpages < z_erofs_rsv_nrpages) { z_erofs_rsvbuf->pages[z_erofs_rsvbuf->nrpages++] = page; spin_unlock(&z_erofs_rsvbuf->lock); continue; } spin_unlock(&z_erofs_rsvbuf->lock); } put_page(page); } } static bool erofs_workgroup_get(struct erofs_workgroup *grp) { if (lockref_get_not_zero(&grp->lockref)) return true; spin_lock(&grp->lockref.lock); if (__lockref_is_dead(&grp->lockref)) { spin_unlock(&grp->lockref.lock); return false; } if (!grp->lockref.count++) atomic_long_dec(&erofs_global_shrink_cnt); spin_unlock(&grp->lockref.lock); return true; } struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb, pgoff_t index) { struct erofs_sb_info *sbi = EROFS_SB(sb); struct erofs_workgroup *grp; repeat: rcu_read_lock(); grp = xa_load(&sbi->managed_pslots, index); if (grp) { if (!erofs_workgroup_get(grp)) { /* prefer to relax rcu read side */ rcu_read_unlock(); goto repeat; } DBG_BUGON(index != grp->index); } rcu_read_unlock(); return grp; } struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb, struct erofs_workgroup *grp) { struct erofs_sb_info *const sbi = EROFS_SB(sb); struct erofs_workgroup *pre; DBG_BUGON(grp->lockref.count < 1); repeat: xa_lock(&sbi->managed_pslots); pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index, NULL, grp, GFP_KERNEL); if (pre) { if (xa_is_err(pre)) { pre = ERR_PTR(xa_err(pre)); } else if (!erofs_workgroup_get(pre)) { /* try to legitimize the current in-tree one */ xa_unlock(&sbi->managed_pslots); cond_resched(); goto repeat; } grp = pre; } xa_unlock(&sbi->managed_pslots); return grp; } static void __erofs_workgroup_free(struct erofs_workgroup *grp) { atomic_long_dec(&erofs_global_shrink_cnt); erofs_workgroup_free_rcu(grp); } void erofs_workgroup_put(struct erofs_workgroup *grp) { if (lockref_put_or_lock(&grp->lockref)) return; DBG_BUGON(__lockref_is_dead(&grp->lockref)); if (grp->lockref.count == 1) atomic_long_inc(&erofs_global_shrink_cnt); --grp->lockref.count; spin_unlock(&grp->lockref.lock); } static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi, struct erofs_workgroup *grp) { int free = false; spin_lock(&grp->lockref.lock); if (grp->lockref.count) goto out; /* * Note that all cached pages should be detached before deleted from * the XArray. Otherwise some cached pages could be still attached to * the orphan old workgroup when the new one is available in the tree. */ if (erofs_try_to_free_all_cached_folios(sbi, grp)) goto out; /* * It's impossible to fail after the workgroup is freezed, * however in order to avoid some race conditions, add a * DBG_BUGON to observe this in advance. */ DBG_BUGON(__xa_erase(&sbi->managed_pslots, grp->index) != grp); lockref_mark_dead(&grp->lockref); free = true; out: spin_unlock(&grp->lockref.lock); if (free) __erofs_workgroup_free(grp); return free; } static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi, unsigned long nr_shrink) { struct erofs_workgroup *grp; unsigned int freed = 0; unsigned long index; xa_lock(&sbi->managed_pslots); xa_for_each(&sbi->managed_pslots, index, grp) { /* try to shrink each valid workgroup */ if (!erofs_try_to_release_workgroup(sbi, grp)) continue; xa_unlock(&sbi->managed_pslots); ++freed; if (!--nr_shrink) return freed; xa_lock(&sbi->managed_pslots); } xa_unlock(&sbi->managed_pslots); return freed; } void erofs_shrinker_register(struct super_block *sb) { struct erofs_sb_info *sbi = EROFS_SB(sb); mutex_init(&sbi->umount_mutex); spin_lock(&erofs_sb_list_lock); list_add(&sbi->list, &erofs_sb_list); spin_unlock(&erofs_sb_list_lock); } void erofs_shrinker_unregister(struct super_block *sb) { struct erofs_sb_info *const sbi = EROFS_SB(sb); mutex_lock(&sbi->umount_mutex); /* clean up all remaining workgroups in memory */ erofs_shrink_workstation(sbi, ~0UL); spin_lock(&erofs_sb_list_lock); list_del(&sbi->list); spin_unlock(&erofs_sb_list_lock); mutex_unlock(&sbi->umount_mutex); } static unsigned long erofs_shrink_count(struct shrinker *shrink, struct shrink_control *sc) { return atomic_long_read(&erofs_global_shrink_cnt); } static unsigned long erofs_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) { struct erofs_sb_info *sbi; struct list_head *p; unsigned long nr = sc->nr_to_scan; unsigned int run_no; unsigned long freed = 0; spin_lock(&erofs_sb_list_lock); do { run_no = ++shrinker_run_no; } while (run_no == 0); /* Iterate over all mounted superblocks and try to shrink them */ p = erofs_sb_list.next; while (p != &erofs_sb_list) { sbi = list_entry(p, struct erofs_sb_info, list); /* * We move the ones we do to the end of the list, so we stop * when we see one we have already done. */ if (sbi->shrinker_run_no == run_no) break; if (!mutex_trylock(&sbi->umount_mutex)) { p = p->next; continue; } spin_unlock(&erofs_sb_list_lock); sbi->shrinker_run_no = run_no; freed += erofs_shrink_workstation(sbi, nr - freed); spin_lock(&erofs_sb_list_lock); /* Get the next list element before we move this one */ p = p->next; /* * Move this one to the end of the list to provide some * fairness. */ list_move_tail(&sbi->list, &erofs_sb_list); mutex_unlock(&sbi->umount_mutex); if (freed >= nr) break; } spin_unlock(&erofs_sb_list_lock); return freed; } int __init erofs_init_shrinker(void) { erofs_shrinker_info = shrinker_alloc(0, "erofs-shrinker"); if (!erofs_shrinker_info) return -ENOMEM; erofs_shrinker_info->count_objects = erofs_shrink_count; erofs_shrinker_info->scan_objects = erofs_shrink_scan; shrinker_register(erofs_shrinker_info); return 0; } void erofs_exit_shrinker(void) { shrinker_free(erofs_shrinker_info); } |
| 8 9 8 8 8 8 8 8 8 8 8 8 9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 | // SPDX-License-Identifier: ISC /* * Copyright (c) 2005-2011 Atheros Communications Inc. * Copyright (c) 2011-2017 Qualcomm Atheros, Inc. * Copyright (c) 2018-2019, The Linux Foundation. All rights reserved. * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #include <linux/module.h> #include <linux/firmware.h> #include <linux/of.h> #include <linux/property.h> #include <linux/dmi.h> #include <linux/ctype.h> #include <linux/pm_qos.h> #include <linux/nvmem-consumer.h> #include <asm/byteorder.h> #include "core.h" #include "mac.h" #include "htc.h" #include "hif.h" #include "wmi.h" #include "bmi.h" #include "debug.h" #include "htt.h" #include "testmode.h" #include "wmi-ops.h" #include "coredump.h" #include "leds.h" unsigned int ath10k_debug_mask; EXPORT_SYMBOL(ath10k_debug_mask); static unsigned int ath10k_cryptmode_param; static bool uart_print; static bool skip_otp; static bool fw_diag_log; /* frame mode values are mapped as per enum ath10k_hw_txrx_mode */ unsigned int ath10k_frame_mode = ATH10K_HW_TXRX_NATIVE_WIFI; unsigned long ath10k_coredump_mask = BIT(ATH10K_FW_CRASH_DUMP_REGISTERS) | BIT(ATH10K_FW_CRASH_DUMP_CE_DATA); /* FIXME: most of these should be readonly */ module_param_named(debug_mask, ath10k_debug_mask, uint, 0644); module_param_named(cryptmode, ath10k_cryptmode_param, uint, 0644); module_param(uart_print, bool, 0644); module_param(skip_otp, bool, 0644); module_param(fw_diag_log, bool, 0644); module_param_named(frame_mode, ath10k_frame_mode, uint, 0644); module_param_named(coredump_mask, ath10k_coredump_mask, ulong, 0444); MODULE_PARM_DESC(debug_mask, "Debugging mask"); MODULE_PARM_DESC(uart_print, "Uart target debugging"); MODULE_PARM_DESC(skip_otp, "Skip otp failure for calibration in testmode"); MODULE_PARM_DESC(cryptmode, "Crypto mode: 0-hardware, 1-software"); MODULE_PARM_DESC(frame_mode, "Datapath frame mode (0: raw, 1: native wifi (default), 2: ethernet)"); MODULE_PARM_DESC(coredump_mask, "Bitfield of what to include in firmware crash file"); MODULE_PARM_DESC(fw_diag_log, "Diag based fw log debugging"); static const struct ath10k_hw_params ath10k_hw_params_list[] = { { .id = QCA988X_HW_2_0_VERSION, .dev_id = QCA988X_2_0_DEVICE_ID, .bus = ATH10K_BUS_PCI, .name = "qca988x hw2.0", .patch_load_addr = QCA988X_HW_2_0_PATCH_LOAD_ADDR, .uart_pin = 7, .led_pin = 1, .cc_wraparound_type = ATH10K_HW_CC_WRAP_SHIFTED_ALL, .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, .cal_data_len = 2116, .fw = { .dir = QCA988X_HW_2_0_FW_DIR, .board_size = QCA988X_BOARD_DATA_SZ, .board_ext_size = QCA988X_BOARD_EXT_DATA_SZ, }, .rx_desc_ops = &qca988x_rx_desc_ops, .hw_ops = &qca988x_ops, .decap_align_bytes = 4, .spectral_bin_discard = 0, .spectral_bin_offset = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, .n_cipher_suites = 8, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, .credit_size_workaround = false, .tx_stats_over_pktlog = true, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA988X_HW_2_0_VERSION, .dev_id = QCA988X_2_0_DEVICE_ID_UBNT, .name = "qca988x hw2.0 ubiquiti", .patch_load_addr = QCA988X_HW_2_0_PATCH_LOAD_ADDR, .uart_pin = 7, .led_pin = 0, .cc_wraparound_type = ATH10K_HW_CC_WRAP_SHIFTED_ALL, .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, .cal_data_len = 2116, .fw = { .dir = QCA988X_HW_2_0_FW_DIR, .board_size = QCA988X_BOARD_DATA_SZ, .board_ext_size = QCA988X_BOARD_EXT_DATA_SZ, }, .rx_desc_ops = &qca988x_rx_desc_ops, .hw_ops = &qca988x_ops, .decap_align_bytes = 4, .spectral_bin_discard = 0, .spectral_bin_offset = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, .n_cipher_suites = 8, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, .credit_size_workaround = false, .tx_stats_over_pktlog = true, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA9887_HW_1_0_VERSION, .dev_id = QCA9887_1_0_DEVICE_ID, .bus = ATH10K_BUS_PCI, .name = "qca9887 hw1.0", .patch_load_addr = QCA9887_HW_1_0_PATCH_LOAD_ADDR, .uart_pin = 7, .led_pin = 1, .cc_wraparound_type = ATH10K_HW_CC_WRAP_SHIFTED_ALL, .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, .cal_data_len = 2116, .fw = { .dir = QCA9887_HW_1_0_FW_DIR, .board_size = QCA9887_BOARD_DATA_SZ, .board_ext_size = QCA9887_BOARD_EXT_DATA_SZ, }, .rx_desc_ops = &qca988x_rx_desc_ops, .hw_ops = &qca988x_ops, .decap_align_bytes = 4, .spectral_bin_discard = 0, .spectral_bin_offset = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, .n_cipher_suites = 8, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, .credit_size_workaround = false, .tx_stats_over_pktlog = false, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA6174_HW_3_2_VERSION, .dev_id = QCA6174_3_2_DEVICE_ID, .bus = ATH10K_BUS_SDIO, .name = "qca6174 hw3.2 sdio", .patch_load_addr = QCA6174_HW_3_0_PATCH_LOAD_ADDR, .uart_pin = 19, .led_pin = 0, .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, .cal_data_len = 0, .fw = { .dir = QCA6174_HW_3_0_FW_DIR, .board_size = QCA6174_BOARD_DATA_SZ, .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, }, .rx_desc_ops = &qca988x_rx_desc_ops, .hw_ops = &qca6174_sdio_ops, .hw_clk = qca6174_clk, .target_cpu_freq = 176000000, .decap_align_bytes = 4, .n_cipher_suites = 8, .num_peers = 10, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .uart_pin_workaround = true, .tx_stats_over_pktlog = false, .credit_size_workaround = false, .bmi_large_size_download = true, .supports_peer_stats_info = true, .dynamic_sar_support = true, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA6174_HW_2_1_VERSION, .dev_id = QCA6164_2_1_DEVICE_ID, .bus = ATH10K_BUS_PCI, .name = "qca6164 hw2.1", .patch_load_addr = QCA6174_HW_2_1_PATCH_LOAD_ADDR, .uart_pin = 6, .led_pin = 0, .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, .cal_data_len = 8124, .fw = { .dir = QCA6174_HW_2_1_FW_DIR, .board_size = QCA6174_BOARD_DATA_SZ, .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, }, .rx_desc_ops = &qca988x_rx_desc_ops, .hw_ops = &qca988x_ops, .decap_align_bytes = 4, .spectral_bin_discard = 0, .spectral_bin_offset = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, .n_cipher_suites = 8, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, .credit_size_workaround = false, .tx_stats_over_pktlog = false, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA6174_HW_2_1_VERSION, .dev_id = QCA6174_2_1_DEVICE_ID, .bus = ATH10K_BUS_PCI, .name = "qca6174 hw2.1", .patch_load_addr = QCA6174_HW_2_1_PATCH_LOAD_ADDR, .uart_pin = 6, .led_pin = 0, .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, .cal_data_len = 8124, .fw = { .dir = QCA6174_HW_2_1_FW_DIR, .board_size = QCA6174_BOARD_DATA_SZ, .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, }, .rx_desc_ops = &qca988x_rx_desc_ops, .hw_ops = &qca988x_ops, .decap_align_bytes = 4, .spectral_bin_discard = 0, .spectral_bin_offset = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, .n_cipher_suites = 8, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, .credit_size_workaround = false, .tx_stats_over_pktlog = false, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA6174_HW_3_0_VERSION, .dev_id = QCA6174_2_1_DEVICE_ID, .bus = ATH10K_BUS_PCI, .name = "qca6174 hw3.0", .patch_load_addr = QCA6174_HW_3_0_PATCH_LOAD_ADDR, .uart_pin = 6, .led_pin = 0, .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, .cal_data_len = 8124, .fw = { .dir = QCA6174_HW_3_0_FW_DIR, .board_size = QCA6174_BOARD_DATA_SZ, .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, }, .rx_desc_ops = &qca988x_rx_desc_ops, .hw_ops = &qca988x_ops, .decap_align_bytes = 4, .spectral_bin_discard = 0, .spectral_bin_offset = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, .n_cipher_suites = 8, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, .credit_size_workaround = false, .tx_stats_over_pktlog = false, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA6174_HW_3_2_VERSION, .dev_id = QCA6174_2_1_DEVICE_ID, .bus = ATH10K_BUS_PCI, .name = "qca6174 hw3.2", .patch_load_addr = QCA6174_HW_3_0_PATCH_LOAD_ADDR, .uart_pin = 6, .led_pin = 0, .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, .cal_data_len = 8124, .fw = { /* uses same binaries as hw3.0 */ .dir = QCA6174_HW_3_0_FW_DIR, .board_size = QCA6174_BOARD_DATA_SZ, .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, }, .rx_desc_ops = &qca988x_rx_desc_ops, .hw_ops = &qca6174_ops, .hw_clk = qca6174_clk, .target_cpu_freq = 176000000, .decap_align_bytes = 4, .spectral_bin_discard = 0, .spectral_bin_offset = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, .n_cipher_suites = 8, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = true, .credit_size_workaround = false, .tx_stats_over_pktlog = false, .supports_peer_stats_info = true, .dynamic_sar_support = true, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = true, }, { .id = QCA99X0_HW_2_0_DEV_VERSION, .dev_id = QCA99X0_2_0_DEVICE_ID, .bus = ATH10K_BUS_PCI, .name = "qca99x0 hw2.0", .patch_load_addr = QCA99X0_HW_2_0_PATCH_LOAD_ADDR, .uart_pin = 7, .led_pin = 17, .otp_exe_param = 0x00000700, .continuous_frag_desc = true, .cck_rate_map_rev2 = true, .channel_counters_freq_hz = 150000, .max_probe_resp_desc_thres = 24, .tx_chain_mask = 0xf, .rx_chain_mask = 0xf, .max_spatial_stream = 4, .cal_data_len = 12064, .fw = { .dir = QCA99X0_HW_2_0_FW_DIR, .board_size = QCA99X0_BOARD_DATA_SZ, .board_ext_size = QCA99X0_BOARD_EXT_DATA_SZ, }, .sw_decrypt_mcast_mgmt = true, .rx_desc_ops = &qca99x0_rx_desc_ops, .hw_ops = &qca99x0_ops, .decap_align_bytes = 1, .spectral_bin_discard = 4, .spectral_bin_offset = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, .n_cipher_suites = 11, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, .credit_size_workaround = false, .tx_stats_over_pktlog = false, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA9984_HW_1_0_DEV_VERSION, .dev_id = QCA9984_1_0_DEVICE_ID, .bus = ATH10K_BUS_PCI, .name = "qca9984/qca9994 hw1.0", .patch_load_addr = QCA9984_HW_1_0_PATCH_LOAD_ADDR, .uart_pin = 7, .led_pin = 17, .cc_wraparound_type = ATH10K_HW_CC_WRAP_SHIFTED_EACH, .otp_exe_param = 0x00000700, .continuous_frag_desc = true, .cck_rate_map_rev2 = true, .channel_counters_freq_hz = 150000, .max_probe_resp_desc_thres = 24, .tx_chain_mask = 0xf, .rx_chain_mask = 0xf, .max_spatial_stream = 4, .cal_data_len = 12064, .fw = { .dir = QCA9984_HW_1_0_FW_DIR, .board_size = QCA99X0_BOARD_DATA_SZ, .board_ext_size = QCA99X0_BOARD_EXT_DATA_SZ, .ext_board_size = QCA99X0_EXT_BOARD_DATA_SZ, }, .sw_decrypt_mcast_mgmt = true, .rx_desc_ops = &qca99x0_rx_desc_ops, .hw_ops = &qca99x0_ops, .decap_align_bytes = 1, .spectral_bin_discard = 12, .spectral_bin_offset = 8, /* Can do only 2x2 VHT160 or 80+80. 1560Mbps is 4x4 80Mhz * or 2x2 160Mhz, long-guard-interval. */ .vht160_mcs_rx_highest = 1560, .vht160_mcs_tx_highest = 1560, .n_cipher_suites = 11, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, .credit_size_workaround = false, .tx_stats_over_pktlog = false, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA9888_HW_2_0_DEV_VERSION, .dev_id = QCA9888_2_0_DEVICE_ID, .bus = ATH10K_BUS_PCI, .name = "qca9888 hw2.0", .patch_load_addr = QCA9888_HW_2_0_PATCH_LOAD_ADDR, .uart_pin = 7, .led_pin = 17, .cc_wraparound_type = ATH10K_HW_CC_WRAP_SHIFTED_EACH, .otp_exe_param = 0x00000700, .continuous_frag_desc = true, .channel_counters_freq_hz = 150000, .max_probe_resp_desc_thres = 24, .tx_chain_mask = 3, .rx_chain_mask = 3, .max_spatial_stream = 2, .cal_data_len = 12064, .fw = { .dir = QCA9888_HW_2_0_FW_DIR, .board_size = QCA99X0_BOARD_DATA_SZ, .board_ext_size = QCA99X0_BOARD_EXT_DATA_SZ, }, .sw_decrypt_mcast_mgmt = true, .rx_desc_ops = &qca99x0_rx_desc_ops, .hw_ops = &qca99x0_ops, .decap_align_bytes = 1, .spectral_bin_discard = 12, .spectral_bin_offset = 8, /* Can do only 1x1 VHT160 or 80+80. 780Mbps is 2x2 80Mhz or * 1x1 160Mhz, long-guard-interval. */ .vht160_mcs_rx_highest = 780, .vht160_mcs_tx_highest = 780, .n_cipher_suites = 11, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, .credit_size_workaround = false, .tx_stats_over_pktlog = false, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA9377_HW_1_0_DEV_VERSION, .dev_id = QCA9377_1_0_DEVICE_ID, .bus = ATH10K_BUS_PCI, .name = "qca9377 hw1.0", .patch_load_addr = QCA9377_HW_1_0_PATCH_LOAD_ADDR, .uart_pin = 6, .led_pin = 0, .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, .cal_data_len = 8124, .fw = { .dir = QCA9377_HW_1_0_FW_DIR, .board_size = QCA9377_BOARD_DATA_SZ, .board_ext_size = QCA9377_BOARD_EXT_DATA_SZ, }, .rx_desc_ops = &qca988x_rx_desc_ops, .hw_ops = &qca988x_ops, .decap_align_bytes = 4, .spectral_bin_discard = 0, .spectral_bin_offset = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, .n_cipher_suites = 8, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, .credit_size_workaround = false, .tx_stats_over_pktlog = false, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA9377_HW_1_1_DEV_VERSION, .dev_id = QCA9377_1_0_DEVICE_ID, .bus = ATH10K_BUS_PCI, .name = "qca9377 hw1.1", .patch_load_addr = QCA9377_HW_1_0_PATCH_LOAD_ADDR, .uart_pin = 6, .led_pin = 0, .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, .cal_data_len = 8124, .fw = { .dir = QCA9377_HW_1_0_FW_DIR, .board_size = QCA9377_BOARD_DATA_SZ, .board_ext_size = QCA9377_BOARD_EXT_DATA_SZ, }, .rx_desc_ops = &qca988x_rx_desc_ops, .hw_ops = &qca6174_ops, .hw_clk = qca6174_clk, .target_cpu_freq = 176000000, .decap_align_bytes = 4, .spectral_bin_discard = 0, .spectral_bin_offset = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, .n_cipher_suites = 8, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = true, .credit_size_workaround = false, .tx_stats_over_pktlog = false, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA9377_HW_1_1_DEV_VERSION, .dev_id = QCA9377_1_0_DEVICE_ID, .bus = ATH10K_BUS_SDIO, .name = "qca9377 hw1.1 sdio", .patch_load_addr = QCA9377_HW_1_0_PATCH_LOAD_ADDR, .uart_pin = 19, .led_pin = 0, .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, .cal_data_len = 8124, .fw = { .dir = QCA9377_HW_1_0_FW_DIR, .board_size = QCA9377_BOARD_DATA_SZ, .board_ext_size = QCA9377_BOARD_EXT_DATA_SZ, }, .rx_desc_ops = &qca988x_rx_desc_ops, .hw_ops = &qca6174_ops, .hw_clk = qca6174_clk, .target_cpu_freq = 176000000, .decap_align_bytes = 4, .n_cipher_suites = 8, .num_peers = TARGET_QCA9377_HL_NUM_PEERS, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .uart_pin_workaround = true, .credit_size_workaround = true, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = QCA4019_HW_1_0_DEV_VERSION, .dev_id = 0, .bus = ATH10K_BUS_AHB, .name = "qca4019 hw1.0", .patch_load_addr = QCA4019_HW_1_0_PATCH_LOAD_ADDR, .uart_pin = 7, .led_pin = 0, .cc_wraparound_type = ATH10K_HW_CC_WRAP_SHIFTED_EACH, .otp_exe_param = 0x0010000, .continuous_frag_desc = true, .cck_rate_map_rev2 = true, .channel_counters_freq_hz = 125000, .max_probe_resp_desc_thres = 24, .tx_chain_mask = 0x3, .rx_chain_mask = 0x3, .max_spatial_stream = 2, .cal_data_len = 12064, .fw = { .dir = QCA4019_HW_1_0_FW_DIR, .board_size = QCA4019_BOARD_DATA_SZ, .board_ext_size = QCA4019_BOARD_EXT_DATA_SZ, }, .sw_decrypt_mcast_mgmt = true, .rx_desc_ops = &qca99x0_rx_desc_ops, .hw_ops = &qca99x0_ops, .decap_align_bytes = 1, .spectral_bin_discard = 4, .spectral_bin_offset = 0, .vht160_mcs_rx_highest = 0, .vht160_mcs_tx_highest = 0, .n_cipher_suites = 11, .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .target_64bit = false, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL, .shadow_reg_support = false, .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, .credit_size_workaround = false, .tx_stats_over_pktlog = false, .dynamic_sar_support = false, .hw_restart_disconnect = false, .use_fw_tx_credits = true, .delay_unmap_buffer = false, .mcast_frame_registration = false, }, { .id = WCN3990_HW_1_0_DEV_VERSION, .dev_id = 0, .bus = ATH10K_BUS_SNOC, .name = "wcn3990 hw1.0", .led_pin = 0, .continuous_frag_desc = true, .tx_chain_mask = 0x7, .rx_chain_mask = 0x7, .max_spatial_stream = 4, .fw = { .dir = WCN3990_HW_1_0_FW_DIR, .board_size = WCN3990_BOARD_DATA_SZ, .board_ext_size = WCN3990_BOARD_EXT_DATA_SZ, }, .sw_decrypt_mcast_mgmt = true, .rx_desc_ops = &wcn3990_rx_desc_ops, .hw_ops = &wcn3990_ops, .decap_align_bytes = 1, .num_peers = TARGET_HL_TLV_NUM_PEERS, .n_cipher_suites = 11, .ast_skid_limit = TARGET_HL_TLV_AST_SKID_LIMIT, .num_wds_entries = TARGET_HL_TLV_NUM_WDS_ENTRIES, .target_64bit = true, .rx_ring_fill_level = HTT_RX_RING_FILL_LEVEL_DUAL_MAC, .shadow_reg_support = true, .rri_on_ddr = true, .hw_filter_reset_required = false, .fw_diag_ce_download = false, .credit_size_workaround = false, .tx_stats_over_pktlog = false, .dynamic_sar_support = true, .hw_restart_disconnect = true, .use_fw_tx_credits = false, .delay_unmap_buffer = true, .mcast_frame_registration = false, }, }; static const char *const ath10k_core_fw_feature_str[] = { [ATH10K_FW_FEATURE_EXT_WMI_MGMT_RX] = "wmi-mgmt-rx", [ATH10K_FW_FEATURE_WMI_10X] = "wmi-10.x", [ATH10K_FW_FEATURE_HAS_WMI_MGMT_TX] = "has-wmi-mgmt-tx", [ATH10K_FW_FEATURE_NO_P2P] = "no-p2p", [ATH10K_FW_FEATURE_WMI_10_2] = "wmi-10.2", [ATH10K_FW_FEATURE_MULTI_VIF_PS_SUPPORT] = "multi-vif-ps", [ATH10K_FW_FEATURE_WOWLAN_SUPPORT] = "wowlan", [ATH10K_FW_FEATURE_IGNORE_OTP_RESULT] = "ignore-otp", [ATH10K_FW_FEATURE_NO_NWIFI_DECAP_4ADDR_PADDING] = "no-4addr-pad", [ATH10K_FW_FEATURE_SUPPORTS_SKIP_CLOCK_INIT] = "skip-clock-init", [ATH10K_FW_FEATURE_RAW_MODE_SUPPORT] = "raw-mode", [ATH10K_FW_FEATURE_SUPPORTS_ADAPTIVE_CCA] = "adaptive-cca", [ATH10K_FW_FEATURE_MFP_SUPPORT] = "mfp", [ATH10K_FW_FEATURE_PEER_FLOW_CONTROL] = "peer-flow-ctrl", [ATH10K_FW_FEATURE_BTCOEX_PARAM] = "btcoex-param", [ATH10K_FW_FEATURE_SKIP_NULL_FUNC_WAR] = "skip-null-func-war", [ATH10K_FW_FEATURE_ALLOWS_MESH_BCAST] = "allows-mesh-bcast", [ATH10K_FW_FEATURE_NO_PS] = "no-ps", [ATH10K_FW_FEATURE_MGMT_TX_BY_REF] = "mgmt-tx-by-reference", [ATH10K_FW_FEATURE_NON_BMI] = "non-bmi", [ATH10K_FW_FEATURE_SINGLE_CHAN_INFO_PER_CHANNEL] = "single-chan-info-per-channel", [ATH10K_FW_FEATURE_PEER_FIXED_RATE] = "peer-fixed-rate", [ATH10K_FW_FEATURE_IRAM_RECOVERY] = "iram-recovery", }; static unsigned int ath10k_core_get_fw_feature_str(char *buf, size_t buf_len, enum ath10k_fw_features feat) { /* make sure that ath10k_core_fw_feature_str[] gets updated */ BUILD_BUG_ON(ARRAY_SIZE(ath10k_core_fw_feature_str) != ATH10K_FW_FEATURE_COUNT); if (feat >= ARRAY_SIZE(ath10k_core_fw_feature_str) || WARN_ON(!ath10k_core_fw_feature_str[feat])) { return scnprintf(buf, buf_len, "bit%d", feat); } return scnprintf(buf, buf_len, "%s", ath10k_core_fw_feature_str[feat]); } void ath10k_core_get_fw_features_str(struct ath10k *ar, char *buf, size_t buf_len) { size_t len = 0; int i; for (i = 0; i < ATH10K_FW_FEATURE_COUNT; i++) { if (test_bit(i, ar->normal_mode_fw.fw_file.fw_features)) { if (len > 0) len += scnprintf(buf + len, buf_len - len, ","); len += ath10k_core_get_fw_feature_str(buf + len, buf_len - len, i); } } } static void ath10k_send_suspend_complete(struct ath10k *ar) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot suspend complete\n"); complete(&ar->target_suspend); } static int ath10k_init_sdio(struct ath10k *ar, enum ath10k_firmware_mode mode) { bool mtu_workaround = ar->hw_params.credit_size_workaround; int ret; u32 param = 0; ret = ath10k_bmi_write32(ar, hi_mbox_io_block_sz, 256); if (ret) return ret; ret = ath10k_bmi_write32(ar, hi_mbox_isr_yield_limit, 99); if (ret) return ret; ret = ath10k_bmi_read32(ar, hi_acs_flags, ¶m); if (ret) return ret; param |= HI_ACS_FLAGS_SDIO_REDUCE_TX_COMPL_SET; if (mode == ATH10K_FIRMWARE_MODE_NORMAL && !mtu_workaround) param |= HI_ACS_FLAGS_ALT_DATA_CREDIT_SIZE; else param &= ~HI_ACS_FLAGS_ALT_DATA_CREDIT_SIZE; if (mode == ATH10K_FIRMWARE_MODE_UTF) param &= ~HI_ACS_FLAGS_SDIO_SWAP_MAILBOX_SET; else param |= HI_ACS_FLAGS_SDIO_SWAP_MAILBOX_SET; ret = ath10k_bmi_write32(ar, hi_acs_flags, param); if (ret) return ret; ret = ath10k_bmi_read32(ar, hi_option_flag2, ¶m); if (ret) return ret; param |= HI_OPTION_SDIO_CRASH_DUMP_ENHANCEMENT_HOST; ret = ath10k_bmi_write32(ar, hi_option_flag2, param); if (ret) return ret; return 0; } static int ath10k_init_configure_target(struct ath10k *ar) { u32 param_host; int ret; /* tell target which HTC version it is used*/ ret = ath10k_bmi_write32(ar, hi_app_host_interest, HTC_PROTOCOL_VERSION); if (ret) { ath10k_err(ar, "settings HTC version failed\n"); return ret; } /* set the firmware mode to STA/IBSS/AP */ ret = ath10k_bmi_read32(ar, hi_option_flag, ¶m_host); if (ret) { ath10k_err(ar, "setting firmware mode (1/2) failed\n"); return ret; } /* TODO following parameters need to be re-visited. */ /* num_device */ param_host |= (1 << HI_OPTION_NUM_DEV_SHIFT); /* Firmware mode */ /* FIXME: Why FW_MODE_AP ??.*/ param_host |= (HI_OPTION_FW_MODE_AP << HI_OPTION_FW_MODE_SHIFT); /* mac_addr_method */ param_host |= (1 << HI_OPTION_MAC_ADDR_METHOD_SHIFT); /* firmware_bridge */ param_host |= (0 << HI_OPTION_FW_BRIDGE_SHIFT); /* fwsubmode */ param_host |= (0 << HI_OPTION_FW_SUBMODE_SHIFT); ret = ath10k_bmi_write32(ar, hi_option_flag, param_host); if (ret) { ath10k_err(ar, "setting firmware mode (2/2) failed\n"); return ret; } /* We do all byte-swapping on the host */ ret = ath10k_bmi_write32(ar, hi_be, 0); if (ret) { ath10k_err(ar, "setting host CPU BE mode failed\n"); return ret; } /* FW descriptor/Data swap flags */ ret = ath10k_bmi_write32(ar, hi_fw_swap, 0); if (ret) { ath10k_err(ar, "setting FW data/desc swap flags failed\n"); return ret; } /* Some devices have a special sanity check that verifies the PCI * Device ID is written to this host interest var. It is known to be * required to boot QCA6164. */ ret = ath10k_bmi_write32(ar, hi_hci_uart_pwr_mgmt_params_ext, ar->dev_id); if (ret) { ath10k_err(ar, "failed to set pwr_mgmt_params: %d\n", ret); return ret; } return 0; } static const struct firmware *ath10k_fetch_fw_file(struct ath10k *ar, const char *dir, const char *file) { char filename[100]; const struct firmware *fw; int ret; if (file == NULL) return ERR_PTR(-ENOENT); if (dir == NULL) dir = "."; if (ar->board_name) { snprintf(filename, sizeof(filename), "%s/%s/%s", dir, ar->board_name, file); ret = firmware_request_nowarn(&fw, filename, ar->dev); ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot fw request '%s': %d\n", filename, ret); if (!ret) return fw; } snprintf(filename, sizeof(filename), "%s/%s", dir, file); ret = firmware_request_nowarn(&fw, filename, ar->dev); ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot fw request '%s': %d\n", filename, ret); if (ret) return ERR_PTR(ret); return fw; } static int ath10k_push_board_ext_data(struct ath10k *ar, const void *data, size_t data_len) { u32 board_data_size = ar->hw_params.fw.board_size; u32 board_ext_data_size = ar->hw_params.fw.board_ext_size; u32 board_ext_data_addr; int ret; ret = ath10k_bmi_read32(ar, hi_board_ext_data, &board_ext_data_addr); if (ret) { ath10k_err(ar, "could not read board ext data addr (%d)\n", ret); return ret; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot push board extended data addr 0x%x\n", board_ext_data_addr); if (board_ext_data_addr == 0) return 0; if (data_len != (board_data_size + board_ext_data_size)) { ath10k_err(ar, "invalid board (ext) data sizes %zu != %d+%d\n", data_len, board_data_size, board_ext_data_size); return -EINVAL; } ret = ath10k_bmi_write_memory(ar, board_ext_data_addr, data + board_data_size, board_ext_data_size); if (ret) { ath10k_err(ar, "could not write board ext data (%d)\n", ret); return ret; } ret = ath10k_bmi_write32(ar, hi_board_ext_data_config, (board_ext_data_size << 16) | 1); if (ret) { ath10k_err(ar, "could not write board ext data bit (%d)\n", ret); return ret; } return 0; } static int ath10k_core_get_board_id_from_otp(struct ath10k *ar) { u32 result, address; u8 board_id, chip_id; bool ext_bid_support; int ret, bmi_board_id_param; address = ar->hw_params.patch_load_addr; if (!ar->normal_mode_fw.fw_file.otp_data || !ar->normal_mode_fw.fw_file.otp_len) { ath10k_warn(ar, "failed to retrieve board id because of invalid otp\n"); return -ENODATA; } if (ar->id.bmi_ids_valid) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot already acquired valid otp board id,skip download, board_id %d chip_id %d\n", ar->id.bmi_board_id, ar->id.bmi_chip_id); goto skip_otp_download; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot upload otp to 0x%x len %zd for board id\n", address, ar->normal_mode_fw.fw_file.otp_len); ret = ath10k_bmi_fast_download(ar, address, ar->normal_mode_fw.fw_file.otp_data, ar->normal_mode_fw.fw_file.otp_len); if (ret) { ath10k_err(ar, "could not write otp for board id check: %d\n", ret); return ret; } if (ar->cal_mode == ATH10K_PRE_CAL_MODE_DT || ar->cal_mode == ATH10K_PRE_CAL_MODE_FILE || ar->cal_mode == ATH10K_PRE_CAL_MODE_NVMEM) bmi_board_id_param = BMI_PARAM_GET_FLASH_BOARD_ID; else bmi_board_id_param = BMI_PARAM_GET_EEPROM_BOARD_ID; ret = ath10k_bmi_execute(ar, address, bmi_board_id_param, &result); if (ret) { ath10k_err(ar, "could not execute otp for board id check: %d\n", ret); return ret; } board_id = MS(result, ATH10K_BMI_BOARD_ID_FROM_OTP); chip_id = MS(result, ATH10K_BMI_CHIP_ID_FROM_OTP); ext_bid_support = (result & ATH10K_BMI_EXT_BOARD_ID_SUPPORT); ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot get otp board id result 0x%08x board_id %d chip_id %d ext_bid_support %d\n", result, board_id, chip_id, ext_bid_support); ar->id.ext_bid_supported = ext_bid_support; if ((result & ATH10K_BMI_BOARD_ID_STATUS_MASK) != 0 || (board_id == 0)) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "board id does not exist in otp, ignore it\n"); return -EOPNOTSUPP; } ar->id.bmi_ids_valid = true; ar->id.bmi_board_id = board_id; ar->id.bmi_chip_id = chip_id; skip_otp_download: return 0; } static void ath10k_core_check_bdfext(const struct dmi_header *hdr, void *data) { struct ath10k *ar = data; const char *bdf_ext; const char *magic = ATH10K_SMBIOS_BDF_EXT_MAGIC; u8 bdf_enabled; int i; if (hdr->type != ATH10K_SMBIOS_BDF_EXT_TYPE) return; if (hdr->length != ATH10K_SMBIOS_BDF_EXT_LENGTH) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "wrong smbios bdf ext type length (%d).\n", hdr->length); return; } bdf_enabled = *((u8 *)hdr + ATH10K_SMBIOS_BDF_EXT_OFFSET); if (!bdf_enabled) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "bdf variant name not found.\n"); return; } /* Only one string exists (per spec) */ bdf_ext = (char *)hdr + hdr->length; if (memcmp(bdf_ext, magic, strlen(magic)) != 0) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "bdf variant magic does not match.\n"); return; } for (i = 0; i < strlen(bdf_ext); i++) { if (!isascii(bdf_ext[i]) || !isprint(bdf_ext[i])) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "bdf variant name contains non ascii chars.\n"); return; } } /* Copy extension name without magic suffix */ if (strscpy(ar->id.bdf_ext, bdf_ext + strlen(magic), sizeof(ar->id.bdf_ext)) < 0) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "bdf variant string is longer than the buffer can accommodate (variant: %s)\n", bdf_ext); return; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "found and validated bdf variant smbios_type 0x%x bdf %s\n", ATH10K_SMBIOS_BDF_EXT_TYPE, bdf_ext); } static int ath10k_core_check_smbios(struct ath10k *ar) { ar->id.bdf_ext[0] = '\0'; dmi_walk(ath10k_core_check_bdfext, ar); if (ar->id.bdf_ext[0] == '\0') return -ENODATA; return 0; } int ath10k_core_check_dt(struct ath10k *ar) { struct device_node *node; const char *variant = NULL; node = ar->dev->of_node; if (!node) return -ENOENT; of_property_read_string(node, "qcom,ath10k-calibration-variant", &variant); if (!variant) return -ENODATA; if (strscpy(ar->id.bdf_ext, variant, sizeof(ar->id.bdf_ext)) < 0) ath10k_dbg(ar, ATH10K_DBG_BOOT, "bdf variant string is longer than the buffer can accommodate (variant: %s)\n", variant); return 0; } EXPORT_SYMBOL(ath10k_core_check_dt); static int ath10k_download_fw(struct ath10k *ar) { u32 address, data_len; const void *data; int ret; struct pm_qos_request latency_qos; address = ar->hw_params.patch_load_addr; data = ar->running_fw->fw_file.firmware_data; data_len = ar->running_fw->fw_file.firmware_len; ret = ath10k_swap_code_seg_configure(ar, &ar->running_fw->fw_file); if (ret) { ath10k_err(ar, "failed to configure fw code swap: %d\n", ret); return ret; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot uploading firmware image %pK len %d\n", data, data_len); /* Check if device supports to download firmware via * diag copy engine. Downloading firmware via diag CE * greatly reduces the time to download firmware. */ if (ar->hw_params.fw_diag_ce_download) { ret = ath10k_hw_diag_fast_download(ar, address, data, data_len); if (ret == 0) /* firmware upload via diag ce was successful */ return 0; ath10k_warn(ar, "failed to upload firmware via diag ce, trying BMI: %d", ret); } memset(&latency_qos, 0, sizeof(latency_qos)); cpu_latency_qos_add_request(&latency_qos, 0); ret = ath10k_bmi_fast_download(ar, address, data, data_len); cpu_latency_qos_remove_request(&latency_qos); return ret; } void ath10k_core_free_board_files(struct ath10k *ar) { if (!IS_ERR(ar->normal_mode_fw.board)) release_firmware(ar->normal_mode_fw.board); if (!IS_ERR(ar->normal_mode_fw.ext_board)) release_firmware(ar->normal_mode_fw.ext_board); ar->normal_mode_fw.board = NULL; ar->normal_mode_fw.board_data = NULL; ar->normal_mode_fw.board_len = 0; ar->normal_mode_fw.ext_board = NULL; ar->normal_mode_fw.ext_board_data = NULL; ar->normal_mode_fw.ext_board_len = 0; } EXPORT_SYMBOL(ath10k_core_free_board_files); static void ath10k_core_free_firmware_files(struct ath10k *ar) { if (!IS_ERR(ar->normal_mode_fw.fw_file.firmware)) release_firmware(ar->normal_mode_fw.fw_file.firmware); if (!IS_ERR(ar->cal_file)) release_firmware(ar->cal_file); if (!IS_ERR(ar->pre_cal_file)) release_firmware(ar->pre_cal_file); ath10k_swap_code_seg_release(ar, &ar->normal_mode_fw.fw_file); ar->normal_mode_fw.fw_file.otp_data = NULL; ar->normal_mode_fw.fw_file.otp_len = 0; ar->normal_mode_fw.fw_file.firmware = NULL; ar->normal_mode_fw.fw_file.firmware_data = NULL; ar->normal_mode_fw.fw_file.firmware_len = 0; ar->cal_file = NULL; ar->pre_cal_file = NULL; } static int ath10k_fetch_cal_file(struct ath10k *ar) { char filename[100]; /* pre-cal-<bus>-<id>.bin */ scnprintf(filename, sizeof(filename), "pre-cal-%s-%s.bin", ath10k_bus_str(ar->hif.bus), dev_name(ar->dev)); ar->pre_cal_file = ath10k_fetch_fw_file(ar, ATH10K_FW_DIR, filename); if (!IS_ERR(ar->pre_cal_file)) goto success; /* cal-<bus>-<id>.bin */ scnprintf(filename, sizeof(filename), "cal-%s-%s.bin", ath10k_bus_str(ar->hif.bus), dev_name(ar->dev)); ar->cal_file = ath10k_fetch_fw_file(ar, ATH10K_FW_DIR, filename); if (IS_ERR(ar->cal_file)) /* calibration file is optional, don't print any warnings */ return PTR_ERR(ar->cal_file); success: ath10k_dbg(ar, ATH10K_DBG_BOOT, "found calibration file %s/%s\n", ATH10K_FW_DIR, filename); return 0; } static int ath10k_core_fetch_board_data_api_1(struct ath10k *ar, int bd_ie_type) { const struct firmware *fw; char boardname[100]; if (bd_ie_type == ATH10K_BD_IE_BOARD) { scnprintf(boardname, sizeof(boardname), "board-%s-%s.bin", ath10k_bus_str(ar->hif.bus), dev_name(ar->dev)); ar->normal_mode_fw.board = ath10k_fetch_fw_file(ar, ar->hw_params.fw.dir, boardname); if (IS_ERR(ar->normal_mode_fw.board)) { fw = ath10k_fetch_fw_file(ar, ar->hw_params.fw.dir, ATH10K_BOARD_DATA_FILE); ar->normal_mode_fw.board = fw; } if (IS_ERR(ar->normal_mode_fw.board)) return PTR_ERR(ar->normal_mode_fw.board); ar->normal_mode_fw.board_data = ar->normal_mode_fw.board->data; ar->normal_mode_fw.board_len = ar->normal_mode_fw.board->size; } else if (bd_ie_type == ATH10K_BD_IE_BOARD_EXT) { fw = ath10k_fetch_fw_file(ar, ar->hw_params.fw.dir, ATH10K_EBOARD_DATA_FILE); ar->normal_mode_fw.ext_board = fw; if (IS_ERR(ar->normal_mode_fw.ext_board)) return PTR_ERR(ar->normal_mode_fw.ext_board); ar->normal_mode_fw.ext_board_data = ar->normal_mode_fw.ext_board->data; ar->normal_mode_fw.ext_board_len = ar->normal_mode_fw.ext_board->size; } return 0; } static int ath10k_core_parse_bd_ie_board(struct ath10k *ar, const void *buf, size_t buf_len, const char *boardname, int bd_ie_type) { const struct ath10k_fw_ie *hdr; bool name_match_found; int ret, board_ie_id; size_t board_ie_len; const void *board_ie_data; name_match_found = false; /* go through ATH10K_BD_IE_BOARD_ elements */ while (buf_len > sizeof(struct ath10k_fw_ie)) { hdr = buf; board_ie_id = le32_to_cpu(hdr->id); board_ie_len = le32_to_cpu(hdr->len); board_ie_data = hdr->data; buf_len -= sizeof(*hdr); buf += sizeof(*hdr); if (buf_len < ALIGN(board_ie_len, 4)) { ath10k_err(ar, "invalid ATH10K_BD_IE_BOARD length: %zu < %zu\n", buf_len, ALIGN(board_ie_len, 4)); ret = -EINVAL; goto out; } switch (board_ie_id) { case ATH10K_BD_IE_BOARD_NAME: ath10k_dbg_dump(ar, ATH10K_DBG_BOOT, "board name", "", board_ie_data, board_ie_len); if (board_ie_len != strlen(boardname)) break; ret = memcmp(board_ie_data, boardname, strlen(boardname)); if (ret) break; name_match_found = true; ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot found match for name '%s'", boardname); break; case ATH10K_BD_IE_BOARD_DATA: if (!name_match_found) /* no match found */ break; if (bd_ie_type == ATH10K_BD_IE_BOARD) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot found board data for '%s'", boardname); ar->normal_mode_fw.board_data = board_ie_data; ar->normal_mode_fw.board_len = board_ie_len; } else if (bd_ie_type == ATH10K_BD_IE_BOARD_EXT) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot found eboard data for '%s'", boardname); ar->normal_mode_fw.ext_board_data = board_ie_data; ar->normal_mode_fw.ext_board_len = board_ie_len; } ret = 0; goto out; default: ath10k_warn(ar, "unknown ATH10K_BD_IE_BOARD found: %d\n", board_ie_id); break; } /* jump over the padding */ board_ie_len = ALIGN(board_ie_len, 4); buf_len -= board_ie_len; buf += board_ie_len; } /* no match found */ ret = -ENOENT; out: return ret; } static int ath10k_core_search_bd(struct ath10k *ar, const char *boardname, const u8 *data, size_t len) { size_t ie_len; struct ath10k_fw_ie *hdr; int ret = -ENOENT, ie_id; while (len > sizeof(struct ath10k_fw_ie)) { hdr = (struct ath10k_fw_ie *)data; ie_id = le32_to_cpu(hdr->id); ie_len = le32_to_cpu(hdr->len); len -= sizeof(*hdr); data = hdr->data; if (len < ALIGN(ie_len, 4)) { ath10k_err(ar, "invalid length for board ie_id %d ie_len %zu len %zu\n", ie_id, ie_len, len); return -EINVAL; } switch (ie_id) { case ATH10K_BD_IE_BOARD: ret = ath10k_core_parse_bd_ie_board(ar, data, ie_len, boardname, ATH10K_BD_IE_BOARD); if (ret == -ENOENT) /* no match found, continue */ break; /* either found or error, so stop searching */ goto out; case ATH10K_BD_IE_BOARD_EXT: ret = ath10k_core_parse_bd_ie_board(ar, data, ie_len, boardname, ATH10K_BD_IE_BOARD_EXT); if (ret == -ENOENT) /* no match found, continue */ break; /* either found or error, so stop searching */ goto out; } /* jump over the padding */ ie_len = ALIGN(ie_len, 4); len -= ie_len; data += ie_len; } out: /* return result of parse_bd_ie_board() or -ENOENT */ return ret; } static int ath10k_core_fetch_board_data_api_n(struct ath10k *ar, const char *boardname, const char *fallback_boardname1, const char *fallback_boardname2, const char *filename) { size_t len, magic_len; const u8 *data; int ret; /* Skip if already fetched during board data download */ if (!ar->normal_mode_fw.board) ar->normal_mode_fw.board = ath10k_fetch_fw_file(ar, ar->hw_params.fw.dir, filename); if (IS_ERR(ar->normal_mode_fw.board)) return PTR_ERR(ar->normal_mode_fw.board); data = ar->normal_mode_fw.board->data; len = ar->normal_mode_fw.board->size; /* magic has extra null byte padded */ magic_len = strlen(ATH10K_BOARD_MAGIC) + 1; if (len < magic_len) { ath10k_err(ar, "failed to find magic value in %s/%s, file too short: %zu\n", ar->hw_params.fw.dir, filename, len); ret = -EINVAL; goto err; } if (memcmp(data, ATH10K_BOARD_MAGIC, magic_len)) { ath10k_err(ar, "found invalid board magic\n"); ret = -EINVAL; goto err; } /* magic is padded to 4 bytes */ magic_len = ALIGN(magic_len, 4); if (len < magic_len) { ath10k_err(ar, "failed: %s/%s too small to contain board data, len: %zu\n", ar->hw_params.fw.dir, filename, len); ret = -EINVAL; goto err; } data += magic_len; len -= magic_len; /* attempt to find boardname in the IE list */ ret = ath10k_core_search_bd(ar, boardname, data, len); /* if we didn't find it and have a fallback name, try that */ if (ret == -ENOENT && fallback_boardname1) ret = ath10k_core_search_bd(ar, fallback_boardname1, data, len); if (ret == -ENOENT && fallback_boardname2) ret = ath10k_core_search_bd(ar, fallback_boardname2, data, len); if (ret == -ENOENT) { ath10k_err(ar, "failed to fetch board data for %s from %s/%s\n", boardname, ar->hw_params.fw.dir, filename); ret = -ENODATA; } if (ret) goto err; return 0; err: ath10k_core_free_board_files(ar); return ret; } static int ath10k_core_create_board_name(struct ath10k *ar, char *name, size_t name_len, bool with_variant, bool with_chip_id) { /* strlen(',variant=') + strlen(ar->id.bdf_ext) */ char variant[9 + ATH10K_SMBIOS_BDF_EXT_STR_LENGTH] = { 0 }; if (with_variant && ar->id.bdf_ext[0] != '\0') scnprintf(variant, sizeof(variant), ",variant=%s", ar->id.bdf_ext); if (ar->id.bmi_ids_valid) { scnprintf(name, name_len, "bus=%s,bmi-chip-id=%d,bmi-board-id=%d%s", ath10k_bus_str(ar->hif.bus), ar->id.bmi_chip_id, ar->id.bmi_board_id, variant); goto out; } if (ar->id.qmi_ids_valid) { if (with_chip_id) scnprintf(name, name_len, "bus=%s,qmi-board-id=%x,qmi-chip-id=%x%s", ath10k_bus_str(ar->hif.bus), ar->id.qmi_board_id, ar->id.qmi_chip_id, variant); else scnprintf(name, name_len, "bus=%s,qmi-board-id=%x", ath10k_bus_str(ar->hif.bus), ar->id.qmi_board_id); goto out; } scnprintf(name, name_len, "bus=%s,vendor=%04x,device=%04x,subsystem-vendor=%04x,subsystem-device=%04x%s", ath10k_bus_str(ar->hif.bus), ar->id.vendor, ar->id.device, ar->id.subsystem_vendor, ar->id.subsystem_device, variant); out: ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot using board name '%s'\n", name); return 0; } static int ath10k_core_create_eboard_name(struct ath10k *ar, char *name, size_t name_len) { if (ar->id.bmi_ids_valid) { scnprintf(name, name_len, "bus=%s,bmi-chip-id=%d,bmi-eboard-id=%d", ath10k_bus_str(ar->hif.bus), ar->id.bmi_chip_id, ar->id.bmi_eboard_id); ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot using eboard name '%s'\n", name); return 0; } /* Fallback if returned board id is zero */ return -1; } int ath10k_core_fetch_board_file(struct ath10k *ar, int bd_ie_type) { char boardname[100], fallback_boardname1[100], fallback_boardname2[100]; int ret; if (bd_ie_type == ATH10K_BD_IE_BOARD) { /* With variant and chip id */ ret = ath10k_core_create_board_name(ar, boardname, sizeof(boardname), true, true); if (ret) { ath10k_err(ar, "failed to create board name: %d", ret); return ret; } /* Without variant and only chip-id */ ret = ath10k_core_create_board_name(ar, fallback_boardname1, sizeof(boardname), false, true); if (ret) { ath10k_err(ar, "failed to create 1st fallback board name: %d", ret); return ret; } /* Without variant and without chip-id */ ret = ath10k_core_create_board_name(ar, fallback_boardname2, sizeof(boardname), false, false); if (ret) { ath10k_err(ar, "failed to create 2nd fallback board name: %d", ret); return ret; } } else if (bd_ie_type == ATH10K_BD_IE_BOARD_EXT) { ret = ath10k_core_create_eboard_name(ar, boardname, sizeof(boardname)); if (ret) { ath10k_err(ar, "fallback to eboard.bin since board id 0"); goto fallback; } } ar->bd_api = 2; ret = ath10k_core_fetch_board_data_api_n(ar, boardname, fallback_boardname1, fallback_boardname2, ATH10K_BOARD_API2_FILE); if (!ret) goto success; fallback: ar->bd_api = 1; ret = ath10k_core_fetch_board_data_api_1(ar, bd_ie_type); if (ret) { ath10k_err(ar, "failed to fetch board-2.bin or board.bin from %s\n", ar->hw_params.fw.dir); return ret; } success: ath10k_dbg(ar, ATH10K_DBG_BOOT, "using board api %d\n", ar->bd_api); return 0; } EXPORT_SYMBOL(ath10k_core_fetch_board_file); static int ath10k_core_get_ext_board_id_from_otp(struct ath10k *ar) { u32 result, address; u8 ext_board_id; int ret; address = ar->hw_params.patch_load_addr; if (!ar->normal_mode_fw.fw_file.otp_data || !ar->normal_mode_fw.fw_file.otp_len) { ath10k_warn(ar, "failed to retrieve extended board id due to otp binary missing\n"); return -ENODATA; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot upload otp to 0x%x len %zd for ext board id\n", address, ar->normal_mode_fw.fw_file.otp_len); ret = ath10k_bmi_fast_download(ar, address, ar->normal_mode_fw.fw_file.otp_data, ar->normal_mode_fw.fw_file.otp_len); if (ret) { ath10k_err(ar, "could not write otp for ext board id check: %d\n", ret); return ret; } ret = ath10k_bmi_execute(ar, address, BMI_PARAM_GET_EXT_BOARD_ID, &result); if (ret) { ath10k_err(ar, "could not execute otp for ext board id check: %d\n", ret); return ret; } if (!result) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "ext board id does not exist in otp, ignore it\n"); return -EOPNOTSUPP; } ext_board_id = result & ATH10K_BMI_EBOARD_ID_STATUS_MASK; ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot get otp ext board id result 0x%08x ext_board_id %d\n", result, ext_board_id); ar->id.bmi_eboard_id = ext_board_id; return 0; } static int ath10k_download_board_data(struct ath10k *ar, const void *data, size_t data_len) { u32 board_data_size = ar->hw_params.fw.board_size; u32 eboard_data_size = ar->hw_params.fw.ext_board_size; u32 board_address; u32 ext_board_address; int ret; ret = ath10k_push_board_ext_data(ar, data, data_len); if (ret) { ath10k_err(ar, "could not push board ext data (%d)\n", ret); goto exit; } ret = ath10k_bmi_read32(ar, hi_board_data, &board_address); if (ret) { ath10k_err(ar, "could not read board data addr (%d)\n", ret); goto exit; } ret = ath10k_bmi_write_memory(ar, board_address, data, min_t(u32, board_data_size, data_len)); if (ret) { ath10k_err(ar, "could not write board data (%d)\n", ret); goto exit; } ret = ath10k_bmi_write32(ar, hi_board_data_initialized, 1); if (ret) { ath10k_err(ar, "could not write board data bit (%d)\n", ret); goto exit; } if (!ar->id.ext_bid_supported) goto exit; /* Extended board data download */ ret = ath10k_core_get_ext_board_id_from_otp(ar); if (ret == -EOPNOTSUPP) { /* Not fetching ext_board_data if ext board id is 0 */ ath10k_dbg(ar, ATH10K_DBG_BOOT, "otp returned ext board id 0\n"); return 0; } else if (ret) { ath10k_err(ar, "failed to get extended board id: %d\n", ret); goto exit; } ret = ath10k_core_fetch_board_file(ar, ATH10K_BD_IE_BOARD_EXT); if (ret) goto exit; if (ar->normal_mode_fw.ext_board_data) { ext_board_address = board_address + EXT_BOARD_ADDRESS_OFFSET; ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot writing ext board data to addr 0x%x", ext_board_address); ret = ath10k_bmi_write_memory(ar, ext_board_address, ar->normal_mode_fw.ext_board_data, min_t(u32, eboard_data_size, data_len)); if (ret) ath10k_err(ar, "failed to write ext board data: %d\n", ret); } exit: return ret; } static int ath10k_download_and_run_otp(struct ath10k *ar) { u32 result, address = ar->hw_params.patch_load_addr; u32 bmi_otp_exe_param = ar->hw_params.otp_exe_param; int ret; ret = ath10k_download_board_data(ar, ar->running_fw->board_data, ar->running_fw->board_len); if (ret) { ath10k_err(ar, "failed to download board data: %d\n", ret); return ret; } /* OTP is optional */ if (!ar->running_fw->fw_file.otp_data || !ar->running_fw->fw_file.otp_len) { ath10k_warn(ar, "Not running otp, calibration will be incorrect (otp-data %pK otp_len %zd)!\n", ar->running_fw->fw_file.otp_data, ar->running_fw->fw_file.otp_len); return 0; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot upload otp to 0x%x len %zd\n", address, ar->running_fw->fw_file.otp_len); ret = ath10k_bmi_fast_download(ar, address, ar->running_fw->fw_file.otp_data, ar->running_fw->fw_file.otp_len); if (ret) { ath10k_err(ar, "could not write otp (%d)\n", ret); return ret; } /* As of now pre-cal is valid for 10_4 variants */ if (ar->cal_mode == ATH10K_PRE_CAL_MODE_DT || ar->cal_mode == ATH10K_PRE_CAL_MODE_FILE || ar->cal_mode == ATH10K_PRE_CAL_MODE_NVMEM) bmi_otp_exe_param = BMI_PARAM_FLASH_SECTION_ALL; ret = ath10k_bmi_execute(ar, address, bmi_otp_exe_param, &result); if (ret) { ath10k_err(ar, "could not execute otp (%d)\n", ret); return ret; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot otp execute result %d\n", result); if (!(skip_otp || test_bit(ATH10K_FW_FEATURE_IGNORE_OTP_RESULT, ar->running_fw->fw_file.fw_features)) && result != 0) { ath10k_err(ar, "otp calibration failed: %d", result); return -EINVAL; } return 0; } static int ath10k_download_cal_file(struct ath10k *ar, const struct firmware *file) { int ret; if (!file) return -ENOENT; if (IS_ERR(file)) return PTR_ERR(file); ret = ath10k_download_board_data(ar, file->data, file->size); if (ret) { ath10k_err(ar, "failed to download cal_file data: %d\n", ret); return ret; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot cal file downloaded\n"); return 0; } static int ath10k_download_cal_dt(struct ath10k *ar, const char *dt_name) { struct device_node *node; int data_len; void *data; int ret; node = ar->dev->of_node; if (!node) /* Device Tree is optional, don't print any warnings if * there's no node for ath10k. */ return -ENOENT; if (!of_get_property(node, dt_name, &data_len)) { /* The calibration data node is optional */ return -ENOENT; } if (data_len != ar->hw_params.cal_data_len) { ath10k_warn(ar, "invalid calibration data length in DT: %d\n", data_len); ret = -EMSGSIZE; goto out; } data = kmalloc(data_len, GFP_KERNEL); if (!data) { ret = -ENOMEM; goto out; } ret = of_property_read_u8_array(node, dt_name, data, data_len); if (ret) { ath10k_warn(ar, "failed to read calibration data from DT: %d\n", ret); goto out_free; } ret = ath10k_download_board_data(ar, data, data_len); if (ret) { ath10k_warn(ar, "failed to download calibration data from Device Tree: %d\n", ret); goto out_free; } ret = 0; out_free: kfree(data); out: return ret; } static int ath10k_download_cal_eeprom(struct ath10k *ar) { size_t data_len; void *data = NULL; int ret; ret = ath10k_hif_fetch_cal_eeprom(ar, &data, &data_len); if (ret) { if (ret != -EOPNOTSUPP) ath10k_warn(ar, "failed to read calibration data from EEPROM: %d\n", ret); goto out_free; } ret = ath10k_download_board_data(ar, data, data_len); if (ret) { ath10k_warn(ar, "failed to download calibration data from EEPROM: %d\n", ret); goto out_free; } ret = 0; out_free: kfree(data); return ret; } static int ath10k_download_cal_nvmem(struct ath10k *ar, const char *cell_name) { struct nvmem_cell *cell; void *buf; size_t len; int ret; cell = devm_nvmem_cell_get(ar->dev, cell_name); if (IS_ERR(cell)) { ret = PTR_ERR(cell); return ret; } buf = nvmem_cell_read(cell, &len); if (IS_ERR(buf)) return PTR_ERR(buf); if (ar->hw_params.cal_data_len != len) { kfree(buf); ath10k_warn(ar, "invalid calibration data length in nvmem-cell '%s': %zu != %u\n", cell_name, len, ar->hw_params.cal_data_len); return -EMSGSIZE; } ret = ath10k_download_board_data(ar, buf, len); kfree(buf); if (ret) ath10k_warn(ar, "failed to download calibration data from nvmem-cell '%s': %d\n", cell_name, ret); return ret; } int ath10k_core_fetch_firmware_api_n(struct ath10k *ar, const char *name, struct ath10k_fw_file *fw_file) { size_t magic_len, len, ie_len; int ie_id, i, index, bit, ret; struct ath10k_fw_ie *hdr; const u8 *data; __le32 *timestamp, *version; /* first fetch the firmware file (firmware-*.bin) */ fw_file->firmware = ath10k_fetch_fw_file(ar, ar->hw_params.fw.dir, name); if (IS_ERR(fw_file->firmware)) return PTR_ERR(fw_file->firmware); data = fw_file->firmware->data; len = fw_file->firmware->size; /* magic also includes the null byte, check that as well */ magic_len = strlen(ATH10K_FIRMWARE_MAGIC) + 1; if (len < magic_len) { ath10k_err(ar, "firmware file '%s/%s' too small to contain magic: %zu\n", ar->hw_params.fw.dir, name, len); ret = -EINVAL; goto err; } if (memcmp(data, ATH10K_FIRMWARE_MAGIC, magic_len) != 0) { ath10k_err(ar, "invalid firmware magic\n"); ret = -EINVAL; goto err; } /* jump over the padding */ magic_len = ALIGN(magic_len, 4); len -= magic_len; data += magic_len; /* loop elements */ while (len > sizeof(struct ath10k_fw_ie)) { hdr = (struct ath10k_fw_ie *)data; ie_id = le32_to_cpu(hdr->id); ie_len = le32_to_cpu(hdr->len); len -= sizeof(*hdr); data += sizeof(*hdr); if (len < ie_len) { ath10k_err(ar, "invalid length for FW IE %d (%zu < %zu)\n", ie_id, len, ie_len); ret = -EINVAL; goto err; } switch (ie_id) { case ATH10K_FW_IE_FW_VERSION: if (ie_len > sizeof(fw_file->fw_version) - 1) break; memcpy(fw_file->fw_version, data, ie_len); fw_file->fw_version[ie_len] = '\0'; ath10k_dbg(ar, ATH10K_DBG_BOOT, "found fw version %s\n", fw_file->fw_version); break; case ATH10K_FW_IE_TIMESTAMP: if (ie_len != sizeof(u32)) break; timestamp = (__le32 *)data; ath10k_dbg(ar, ATH10K_DBG_BOOT, "found fw timestamp %d\n", le32_to_cpup(timestamp)); break; case ATH10K_FW_IE_FEATURES: ath10k_dbg(ar, ATH10K_DBG_BOOT, "found firmware features ie (%zd B)\n", ie_len); for (i = 0; i < ATH10K_FW_FEATURE_COUNT; i++) { index = i / 8; bit = i % 8; if (index == ie_len) break; if (data[index] & (1 << bit)) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "Enabling feature bit: %i\n", i); __set_bit(i, fw_file->fw_features); } } ath10k_dbg_dump(ar, ATH10K_DBG_BOOT, "features", "", fw_file->fw_features, sizeof(fw_file->fw_features)); break; case ATH10K_FW_IE_FW_IMAGE: ath10k_dbg(ar, ATH10K_DBG_BOOT, "found fw image ie (%zd B)\n", ie_len); fw_file->firmware_data = data; fw_file->firmware_len = ie_len; break; case ATH10K_FW_IE_OTP_IMAGE: ath10k_dbg(ar, ATH10K_DBG_BOOT, "found otp image ie (%zd B)\n", ie_len); fw_file->otp_data = data; fw_file->otp_len = ie_len; break; case ATH10K_FW_IE_WMI_OP_VERSION: if (ie_len != sizeof(u32)) break; version = (__le32 *)data; fw_file->wmi_op_version = le32_to_cpup(version); ath10k_dbg(ar, ATH10K_DBG_BOOT, "found fw ie wmi op version %d\n", fw_file->wmi_op_version); break; case ATH10K_FW_IE_HTT_OP_VERSION: if (ie_len != sizeof(u32)) break; version = (__le32 *)data; fw_file->htt_op_version = le32_to_cpup(version); ath10k_dbg(ar, ATH10K_DBG_BOOT, "found fw ie htt op version %d\n", fw_file->htt_op_version); break; case ATH10K_FW_IE_FW_CODE_SWAP_IMAGE: ath10k_dbg(ar, ATH10K_DBG_BOOT, "found fw code swap image ie (%zd B)\n", ie_len); fw_file->codeswap_data = data; fw_file->codeswap_len = ie_len; break; default: ath10k_warn(ar, "Unknown FW IE: %u\n", le32_to_cpu(hdr->id)); break; } /* jump over the padding */ ie_len = ALIGN(ie_len, 4); len -= ie_len; data += ie_len; } if (!test_bit(ATH10K_FW_FEATURE_NON_BMI, fw_file->fw_features) && (!fw_file->firmware_data || !fw_file->firmware_len)) { ath10k_warn(ar, "No ATH10K_FW_IE_FW_IMAGE found from '%s/%s', skipping\n", ar->hw_params.fw.dir, name); ret = -ENOMEDIUM; goto err; } return 0; err: ath10k_core_free_firmware_files(ar); return ret; } static void ath10k_core_get_fw_name(struct ath10k *ar, char *fw_name, size_t fw_name_len, int fw_api) { switch (ar->hif.bus) { case ATH10K_BUS_SDIO: case ATH10K_BUS_USB: scnprintf(fw_name, fw_name_len, "%s-%s-%d.bin", ATH10K_FW_FILE_BASE, ath10k_bus_str(ar->hif.bus), fw_api); break; case ATH10K_BUS_PCI: case ATH10K_BUS_AHB: case ATH10K_BUS_SNOC: scnprintf(fw_name, fw_name_len, "%s-%d.bin", ATH10K_FW_FILE_BASE, fw_api); break; } } static int ath10k_core_fetch_firmware_files(struct ath10k *ar) { int ret, i; char fw_name[100]; /* calibration file is optional, don't check for any errors */ ath10k_fetch_cal_file(ar); for (i = ATH10K_FW_API_MAX; i >= ATH10K_FW_API_MIN; i--) { ar->fw_api = i; ath10k_dbg(ar, ATH10K_DBG_BOOT, "trying fw api %d\n", ar->fw_api); ath10k_core_get_fw_name(ar, fw_name, sizeof(fw_name), ar->fw_api); ret = ath10k_core_fetch_firmware_api_n(ar, fw_name, &ar->normal_mode_fw.fw_file); if (!ret) goto success; } /* we end up here if we couldn't fetch any firmware */ ath10k_err(ar, "Failed to find firmware-N.bin (N between %d and %d) from %s: %d", ATH10K_FW_API_MIN, ATH10K_FW_API_MAX, ar->hw_params.fw.dir, ret); return ret; success: ath10k_dbg(ar, ATH10K_DBG_BOOT, "using fw api %d\n", ar->fw_api); return 0; } static int ath10k_core_pre_cal_download(struct ath10k *ar) { int ret; ret = ath10k_download_cal_nvmem(ar, "pre-calibration"); if (ret == 0) { ar->cal_mode = ATH10K_PRE_CAL_MODE_NVMEM; goto success; } else if (ret == -EPROBE_DEFER) { return ret; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot did not find a pre-calibration nvmem-cell, try file next: %d\n", ret); ret = ath10k_download_cal_file(ar, ar->pre_cal_file); if (ret == 0) { ar->cal_mode = ATH10K_PRE_CAL_MODE_FILE; goto success; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot did not find a pre calibration file, try DT next: %d\n", ret); ret = ath10k_download_cal_dt(ar, "qcom,ath10k-pre-calibration-data"); if (ret) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "unable to load pre cal data from DT: %d\n", ret); return ret; } ar->cal_mode = ATH10K_PRE_CAL_MODE_DT; success: ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot using calibration mode %s\n", ath10k_cal_mode_str(ar->cal_mode)); return 0; } static int ath10k_core_pre_cal_config(struct ath10k *ar) { int ret; ret = ath10k_core_pre_cal_download(ar); if (ret) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "failed to load pre cal data: %d\n", ret); return ret; } ret = ath10k_core_get_board_id_from_otp(ar); if (ret) { ath10k_err(ar, "failed to get board id: %d\n", ret); return ret; } ret = ath10k_download_and_run_otp(ar); if (ret) { ath10k_err(ar, "failed to run otp: %d\n", ret); return ret; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "pre cal configuration done successfully\n"); return 0; } static int ath10k_download_cal_data(struct ath10k *ar) { int ret; ret = ath10k_core_pre_cal_config(ar); if (ret == 0) return 0; ath10k_dbg(ar, ATH10K_DBG_BOOT, "pre cal download procedure failed, try cal file: %d\n", ret); ret = ath10k_download_cal_nvmem(ar, "calibration"); if (ret == 0) { ar->cal_mode = ATH10K_CAL_MODE_NVMEM; goto done; } else if (ret == -EPROBE_DEFER) { return ret; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot did not find a calibration nvmem-cell, try file next: %d\n", ret); ret = ath10k_download_cal_file(ar, ar->cal_file); if (ret == 0) { ar->cal_mode = ATH10K_CAL_MODE_FILE; goto done; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot did not find a calibration file, try DT next: %d\n", ret); ret = ath10k_download_cal_dt(ar, "qcom,ath10k-calibration-data"); if (ret == 0) { ar->cal_mode = ATH10K_CAL_MODE_DT; goto done; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot did not find DT entry, try target EEPROM next: %d\n", ret); ret = ath10k_download_cal_eeprom(ar); if (ret == 0) { ar->cal_mode = ATH10K_CAL_MODE_EEPROM; goto done; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot did not find target EEPROM entry, try OTP next: %d\n", ret); ret = ath10k_download_and_run_otp(ar); if (ret) { ath10k_err(ar, "failed to run otp: %d\n", ret); return ret; } ar->cal_mode = ATH10K_CAL_MODE_OTP; done: ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot using calibration mode %s\n", ath10k_cal_mode_str(ar->cal_mode)); return 0; } static void ath10k_core_fetch_btcoex_dt(struct ath10k *ar) { struct device_node *node; u8 coex_support = 0; int ret; node = ar->dev->of_node; if (!node) goto out; ret = of_property_read_u8(node, "qcom,coexist-support", &coex_support); if (ret) { ar->coex_support = true; goto out; } if (coex_support) { ar->coex_support = true; } else { ar->coex_support = false; ar->coex_gpio_pin = -1; goto out; } ret = of_property_read_u32(node, "qcom,coexist-gpio-pin", &ar->coex_gpio_pin); if (ret) ar->coex_gpio_pin = -1; out: ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot coex_support %d coex_gpio_pin %d\n", ar->coex_support, ar->coex_gpio_pin); } static int ath10k_init_uart(struct ath10k *ar) { int ret; /* * Explicitly setting UART prints to zero as target turns it on * based on scratch registers. */ ret = ath10k_bmi_write32(ar, hi_serial_enable, 0); if (ret) { ath10k_warn(ar, "could not disable UART prints (%d)\n", ret); return ret; } if (!uart_print) { if (ar->hw_params.uart_pin_workaround) { ret = ath10k_bmi_write32(ar, hi_dbg_uart_txpin, ar->hw_params.uart_pin); if (ret) { ath10k_warn(ar, "failed to set UART TX pin: %d", ret); return ret; } } return 0; } ret = ath10k_bmi_write32(ar, hi_dbg_uart_txpin, ar->hw_params.uart_pin); if (ret) { ath10k_warn(ar, "could not enable UART prints (%d)\n", ret); return ret; } ret = ath10k_bmi_write32(ar, hi_serial_enable, 1); if (ret) { ath10k_warn(ar, "could not enable UART prints (%d)\n", ret); return ret; } /* Set the UART baud rate to 19200. */ ret = ath10k_bmi_write32(ar, hi_desired_baud_rate, 19200); if (ret) { ath10k_warn(ar, "could not set the baud rate (%d)\n", ret); return ret; } ath10k_info(ar, "UART prints enabled\n"); return 0; } static int ath10k_init_hw_params(struct ath10k *ar) { const struct ath10k_hw_params *hw_params; int i; for (i = 0; i < ARRAY_SIZE(ath10k_hw_params_list); i++) { hw_params = &ath10k_hw_params_list[i]; if (hw_params->bus == ar->hif.bus && hw_params->id == ar->target_version && hw_params->dev_id == ar->dev_id) break; } if (i == ARRAY_SIZE(ath10k_hw_params_list)) { ath10k_err(ar, "Unsupported hardware version: 0x%x\n", ar->target_version); return -EINVAL; } ar->hw_params = *hw_params; ath10k_dbg(ar, ATH10K_DBG_BOOT, "Hardware name %s version 0x%x\n", ar->hw_params.name, ar->target_version); return 0; } void ath10k_core_start_recovery(struct ath10k *ar) { if (test_and_set_bit(ATH10K_FLAG_RESTARTING, &ar->dev_flags)) { ath10k_warn(ar, "already restarting\n"); return; } queue_work(ar->workqueue, &ar->restart_work); } EXPORT_SYMBOL(ath10k_core_start_recovery); void ath10k_core_napi_enable(struct ath10k *ar) { lockdep_assert_held(&ar->conf_mutex); if (test_bit(ATH10K_FLAG_NAPI_ENABLED, &ar->dev_flags)) return; napi_enable(&ar->napi); set_bit(ATH10K_FLAG_NAPI_ENABLED, &ar->dev_flags); } EXPORT_SYMBOL(ath10k_core_napi_enable); void ath10k_core_napi_sync_disable(struct ath10k *ar) { lockdep_assert_held(&ar->conf_mutex); if (!test_bit(ATH10K_FLAG_NAPI_ENABLED, &ar->dev_flags)) return; napi_synchronize(&ar->napi); napi_disable(&ar->napi); clear_bit(ATH10K_FLAG_NAPI_ENABLED, &ar->dev_flags); } EXPORT_SYMBOL(ath10k_core_napi_sync_disable); static void ath10k_core_restart(struct work_struct *work) { struct ath10k *ar = container_of(work, struct ath10k, restart_work); int ret; set_bit(ATH10K_FLAG_CRASH_FLUSH, &ar->dev_flags); /* Place a barrier to make sure the compiler doesn't reorder * CRASH_FLUSH and calling other functions. */ barrier(); ieee80211_stop_queues(ar->hw); ath10k_drain_tx(ar); complete(&ar->scan.started); complete(&ar->scan.completed); complete(&ar->scan.on_channel); complete(&ar->offchan_tx_completed); complete(&ar->install_key_done); complete(&ar->vdev_setup_done); complete(&ar->vdev_delete_done); complete(&ar->thermal.wmi_sync); complete(&ar->bss_survey_done); wake_up(&ar->htt.empty_tx_wq); wake_up(&ar->wmi.tx_credits_wq); wake_up(&ar->peer_mapping_wq); /* TODO: We can have one instance of cancelling coverage_class_work by * moving it to ath10k_halt(), so that both stop() and restart() would * call that but it takes conf_mutex() and if we call cancel_work_sync() * with conf_mutex it will deadlock. */ cancel_work_sync(&ar->set_coverage_class_work); mutex_lock(&ar->conf_mutex); switch (ar->state) { case ATH10K_STATE_ON: ar->state = ATH10K_STATE_RESTARTING; ath10k_halt(ar); ath10k_scan_finish(ar); ieee80211_restart_hw(ar->hw); break; case ATH10K_STATE_OFF: /* this can happen if driver is being unloaded * or if the crash happens during FW probing */ ath10k_warn(ar, "cannot restart a device that hasn't been started\n"); break; case ATH10K_STATE_RESTARTING: /* hw restart might be requested from multiple places */ break; case ATH10K_STATE_RESTARTED: ar->state = ATH10K_STATE_WEDGED; fallthrough; case ATH10K_STATE_WEDGED: ath10k_warn(ar, "device is wedged, will not restart\n"); break; case ATH10K_STATE_UTF: ath10k_warn(ar, "firmware restart in UTF mode not supported\n"); break; } mutex_unlock(&ar->conf_mutex); ret = ath10k_coredump_submit(ar); if (ret) ath10k_warn(ar, "failed to send firmware crash dump via devcoredump: %d", ret); complete(&ar->driver_recovery); } static void ath10k_core_set_coverage_class_work(struct work_struct *work) { struct ath10k *ar = container_of(work, struct ath10k, set_coverage_class_work); if (ar->hw_params.hw_ops->set_coverage_class) ar->hw_params.hw_ops->set_coverage_class(ar, -1); } static int ath10k_core_init_firmware_features(struct ath10k *ar) { struct ath10k_fw_file *fw_file = &ar->normal_mode_fw.fw_file; int max_num_peers; if (test_bit(ATH10K_FW_FEATURE_WMI_10_2, fw_file->fw_features) && !test_bit(ATH10K_FW_FEATURE_WMI_10X, fw_file->fw_features)) { ath10k_err(ar, "feature bits corrupted: 10.2 feature requires 10.x feature to be set as well"); return -EINVAL; } if (fw_file->wmi_op_version >= ATH10K_FW_WMI_OP_VERSION_MAX) { ath10k_err(ar, "unsupported WMI OP version (max %d): %d\n", ATH10K_FW_WMI_OP_VERSION_MAX, fw_file->wmi_op_version); return -EINVAL; } ar->wmi.rx_decap_mode = ATH10K_HW_TXRX_NATIVE_WIFI; switch (ath10k_cryptmode_param) { case ATH10K_CRYPT_MODE_HW: clear_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags); clear_bit(ATH10K_FLAG_HW_CRYPTO_DISABLED, &ar->dev_flags); break; case ATH10K_CRYPT_MODE_SW: if (!test_bit(ATH10K_FW_FEATURE_RAW_MODE_SUPPORT, fw_file->fw_features)) { ath10k_err(ar, "cryptmode > 0 requires raw mode support from firmware"); return -EINVAL; } set_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags); set_bit(ATH10K_FLAG_HW_CRYPTO_DISABLED, &ar->dev_flags); break; default: ath10k_info(ar, "invalid cryptmode: %d\n", ath10k_cryptmode_param); return -EINVAL; } ar->htt.max_num_amsdu = ATH10K_HTT_MAX_NUM_AMSDU_DEFAULT; ar->htt.max_num_ampdu = ATH10K_HTT_MAX_NUM_AMPDU_DEFAULT; if (ath10k_frame_mode == ATH10K_HW_TXRX_RAW) { if (!test_bit(ATH10K_FW_FEATURE_RAW_MODE_SUPPORT, fw_file->fw_features)) { ath10k_err(ar, "rawmode = 1 requires support from firmware"); return -EINVAL; } set_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags); } if (test_bit(ATH10K_FLAG_RAW_MODE, &ar->dev_flags)) { ar->wmi.rx_decap_mode = ATH10K_HW_TXRX_RAW; /* Workaround: * * Firmware A-MSDU aggregation breaks with RAW Tx encap mode * and causes enormous performance issues (malformed frames, * etc). * * Disabling A-MSDU makes RAW mode stable with heavy traffic * albeit a bit slower compared to regular operation. */ ar->htt.max_num_amsdu = 1; } /* Backwards compatibility for firmwares without * ATH10K_FW_IE_WMI_OP_VERSION. */ if (fw_file->wmi_op_version == ATH10K_FW_WMI_OP_VERSION_UNSET) { if (test_bit(ATH10K_FW_FEATURE_WMI_10X, fw_file->fw_features)) { if (test_bit(ATH10K_FW_FEATURE_WMI_10_2, fw_file->fw_features)) fw_file->wmi_op_version = ATH10K_FW_WMI_OP_VERSION_10_2; else fw_file->wmi_op_version = ATH10K_FW_WMI_OP_VERSION_10_1; } else { fw_file->wmi_op_version = ATH10K_FW_WMI_OP_VERSION_MAIN; } } switch (fw_file->wmi_op_version) { case ATH10K_FW_WMI_OP_VERSION_MAIN: max_num_peers = TARGET_NUM_PEERS; ar->max_num_stations = TARGET_NUM_STATIONS; ar->max_num_vdevs = TARGET_NUM_VDEVS; ar->htt.max_num_pending_tx = TARGET_NUM_MSDU_DESC; ar->fw_stats_req_mask = WMI_STAT_PDEV | WMI_STAT_VDEV | WMI_STAT_PEER; ar->max_spatial_stream = WMI_MAX_SPATIAL_STREAM; break; case ATH10K_FW_WMI_OP_VERSION_10_1: case ATH10K_FW_WMI_OP_VERSION_10_2: case ATH10K_FW_WMI_OP_VERSION_10_2_4: if (ath10k_peer_stats_enabled(ar)) { max_num_peers = TARGET_10X_TX_STATS_NUM_PEERS; ar->max_num_stations = TARGET_10X_TX_STATS_NUM_STATIONS; } else { max_num_peers = TARGET_10X_NUM_PEERS; ar->max_num_stations = TARGET_10X_NUM_STATIONS; } ar->max_num_vdevs = TARGET_10X_NUM_VDEVS; ar->htt.max_num_pending_tx = TARGET_10X_NUM_MSDU_DESC; ar->fw_stats_req_mask = WMI_STAT_PEER; ar->max_spatial_stream = WMI_MAX_SPATIAL_STREAM; break; case ATH10K_FW_WMI_OP_VERSION_TLV: max_num_peers = TARGET_TLV_NUM_PEERS; ar->max_num_stations = TARGET_TLV_NUM_STATIONS; ar->max_num_vdevs = TARGET_TLV_NUM_VDEVS; ar->max_num_tdls_vdevs = TARGET_TLV_NUM_TDLS_VDEVS; if (ar->hif.bus == ATH10K_BUS_SDIO) ar->htt.max_num_pending_tx = TARGET_TLV_NUM_MSDU_DESC_HL; else ar->htt.max_num_pending_tx = TARGET_TLV_NUM_MSDU_DESC; ar->wow.max_num_patterns = TARGET_TLV_NUM_WOW_PATTERNS; ar->fw_stats_req_mask = WMI_TLV_STAT_PDEV | WMI_TLV_STAT_VDEV | WMI_TLV_STAT_PEER | WMI_TLV_STAT_PEER_EXTD; ar->max_spatial_stream = WMI_MAX_SPATIAL_STREAM; ar->wmi.mgmt_max_num_pending_tx = TARGET_TLV_MGMT_NUM_MSDU_DESC; break; case ATH10K_FW_WMI_OP_VERSION_10_4: max_num_peers = TARGET_10_4_NUM_PEERS; ar->max_num_stations = TARGET_10_4_NUM_STATIONS; ar->num_active_peers = TARGET_10_4_ACTIVE_PEERS; ar->max_num_vdevs = TARGET_10_4_NUM_VDEVS; ar->num_tids = TARGET_10_4_TGT_NUM_TIDS; ar->fw_stats_req_mask = WMI_10_4_STAT_PEER | WMI_10_4_STAT_PEER_EXTD | WMI_10_4_STAT_VDEV_EXTD; ar->max_spatial_stream = ar->hw_params.max_spatial_stream; ar->max_num_tdls_vdevs = TARGET_10_4_NUM_TDLS_VDEVS; if (test_bit(ATH10K_FW_FEATURE_PEER_FLOW_CONTROL, fw_file->fw_features)) ar->htt.max_num_pending_tx = TARGET_10_4_NUM_MSDU_DESC_PFC; else ar->htt.max_num_pending_tx = TARGET_10_4_NUM_MSDU_DESC; break; case ATH10K_FW_WMI_OP_VERSION_UNSET: case ATH10K_FW_WMI_OP_VERSION_MAX: default: WARN_ON(1); return -EINVAL; } if (ar->hw_params.num_peers) ar->max_num_peers = ar->hw_params.num_peers; else ar->max_num_peers = max_num_peers; /* Backwards compatibility for firmwares without * ATH10K_FW_IE_HTT_OP_VERSION. */ if (fw_file->htt_op_version == ATH10K_FW_HTT_OP_VERSION_UNSET) { switch (fw_file->wmi_op_version) { case ATH10K_FW_WMI_OP_VERSION_MAIN: fw_file->htt_op_version = ATH10K_FW_HTT_OP_VERSION_MAIN; break; case ATH10K_FW_WMI_OP_VERSION_10_1: case ATH10K_FW_WMI_OP_VERSION_10_2: case ATH10K_FW_WMI_OP_VERSION_10_2_4: fw_file->htt_op_version = ATH10K_FW_HTT_OP_VERSION_10_1; break; case ATH10K_FW_WMI_OP_VERSION_TLV: fw_file->htt_op_version = ATH10K_FW_HTT_OP_VERSION_TLV; break; case ATH10K_FW_WMI_OP_VERSION_10_4: case ATH10K_FW_WMI_OP_VERSION_UNSET: case ATH10K_FW_WMI_OP_VERSION_MAX: ath10k_err(ar, "htt op version not found from fw meta data"); return -EINVAL; } } return 0; } static int ath10k_core_reset_rx_filter(struct ath10k *ar) { int ret; int vdev_id; int vdev_type; int vdev_subtype; const u8 *vdev_addr; vdev_id = 0; vdev_type = WMI_VDEV_TYPE_STA; vdev_subtype = ath10k_wmi_get_vdev_subtype(ar, WMI_VDEV_SUBTYPE_NONE); vdev_addr = ar->mac_addr; ret = ath10k_wmi_vdev_create(ar, vdev_id, vdev_type, vdev_subtype, vdev_addr); if (ret) { ath10k_err(ar, "failed to create dummy vdev: %d\n", ret); return ret; } ret = ath10k_wmi_vdev_delete(ar, vdev_id); if (ret) { ath10k_err(ar, "failed to delete dummy vdev: %d\n", ret); return ret; } /* WMI and HTT may use separate HIF pipes and are not guaranteed to be * serialized properly implicitly. * * Moreover (most) WMI commands have no explicit acknowledges. It is * possible to infer it implicitly by poking firmware with echo * command - getting a reply means all preceding comments have been * (mostly) processed. * * In case of vdev create/delete this is sufficient. * * Without this it's possible to end up with a race when HTT Rx ring is * started before vdev create/delete hack is complete allowing a short * window of opportunity to receive (and Tx ACK) a bunch of frames. */ ret = ath10k_wmi_barrier(ar); if (ret) { ath10k_err(ar, "failed to ping firmware: %d\n", ret); return ret; } return 0; } static int ath10k_core_compat_services(struct ath10k *ar) { struct ath10k_fw_file *fw_file = &ar->normal_mode_fw.fw_file; /* all 10.x firmware versions support thermal throttling but don't * advertise the support via service flags so we have to hardcode * it here */ switch (fw_file->wmi_op_version) { case ATH10K_FW_WMI_OP_VERSION_10_1: case ATH10K_FW_WMI_OP_VERSION_10_2: case ATH10K_FW_WMI_OP_VERSION_10_2_4: case ATH10K_FW_WMI_OP_VERSION_10_4: set_bit(WMI_SERVICE_THERM_THROT, ar->wmi.svc_map); break; default: break; } return 0; } #define TGT_IRAM_READ_PER_ITR (8 * 1024) static int ath10k_core_copy_target_iram(struct ath10k *ar) { const struct ath10k_hw_mem_layout *hw_mem; const struct ath10k_mem_region *tmp, *mem_region = NULL; dma_addr_t paddr; void *vaddr = NULL; u8 num_read_itr; int i, ret; u32 len, remaining_len; /* copy target iram feature must work also when * ATH10K_FW_CRASH_DUMP_RAM_DATA is disabled, so * _ath10k_coredump_get_mem_layout() to accomplist that */ hw_mem = _ath10k_coredump_get_mem_layout(ar); if (!hw_mem) /* if CONFIG_DEV_COREDUMP is disabled we get NULL, then * just silently disable the feature by doing nothing */ return 0; for (i = 0; i < hw_mem->region_table.size; i++) { tmp = &hw_mem->region_table.regions[i]; if (tmp->type == ATH10K_MEM_REGION_TYPE_REG) { mem_region = tmp; break; } } if (!mem_region) return -ENOMEM; for (i = 0; i < ar->wmi.num_mem_chunks; i++) { if (ar->wmi.mem_chunks[i].req_id == WMI_IRAM_RECOVERY_HOST_MEM_REQ_ID) { vaddr = ar->wmi.mem_chunks[i].vaddr; len = ar->wmi.mem_chunks[i].len; break; } } if (!vaddr || !len) { ath10k_warn(ar, "No allocated memory for IRAM back up"); return -ENOMEM; } len = (len < mem_region->len) ? len : mem_region->len; paddr = mem_region->start; num_read_itr = len / TGT_IRAM_READ_PER_ITR; remaining_len = len % TGT_IRAM_READ_PER_ITR; for (i = 0; i < num_read_itr; i++) { ret = ath10k_hif_diag_read(ar, paddr, vaddr, TGT_IRAM_READ_PER_ITR); if (ret) { ath10k_warn(ar, "failed to copy firmware IRAM contents: %d", ret); return ret; } paddr += TGT_IRAM_READ_PER_ITR; vaddr += TGT_IRAM_READ_PER_ITR; } if (remaining_len) { ret = ath10k_hif_diag_read(ar, paddr, vaddr, remaining_len); if (ret) { ath10k_warn(ar, "failed to copy firmware IRAM contents: %d", ret); return ret; } } ath10k_dbg(ar, ATH10K_DBG_BOOT, "target IRAM back up completed\n"); return 0; } int ath10k_core_start(struct ath10k *ar, enum ath10k_firmware_mode mode, const struct ath10k_fw_components *fw) { int status; u32 val; lockdep_assert_held(&ar->conf_mutex); clear_bit(ATH10K_FLAG_CRASH_FLUSH, &ar->dev_flags); ar->running_fw = fw; if (!test_bit(ATH10K_FW_FEATURE_NON_BMI, ar->running_fw->fw_file.fw_features)) { ath10k_bmi_start(ar); /* Enable hardware clock to speed up firmware download */ if (ar->hw_params.hw_ops->enable_pll_clk) { status = ar->hw_params.hw_ops->enable_pll_clk(ar); ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot enable pll ret %d\n", status); } if (ath10k_init_configure_target(ar)) { status = -EINVAL; goto err; } status = ath10k_download_cal_data(ar); if (status) goto err; /* Some of qca988x solutions are having global reset issue * during target initialization. Bypassing PLL setting before * downloading firmware and letting the SoC run on REF_CLK is * fixing the problem. Corresponding firmware change is also * needed to set the clock source once the target is * initialized. */ if (test_bit(ATH10K_FW_FEATURE_SUPPORTS_SKIP_CLOCK_INIT, ar->running_fw->fw_file.fw_features)) { status = ath10k_bmi_write32(ar, hi_skip_clock_init, 1); if (status) { ath10k_err(ar, "could not write to skip_clock_init: %d\n", status); goto err; } } status = ath10k_download_fw(ar); if (status) goto err; status = ath10k_init_uart(ar); if (status) goto err; if (ar->hif.bus == ATH10K_BUS_SDIO) { status = ath10k_init_sdio(ar, mode); if (status) { ath10k_err(ar, "failed to init SDIO: %d\n", status); goto err; } } } ar->htc.htc_ops.target_send_suspend_complete = ath10k_send_suspend_complete; status = ath10k_htc_init(ar); if (status) { ath10k_err(ar, "could not init HTC (%d)\n", status); goto err; } if (!test_bit(ATH10K_FW_FEATURE_NON_BMI, ar->running_fw->fw_file.fw_features)) { status = ath10k_bmi_done(ar); if (status) goto err; } status = ath10k_wmi_attach(ar); if (status) { ath10k_err(ar, "WMI attach failed: %d\n", status); goto err; } status = ath10k_htt_init(ar); if (status) { ath10k_err(ar, "failed to init htt: %d\n", status); goto err_wmi_detach; } status = ath10k_htt_tx_start(&ar->htt); if (status) { ath10k_err(ar, "failed to alloc htt tx: %d\n", status); goto err_wmi_detach; } /* If firmware indicates Full Rx Reorder support it must be used in a * slightly different manner. Let HTT code know. */ ar->htt.rx_ring.in_ord_rx = !!(test_bit(WMI_SERVICE_RX_FULL_REORDER, ar->wmi.svc_map)); status = ath10k_htt_rx_alloc(&ar->htt); if (status) { ath10k_err(ar, "failed to alloc htt rx: %d\n", status); goto err_htt_tx_detach; } status = ath10k_hif_start(ar); if (status) { ath10k_err(ar, "could not start HIF: %d\n", status); goto err_htt_rx_detach; } status = ath10k_htc_wait_target(&ar->htc); if (status) { ath10k_err(ar, "failed to connect to HTC: %d\n", status); goto err_hif_stop; } status = ath10k_hif_start_post(ar); if (status) { ath10k_err(ar, "failed to swap mailbox: %d\n", status); goto err_hif_stop; } if (mode == ATH10K_FIRMWARE_MODE_NORMAL) { status = ath10k_htt_connect(&ar->htt); if (status) { ath10k_err(ar, "failed to connect htt (%d)\n", status); goto err_hif_stop; } } status = ath10k_wmi_connect(ar); if (status) { ath10k_err(ar, "could not connect wmi: %d\n", status); goto err_hif_stop; } status = ath10k_htc_start(&ar->htc); if (status) { ath10k_err(ar, "failed to start htc: %d\n", status); goto err_hif_stop; } if (mode == ATH10K_FIRMWARE_MODE_NORMAL) { status = ath10k_wmi_wait_for_service_ready(ar); if (status) { ath10k_warn(ar, "wmi service ready event not received"); goto err_hif_stop; } } ath10k_dbg(ar, ATH10K_DBG_BOOT, "firmware %s booted\n", ar->hw->wiphy->fw_version); if (test_bit(ATH10K_FW_FEATURE_IRAM_RECOVERY, ar->running_fw->fw_file.fw_features)) { status = ath10k_core_copy_target_iram(ar); if (status) { ath10k_warn(ar, "failed to copy target iram contents: %d", status); goto err_hif_stop; } } if (test_bit(WMI_SERVICE_EXT_RES_CFG_SUPPORT, ar->wmi.svc_map) && mode == ATH10K_FIRMWARE_MODE_NORMAL) { val = 0; if (ath10k_peer_stats_enabled(ar)) val = WMI_10_4_PEER_STATS; /* Enable vdev stats by default */ val |= WMI_10_4_VDEV_STATS; if (test_bit(WMI_SERVICE_BSS_CHANNEL_INFO_64, ar->wmi.svc_map)) val |= WMI_10_4_BSS_CHANNEL_INFO_64; ath10k_core_fetch_btcoex_dt(ar); /* 10.4 firmware supports BT-Coex without reloading firmware * via pdev param. To support Bluetooth coexistence pdev param, * WMI_COEX_GPIO_SUPPORT of extended resource config should be * enabled always. * * We can still enable BTCOEX if firmware has the support * even though btceox_support value is * ATH10K_DT_BTCOEX_NOT_FOUND */ if (test_bit(WMI_SERVICE_COEX_GPIO, ar->wmi.svc_map) && test_bit(ATH10K_FW_FEATURE_BTCOEX_PARAM, ar->running_fw->fw_file.fw_features) && ar->coex_support) val |= WMI_10_4_COEX_GPIO_SUPPORT; if (test_bit(WMI_SERVICE_TDLS_EXPLICIT_MODE_ONLY, ar->wmi.svc_map)) val |= WMI_10_4_TDLS_EXPLICIT_MODE_ONLY; if (test_bit(WMI_SERVICE_TDLS_UAPSD_BUFFER_STA, ar->wmi.svc_map)) val |= WMI_10_4_TDLS_UAPSD_BUFFER_STA; if (test_bit(WMI_SERVICE_TX_DATA_ACK_RSSI, ar->wmi.svc_map)) val |= WMI_10_4_TX_DATA_ACK_RSSI; if (test_bit(WMI_SERVICE_REPORT_AIRTIME, ar->wmi.svc_map)) val |= WMI_10_4_REPORT_AIRTIME; if (test_bit(WMI_SERVICE_EXT_PEER_TID_CONFIGS_SUPPORT, ar->wmi.svc_map)) val |= WMI_10_4_EXT_PEER_TID_CONFIGS_SUPPORT; status = ath10k_mac_ext_resource_config(ar, val); if (status) { ath10k_err(ar, "failed to send ext resource cfg command : %d\n", status); goto err_hif_stop; } } status = ath10k_wmi_cmd_init(ar); if (status) { ath10k_err(ar, "could not send WMI init command (%d)\n", status); goto err_hif_stop; } status = ath10k_wmi_wait_for_unified_ready(ar); if (status) { ath10k_err(ar, "wmi unified ready event not received\n"); goto err_hif_stop; } status = ath10k_core_compat_services(ar); if (status) { ath10k_err(ar, "compat services failed: %d\n", status); goto err_hif_stop; } status = ath10k_wmi_pdev_set_base_macaddr(ar, ar->mac_addr); if (status && status != -EOPNOTSUPP) { ath10k_err(ar, "failed to set base mac address: %d\n", status); goto err_hif_stop; } /* Some firmware revisions do not properly set up hardware rx filter * registers. * * A known example from QCA9880 and 10.2.4 is that MAC_PCU_ADDR1_MASK * is filled with 0s instead of 1s allowing HW to respond with ACKs to * any frames that matches MAC_PCU_RX_FILTER which is also * misconfigured to accept anything. * * The ADDR1 is programmed using internal firmware structure field and * can't be (easily/sanely) reached from the driver explicitly. It is * possible to implicitly make it correct by creating a dummy vdev and * then deleting it. */ if (ar->hw_params.hw_filter_reset_required && mode == ATH10K_FIRMWARE_MODE_NORMAL) { status = ath10k_core_reset_rx_filter(ar); if (status) { ath10k_err(ar, "failed to reset rx filter: %d\n", status); goto err_hif_stop; } } status = ath10k_htt_rx_ring_refill(ar); if (status) { ath10k_err(ar, "failed to refill htt rx ring: %d\n", status); goto err_hif_stop; } if (ar->max_num_vdevs >= 64) ar->free_vdev_map = 0xFFFFFFFFFFFFFFFFLL; else ar->free_vdev_map = (1LL << ar->max_num_vdevs) - 1; INIT_LIST_HEAD(&ar->arvifs); /* we don't care about HTT in UTF mode */ if (mode == ATH10K_FIRMWARE_MODE_NORMAL) { status = ath10k_htt_setup(&ar->htt); if (status) { ath10k_err(ar, "failed to setup htt: %d\n", status); goto err_hif_stop; } } status = ath10k_debug_start(ar); if (status) goto err_hif_stop; status = ath10k_hif_set_target_log_mode(ar, fw_diag_log); if (status && status != -EOPNOTSUPP) { ath10k_warn(ar, "set target log mode failed: %d\n", status); goto err_hif_stop; } status = ath10k_leds_start(ar); if (status) goto err_hif_stop; return 0; err_hif_stop: ath10k_hif_stop(ar); err_htt_rx_detach: ath10k_htt_rx_free(&ar->htt); err_htt_tx_detach: ath10k_htt_tx_free(&ar->htt); err_wmi_detach: ath10k_wmi_detach(ar); err: return status; } EXPORT_SYMBOL(ath10k_core_start); int ath10k_wait_for_suspend(struct ath10k *ar, u32 suspend_opt) { int ret; unsigned long time_left; reinit_completion(&ar->target_suspend); ret = ath10k_wmi_pdev_suspend_target(ar, suspend_opt); if (ret) { ath10k_warn(ar, "could not suspend target (%d)\n", ret); return ret; } time_left = wait_for_completion_timeout(&ar->target_suspend, 1 * HZ); if (!time_left) { ath10k_warn(ar, "suspend timed out - target pause event never came\n"); return -ETIMEDOUT; } return 0; } void ath10k_core_stop(struct ath10k *ar) { lockdep_assert_held(&ar->conf_mutex); ath10k_debug_stop(ar); /* try to suspend target */ if (ar->state != ATH10K_STATE_RESTARTING && ar->state != ATH10K_STATE_UTF) ath10k_wait_for_suspend(ar, WMI_PDEV_SUSPEND_AND_DISABLE_INTR); ath10k_hif_stop(ar); ath10k_htt_tx_stop(&ar->htt); ath10k_htt_rx_free(&ar->htt); ath10k_wmi_detach(ar); ar->id.bmi_ids_valid = false; } EXPORT_SYMBOL(ath10k_core_stop); /* mac80211 manages fw/hw initialization through start/stop hooks. However in * order to know what hw capabilities should be advertised to mac80211 it is * necessary to load the firmware (and tear it down immediately since start * hook will try to init it again) before registering */ static int ath10k_core_probe_fw(struct ath10k *ar) { struct bmi_target_info target_info; int ret = 0; ret = ath10k_hif_power_up(ar, ATH10K_FIRMWARE_MODE_NORMAL); if (ret) { ath10k_err(ar, "could not power on hif bus (%d)\n", ret); return ret; } switch (ar->hif.bus) { case ATH10K_BUS_SDIO: memset(&target_info, 0, sizeof(target_info)); ret = ath10k_bmi_get_target_info_sdio(ar, &target_info); if (ret) { ath10k_err(ar, "could not get target info (%d)\n", ret); goto err_power_down; } ar->target_version = target_info.version; ar->hw->wiphy->hw_version = target_info.version; break; case ATH10K_BUS_PCI: case ATH10K_BUS_AHB: case ATH10K_BUS_USB: memset(&target_info, 0, sizeof(target_info)); ret = ath10k_bmi_get_target_info(ar, &target_info); if (ret) { ath10k_err(ar, "could not get target info (%d)\n", ret); goto err_power_down; } ar->target_version = target_info.version; ar->hw->wiphy->hw_version = target_info.version; break; case ATH10K_BUS_SNOC: memset(&target_info, 0, sizeof(target_info)); ret = ath10k_hif_get_target_info(ar, &target_info); if (ret) { ath10k_err(ar, "could not get target info (%d)\n", ret); goto err_power_down; } ar->target_version = target_info.version; ar->hw->wiphy->hw_version = target_info.version; break; default: ath10k_err(ar, "incorrect hif bus type: %d\n", ar->hif.bus); } ret = ath10k_init_hw_params(ar); if (ret) { ath10k_err(ar, "could not get hw params (%d)\n", ret); goto err_power_down; } ret = ath10k_core_fetch_firmware_files(ar); if (ret) { ath10k_err(ar, "could not fetch firmware files (%d)\n", ret); goto err_power_down; } BUILD_BUG_ON(sizeof(ar->hw->wiphy->fw_version) != sizeof(ar->normal_mode_fw.fw_file.fw_version)); memcpy(ar->hw->wiphy->fw_version, ar->normal_mode_fw.fw_file.fw_version, sizeof(ar->hw->wiphy->fw_version)); ath10k_debug_print_hwfw_info(ar); if (!test_bit(ATH10K_FW_FEATURE_NON_BMI, ar->normal_mode_fw.fw_file.fw_features)) { ret = ath10k_core_pre_cal_download(ar); if (ret) { /* pre calibration data download is not necessary * for all the chipsets. Ignore failures and continue. */ ath10k_dbg(ar, ATH10K_DBG_BOOT, "could not load pre cal data: %d\n", ret); } ret = ath10k_core_get_board_id_from_otp(ar); if (ret && ret != -EOPNOTSUPP) { ath10k_err(ar, "failed to get board id from otp: %d\n", ret); goto err_free_firmware_files; } ret = ath10k_core_check_smbios(ar); if (ret) ath10k_dbg(ar, ATH10K_DBG_BOOT, "SMBIOS bdf variant name not set.\n"); ret = ath10k_core_check_dt(ar); if (ret) ath10k_dbg(ar, ATH10K_DBG_BOOT, "DT bdf variant name not set.\n"); ret = ath10k_core_fetch_board_file(ar, ATH10K_BD_IE_BOARD); if (ret) { ath10k_err(ar, "failed to fetch board file: %d\n", ret); goto err_free_firmware_files; } ath10k_debug_print_board_info(ar); } device_get_mac_address(ar->dev, ar->mac_addr); ret = ath10k_core_init_firmware_features(ar); if (ret) { ath10k_err(ar, "fatal problem with firmware features: %d\n", ret); goto err_free_firmware_files; } if (!test_bit(ATH10K_FW_FEATURE_NON_BMI, ar->normal_mode_fw.fw_file.fw_features)) { ret = ath10k_swap_code_seg_init(ar, &ar->normal_mode_fw.fw_file); if (ret) { ath10k_err(ar, "failed to initialize code swap segment: %d\n", ret); goto err_free_firmware_files; } } mutex_lock(&ar->conf_mutex); ret = ath10k_core_start(ar, ATH10K_FIRMWARE_MODE_NORMAL, &ar->normal_mode_fw); if (ret) { ath10k_err(ar, "could not init core (%d)\n", ret); goto err_unlock; } ath10k_debug_print_boot_info(ar); ath10k_core_stop(ar); mutex_unlock(&ar->conf_mutex); ath10k_hif_power_down(ar); return 0; err_unlock: mutex_unlock(&ar->conf_mutex); err_free_firmware_files: ath10k_core_free_firmware_files(ar); err_power_down: ath10k_hif_power_down(ar); return ret; } static void ath10k_core_register_work(struct work_struct *work) { struct ath10k *ar = container_of(work, struct ath10k, register_work); int status; /* peer stats are enabled by default */ set_bit(ATH10K_FLAG_PEER_STATS, &ar->dev_flags); status = ath10k_core_probe_fw(ar); if (status) { ath10k_err(ar, "could not probe fw (%d)\n", status); goto err; } status = ath10k_mac_register(ar); if (status) { ath10k_err(ar, "could not register to mac80211 (%d)\n", status); goto err_release_fw; } status = ath10k_coredump_register(ar); if (status) { ath10k_err(ar, "unable to register coredump\n"); goto err_unregister_mac; } status = ath10k_debug_register(ar); if (status) { ath10k_err(ar, "unable to initialize debugfs\n"); goto err_unregister_coredump; } status = ath10k_spectral_create(ar); if (status) { ath10k_err(ar, "failed to initialize spectral\n"); goto err_debug_destroy; } status = ath10k_thermal_register(ar); if (status) { ath10k_err(ar, "could not register thermal device: %d\n", status); goto err_spectral_destroy; } status = ath10k_leds_register(ar); if (status) { ath10k_err(ar, "could not register leds: %d\n", status); goto err_thermal_unregister; } set_bit(ATH10K_FLAG_CORE_REGISTERED, &ar->dev_flags); return; err_thermal_unregister: ath10k_thermal_unregister(ar); err_spectral_destroy: ath10k_spectral_destroy(ar); err_debug_destroy: ath10k_debug_destroy(ar); err_unregister_coredump: ath10k_coredump_unregister(ar); err_unregister_mac: ath10k_mac_unregister(ar); err_release_fw: ath10k_core_free_firmware_files(ar); err: /* TODO: It's probably a good idea to release device from the driver * but calling device_release_driver() here will cause a deadlock. */ return; } int ath10k_core_register(struct ath10k *ar, const struct ath10k_bus_params *bus_params) { ar->bus_param = *bus_params; queue_work(ar->workqueue, &ar->register_work); return 0; } EXPORT_SYMBOL(ath10k_core_register); void ath10k_core_unregister(struct ath10k *ar) { cancel_work_sync(&ar->register_work); if (!test_bit(ATH10K_FLAG_CORE_REGISTERED, &ar->dev_flags)) return; ath10k_leds_unregister(ar); ath10k_thermal_unregister(ar); /* Stop spectral before unregistering from mac80211 to remove the * relayfs debugfs file cleanly. Otherwise the parent debugfs tree * would be already be free'd recursively, leading to a double free. */ ath10k_spectral_destroy(ar); /* We must unregister from mac80211 before we stop HTC and HIF. * Otherwise we will fail to submit commands to FW and mac80211 will be * unhappy about callback failures. */ ath10k_mac_unregister(ar); ath10k_testmode_destroy(ar); ath10k_core_free_firmware_files(ar); ath10k_core_free_board_files(ar); ath10k_debug_unregister(ar); } EXPORT_SYMBOL(ath10k_core_unregister); struct ath10k *ath10k_core_create(size_t priv_size, struct device *dev, enum ath10k_bus bus, enum ath10k_hw_rev hw_rev, const struct ath10k_hif_ops *hif_ops) { struct ath10k *ar; int ret; ar = ath10k_mac_create(priv_size); if (!ar) return NULL; ar->ath_common.priv = ar; ar->ath_common.hw = ar->hw; ar->dev = dev; ar->hw_rev = hw_rev; ar->hif.ops = hif_ops; ar->hif.bus = bus; switch (hw_rev) { case ATH10K_HW_QCA988X: case ATH10K_HW_QCA9887: ar->regs = &qca988x_regs; ar->hw_ce_regs = &qcax_ce_regs; ar->hw_values = &qca988x_values; break; case ATH10K_HW_QCA6174: case ATH10K_HW_QCA9377: ar->regs = &qca6174_regs; ar->hw_ce_regs = &qcax_ce_regs; ar->hw_values = &qca6174_values; break; case ATH10K_HW_QCA99X0: case ATH10K_HW_QCA9984: ar->regs = &qca99x0_regs; ar->hw_ce_regs = &qcax_ce_regs; ar->hw_values = &qca99x0_values; break; case ATH10K_HW_QCA9888: ar->regs = &qca99x0_regs; ar->hw_ce_regs = &qcax_ce_regs; ar->hw_values = &qca9888_values; break; case ATH10K_HW_QCA4019: ar->regs = &qca4019_regs; ar->hw_ce_regs = &qcax_ce_regs; ar->hw_values = &qca4019_values; break; case ATH10K_HW_WCN3990: ar->regs = &wcn3990_regs; ar->hw_ce_regs = &wcn3990_ce_regs; ar->hw_values = &wcn3990_values; break; default: ath10k_err(ar, "unsupported core hardware revision %d\n", hw_rev); ret = -EOPNOTSUPP; goto err_free_mac; } init_completion(&ar->scan.started); init_completion(&ar->scan.completed); init_completion(&ar->scan.on_channel); init_completion(&ar->target_suspend); init_completion(&ar->driver_recovery); init_completion(&ar->wow.wakeup_completed); init_completion(&ar->install_key_done); init_completion(&ar->vdev_setup_done); init_completion(&ar->vdev_delete_done); init_completion(&ar->thermal.wmi_sync); init_completion(&ar->bss_survey_done); init_completion(&ar->peer_delete_done); init_completion(&ar->peer_stats_info_complete); INIT_DELAYED_WORK(&ar->scan.timeout, ath10k_scan_timeout_work); ar->workqueue = create_singlethread_workqueue("ath10k_wq"); if (!ar->workqueue) goto err_free_mac; ar->workqueue_aux = create_singlethread_workqueue("ath10k_aux_wq"); if (!ar->workqueue_aux) goto err_free_wq; ar->workqueue_tx_complete = create_singlethread_workqueue("ath10k_tx_complete_wq"); if (!ar->workqueue_tx_complete) goto err_free_aux_wq; mutex_init(&ar->conf_mutex); mutex_init(&ar->dump_mutex); spin_lock_init(&ar->data_lock); for (int ac = 0; ac < IEEE80211_NUM_ACS; ac++) spin_lock_init(&ar->queue_lock[ac]); INIT_LIST_HEAD(&ar->peers); init_waitqueue_head(&ar->peer_mapping_wq); init_waitqueue_head(&ar->htt.empty_tx_wq); init_waitqueue_head(&ar->wmi.tx_credits_wq); skb_queue_head_init(&ar->htt.rx_indication_head); init_completion(&ar->offchan_tx_completed); INIT_WORK(&ar->offchan_tx_work, ath10k_offchan_tx_work); skb_queue_head_init(&ar->offchan_tx_queue); INIT_WORK(&ar->wmi_mgmt_tx_work, ath10k_mgmt_over_wmi_tx_work); skb_queue_head_init(&ar->wmi_mgmt_tx_queue); INIT_WORK(&ar->register_work, ath10k_core_register_work); INIT_WORK(&ar->restart_work, ath10k_core_restart); INIT_WORK(&ar->set_coverage_class_work, ath10k_core_set_coverage_class_work); ar->napi_dev = alloc_netdev_dummy(0); if (!ar->napi_dev) goto err_free_tx_complete; ret = ath10k_coredump_create(ar); if (ret) goto err_free_netdev; ret = ath10k_debug_create(ar); if (ret) goto err_free_coredump; return ar; err_free_coredump: ath10k_coredump_destroy(ar); err_free_netdev: free_netdev(ar->napi_dev); err_free_tx_complete: destroy_workqueue(ar->workqueue_tx_complete); err_free_aux_wq: destroy_workqueue(ar->workqueue_aux); err_free_wq: destroy_workqueue(ar->workqueue); err_free_mac: ath10k_mac_destroy(ar); return NULL; } EXPORT_SYMBOL(ath10k_core_create); void ath10k_core_destroy(struct ath10k *ar) { destroy_workqueue(ar->workqueue); destroy_workqueue(ar->workqueue_aux); destroy_workqueue(ar->workqueue_tx_complete); free_netdev(ar->napi_dev); ath10k_debug_destroy(ar); ath10k_coredump_destroy(ar); ath10k_htt_tx_destroy(&ar->htt); ath10k_wmi_free_host_mem(ar); ath10k_mac_destroy(ar); } EXPORT_SYMBOL(ath10k_core_destroy); MODULE_AUTHOR("Qualcomm Atheros"); MODULE_DESCRIPTION("Core module for Qualcomm Atheros 802.11ac wireless LAN cards."); MODULE_LICENSE("Dual BSD/GPL"); |
| 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 | // SPDX-License-Identifier: GPL-2.0 #include <linux/utsname.h> #include <net/cfg80211.h> #include "core.h" #include "rdev-ops.h" void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct device *pdev = wiphy_dev(wdev->wiphy); if (pdev->driver) strscpy(info->driver, pdev->driver->name, sizeof(info->driver)); else strscpy(info->driver, "N/A", sizeof(info->driver)); strscpy(info->version, init_utsname()->release, sizeof(info->version)); if (wdev->wiphy->fw_version[0]) strscpy(info->fw_version, wdev->wiphy->fw_version, sizeof(info->fw_version)); else strscpy(info->fw_version, "N/A", sizeof(info->fw_version)); strscpy(info->bus_info, dev_name(wiphy_dev(wdev->wiphy)), sizeof(info->bus_info)); } EXPORT_SYMBOL(cfg80211_get_drvinfo); |
| 1041 1038 1041 1041 754 1029 1029 748 1025 129 12 12 5 5 1027 1031 1030 753 1 754 107 3 104 676 1030 99 247 1 247 248 247 19 52 54 54 52 52 52 51 367 365 3 367 45 42 41 39 3 45 213 213 1 212 213 218 1 217 1 216 218 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 | // SPDX-License-Identifier: GPL-2.0-or-later /* Filesystem parameter parser. * * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/export.h> #include <linux/fs_context.h> #include <linux/fs_parser.h> #include <linux/slab.h> #include <linux/security.h> #include <linux/namei.h> #include "internal.h" static const struct constant_table bool_names[] = { { "0", false }, { "1", true }, { "false", false }, { "no", false }, { "true", true }, { "yes", true }, { }, }; static const struct constant_table * __lookup_constant(const struct constant_table *tbl, const char *name) { for ( ; tbl->name; tbl++) if (strcmp(name, tbl->name) == 0) return tbl; return NULL; } /** * lookup_constant - Look up a constant by name in an ordered table * @tbl: The table of constants to search. * @name: The name to look up. * @not_found: The value to return if the name is not found. */ int lookup_constant(const struct constant_table *tbl, const char *name, int not_found) { const struct constant_table *p = __lookup_constant(tbl, name); return p ? p->value : not_found; } EXPORT_SYMBOL(lookup_constant); static inline bool is_flag(const struct fs_parameter_spec *p) { return p->type == NULL; } static const struct fs_parameter_spec *fs_lookup_key( const struct fs_parameter_spec *desc, struct fs_parameter *param, bool *negated) { const struct fs_parameter_spec *p, *other = NULL; const char *name = param->key; bool want_flag = param->type == fs_value_is_flag; *negated = false; for (p = desc; p->name; p++) { if (strcmp(p->name, name) != 0) continue; if (likely(is_flag(p) == want_flag)) return p; other = p; } if (want_flag) { if (name[0] == 'n' && name[1] == 'o' && name[2]) { for (p = desc; p->name; p++) { if (strcmp(p->name, name + 2) != 0) continue; if (!(p->flags & fs_param_neg_with_no)) continue; *negated = true; return p; } } } return other; } /* * __fs_parse - Parse a filesystem configuration parameter * @log: The filesystem context to log errors through. * @desc: The parameter description to use. * @param: The parameter. * @result: Where to place the result of the parse * * Parse a filesystem configuration parameter and attempt a conversion for a * simple parameter for which this is requested. If successful, the determined * parameter ID is placed into @result->key, the desired type is indicated in * @result->t and any converted value is placed into an appropriate member of * the union in @result. * * The function returns the parameter number if the parameter was matched, * -ENOPARAM if it wasn't matched and @desc->ignore_unknown indicated that * unknown parameters are okay and -EINVAL if there was a conversion issue or * the parameter wasn't recognised and unknowns aren't okay. */ int __fs_parse(struct p_log *log, const struct fs_parameter_spec *desc, struct fs_parameter *param, struct fs_parse_result *result) { const struct fs_parameter_spec *p; result->uint_64 = 0; p = fs_lookup_key(desc, param, &result->negated); if (!p) return -ENOPARAM; if (p->flags & fs_param_deprecated) warn_plog(log, "Deprecated parameter '%s'", param->key); /* Try to turn the type we were given into the type desired by the * parameter and give an error if we can't. */ if (is_flag(p)) { if (param->type != fs_value_is_flag) return inval_plog(log, "Unexpected value for '%s'", param->key); result->boolean = !result->negated; } else { int ret = p->type(log, p, param, result); if (ret) return ret; } return p->opt; } EXPORT_SYMBOL(__fs_parse); /** * fs_lookup_param - Look up a path referred to by a parameter * @fc: The filesystem context to log errors through. * @param: The parameter. * @want_bdev: T if want a blockdev * @flags: Pathwalk flags passed to filename_lookup() * @_path: The result of the lookup */ int fs_lookup_param(struct fs_context *fc, struct fs_parameter *param, bool want_bdev, unsigned int flags, struct path *_path) { struct filename *f; bool put_f; int ret; switch (param->type) { case fs_value_is_string: f = getname_kernel(param->string); if (IS_ERR(f)) return PTR_ERR(f); put_f = true; break; case fs_value_is_filename: f = param->name; put_f = false; break; default: return invalf(fc, "%s: not usable as path", param->key); } ret = filename_lookup(param->dirfd, f, flags, _path, NULL); if (ret < 0) { errorf(fc, "%s: Lookup failure for '%s'", param->key, f->name); goto out; } if (want_bdev && !S_ISBLK(d_backing_inode(_path->dentry)->i_mode)) { path_put(_path); _path->dentry = NULL; _path->mnt = NULL; errorf(fc, "%s: Non-blockdev passed as '%s'", param->key, f->name); ret = -ENOTBLK; } out: if (put_f) putname(f); return ret; } EXPORT_SYMBOL(fs_lookup_param); static int fs_param_bad_value(struct p_log *log, struct fs_parameter *param) { return inval_plog(log, "Bad value for '%s'", param->key); } int fs_param_is_bool(struct p_log *log, const struct fs_parameter_spec *p, struct fs_parameter *param, struct fs_parse_result *result) { int b; if (param->type != fs_value_is_string) return fs_param_bad_value(log, param); if (!*param->string && (p->flags & fs_param_can_be_empty)) return 0; b = lookup_constant(bool_names, param->string, -1); if (b == -1) return fs_param_bad_value(log, param); result->boolean = b; return 0; } EXPORT_SYMBOL(fs_param_is_bool); int fs_param_is_u32(struct p_log *log, const struct fs_parameter_spec *p, struct fs_parameter *param, struct fs_parse_result *result) { int base = (unsigned long)p->data; if (param->type != fs_value_is_string) return fs_param_bad_value(log, param); if (!*param->string && (p->flags & fs_param_can_be_empty)) return 0; if (kstrtouint(param->string, base, &result->uint_32) < 0) return fs_param_bad_value(log, param); return 0; } EXPORT_SYMBOL(fs_param_is_u32); int fs_param_is_s32(struct p_log *log, const struct fs_parameter_spec *p, struct fs_parameter *param, struct fs_parse_result *result) { if (param->type != fs_value_is_string) return fs_param_bad_value(log, param); if (!*param->string && (p->flags & fs_param_can_be_empty)) return 0; if (kstrtoint(param->string, 0, &result->int_32) < 0) return fs_param_bad_value(log, param); return 0; } EXPORT_SYMBOL(fs_param_is_s32); int fs_param_is_u64(struct p_log *log, const struct fs_parameter_spec *p, struct fs_parameter *param, struct fs_parse_result *result) { if (param->type != fs_value_is_string) return fs_param_bad_value(log, param); if (!*param->string && (p->flags & fs_param_can_be_empty)) return 0; if (kstrtoull(param->string, 0, &result->uint_64) < 0) return fs_param_bad_value(log, param); return 0; } EXPORT_SYMBOL(fs_param_is_u64); int fs_param_is_enum(struct p_log *log, const struct fs_parameter_spec *p, struct fs_parameter *param, struct fs_parse_result *result) { const struct constant_table *c; if (param->type != fs_value_is_string) return fs_param_bad_value(log, param); if (!*param->string && (p->flags & fs_param_can_be_empty)) return 0; c = __lookup_constant(p->data, param->string); if (!c) return fs_param_bad_value(log, param); result->uint_32 = c->value; return 0; } EXPORT_SYMBOL(fs_param_is_enum); int fs_param_is_string(struct p_log *log, const struct fs_parameter_spec *p, struct fs_parameter *param, struct fs_parse_result *result) { if (param->type != fs_value_is_string || (!*param->string && !(p->flags & fs_param_can_be_empty))) return fs_param_bad_value(log, param); return 0; } EXPORT_SYMBOL(fs_param_is_string); int fs_param_is_blob(struct p_log *log, const struct fs_parameter_spec *p, struct fs_parameter *param, struct fs_parse_result *result) { if (param->type != fs_value_is_blob) return fs_param_bad_value(log, param); return 0; } EXPORT_SYMBOL(fs_param_is_blob); int fs_param_is_fd(struct p_log *log, const struct fs_parameter_spec *p, struct fs_parameter *param, struct fs_parse_result *result) { switch (param->type) { case fs_value_is_string: if ((!*param->string && !(p->flags & fs_param_can_be_empty)) || kstrtouint(param->string, 0, &result->uint_32) < 0) break; if (result->uint_32 <= INT_MAX) return 0; break; case fs_value_is_file: result->uint_32 = param->dirfd; if (result->uint_32 <= INT_MAX) return 0; break; default: break; } return fs_param_bad_value(log, param); } EXPORT_SYMBOL(fs_param_is_fd); int fs_param_is_uid(struct p_log *log, const struct fs_parameter_spec *p, struct fs_parameter *param, struct fs_parse_result *result) { kuid_t uid; if (fs_param_is_u32(log, p, param, result) != 0) return fs_param_bad_value(log, param); uid = make_kuid(current_user_ns(), result->uint_32); if (!uid_valid(uid)) return inval_plog(log, "Invalid uid '%s'", param->string); result->uid = uid; return 0; } EXPORT_SYMBOL(fs_param_is_uid); int fs_param_is_gid(struct p_log *log, const struct fs_parameter_spec *p, struct fs_parameter *param, struct fs_parse_result *result) { kgid_t gid; if (fs_param_is_u32(log, p, param, result) != 0) return fs_param_bad_value(log, param); gid = make_kgid(current_user_ns(), result->uint_32); if (!gid_valid(gid)) return inval_plog(log, "Invalid gid '%s'", param->string); result->gid = gid; return 0; } EXPORT_SYMBOL(fs_param_is_gid); int fs_param_is_blockdev(struct p_log *log, const struct fs_parameter_spec *p, struct fs_parameter *param, struct fs_parse_result *result) { return 0; } EXPORT_SYMBOL(fs_param_is_blockdev); int fs_param_is_path(struct p_log *log, const struct fs_parameter_spec *p, struct fs_parameter *param, struct fs_parse_result *result) { return 0; } EXPORT_SYMBOL(fs_param_is_path); #ifdef CONFIG_VALIDATE_FS_PARSER /** * validate_constant_table - Validate a constant table * @tbl: The constant table to validate. * @tbl_size: The size of the table. * @low: The lowest permissible value. * @high: The highest permissible value. * @special: One special permissible value outside of the range. */ bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size, int low, int high, int special) { size_t i; bool good = true; if (tbl_size == 0) { pr_warn("VALIDATE C-TBL: Empty\n"); return true; } for (i = 0; i < tbl_size; i++) { if (!tbl[i].name) { pr_err("VALIDATE C-TBL[%zu]: Null\n", i); good = false; } else if (i > 0 && tbl[i - 1].name) { int c = strcmp(tbl[i-1].name, tbl[i].name); if (c == 0) { pr_err("VALIDATE C-TBL[%zu]: Duplicate %s\n", i, tbl[i].name); good = false; } if (c > 0) { pr_err("VALIDATE C-TBL[%zu]: Missorted %s>=%s\n", i, tbl[i-1].name, tbl[i].name); good = false; } } if (tbl[i].value != special && (tbl[i].value < low || tbl[i].value > high)) { pr_err("VALIDATE C-TBL[%zu]: %s->%d const out of range (%d-%d)\n", i, tbl[i].name, tbl[i].value, low, high); good = false; } } return good; } /** * fs_validate_description - Validate a parameter description * @name: The parameter name to search for. * @desc: The parameter description to validate. */ bool fs_validate_description(const char *name, const struct fs_parameter_spec *desc) { const struct fs_parameter_spec *param, *p2; bool good = true; for (param = desc; param->name; param++) { /* Check for duplicate parameter names */ for (p2 = desc; p2 < param; p2++) { if (strcmp(param->name, p2->name) == 0) { if (is_flag(param) != is_flag(p2)) continue; pr_err("VALIDATE %s: PARAM[%s]: Duplicate\n", name, param->name); good = false; } } } return good; } #endif /* CONFIG_VALIDATE_FS_PARSER */ |
| 147 147 147 147 147 147 4 147 147 147 147 147 147 141 27 147 147 141 147 147 147 147 147 147 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2013 Politecnico di Torino, Italy * TORSEC group -- https://security.polito.it * * Author: Roberto Sassu <roberto.sassu@polito.it> * * File: ima_template_lib.c * Library of supported template fields. */ #include "ima_template_lib.h" #include <linux/xattr.h> #include <linux/evm.h> static bool ima_template_hash_algo_allowed(u8 algo) { if (algo == HASH_ALGO_SHA1 || algo == HASH_ALGO_MD5) return true; return false; } enum data_formats { DATA_FMT_DIGEST = 0, DATA_FMT_DIGEST_WITH_ALGO, DATA_FMT_DIGEST_WITH_TYPE_AND_ALGO, DATA_FMT_STRING, DATA_FMT_HEX, DATA_FMT_UINT }; enum digest_type { DIGEST_TYPE_IMA, DIGEST_TYPE_VERITY, DIGEST_TYPE__LAST }; #define DIGEST_TYPE_NAME_LEN_MAX 7 /* including NUL */ static const char * const digest_type_name[DIGEST_TYPE__LAST] = { [DIGEST_TYPE_IMA] = "ima", [DIGEST_TYPE_VERITY] = "verity" }; static int ima_write_template_field_data(const void *data, const u32 datalen, enum data_formats datafmt, struct ima_field_data *field_data) { u8 *buf, *buf_ptr; u32 buflen = datalen; if (datafmt == DATA_FMT_STRING) buflen = datalen + 1; buf = kzalloc(buflen, GFP_KERNEL); if (!buf) return -ENOMEM; memcpy(buf, data, datalen); /* * Replace all space characters with underscore for event names and * strings. This avoid that, during the parsing of a measurements list, * filenames with spaces or that end with the suffix ' (deleted)' are * split into multiple template fields (the space is the delimitator * character for measurements lists in ASCII format). */ if (datafmt == DATA_FMT_STRING) { for (buf_ptr = buf; buf_ptr - buf < datalen; buf_ptr++) if (*buf_ptr == ' ') *buf_ptr = '_'; } field_data->data = buf; field_data->len = buflen; return 0; } static void ima_show_template_data_ascii(struct seq_file *m, enum ima_show_type show, enum data_formats datafmt, struct ima_field_data *field_data) { u8 *buf_ptr = field_data->data; u32 buflen = field_data->len; switch (datafmt) { case DATA_FMT_DIGEST_WITH_TYPE_AND_ALGO: case DATA_FMT_DIGEST_WITH_ALGO: buf_ptr = strrchr(field_data->data, ':'); if (buf_ptr != field_data->data) seq_printf(m, "%s", field_data->data); /* skip ':' and '\0' */ buf_ptr += 2; buflen -= buf_ptr - field_data->data; fallthrough; case DATA_FMT_DIGEST: case DATA_FMT_HEX: if (!buflen) break; ima_print_digest(m, buf_ptr, buflen); break; case DATA_FMT_STRING: seq_printf(m, "%s", buf_ptr); break; case DATA_FMT_UINT: switch (field_data->len) { case sizeof(u8): seq_printf(m, "%u", *(u8 *)buf_ptr); break; case sizeof(u16): if (ima_canonical_fmt) seq_printf(m, "%u", le16_to_cpu(*(__le16 *)buf_ptr)); else seq_printf(m, "%u", *(u16 *)buf_ptr); break; case sizeof(u32): if (ima_canonical_fmt) seq_printf(m, "%u", le32_to_cpu(*(__le32 *)buf_ptr)); else seq_printf(m, "%u", *(u32 *)buf_ptr); break; case sizeof(u64): if (ima_canonical_fmt) seq_printf(m, "%llu", le64_to_cpu(*(__le64 *)buf_ptr)); else seq_printf(m, "%llu", *(u64 *)buf_ptr); break; default: break; } break; default: break; } } static void ima_show_template_data_binary(struct seq_file *m, enum ima_show_type show, enum data_formats datafmt, struct ima_field_data *field_data) { u32 len = (show == IMA_SHOW_BINARY_OLD_STRING_FMT) ? strlen(field_data->data) : field_data->len; if (show != IMA_SHOW_BINARY_NO_FIELD_LEN) { u32 field_len = !ima_canonical_fmt ? len : (__force u32)cpu_to_le32(len); ima_putc(m, &field_len, sizeof(field_len)); } if (!len) return; ima_putc(m, field_data->data, len); } static void ima_show_template_field_data(struct seq_file *m, enum ima_show_type show, enum data_formats datafmt, struct ima_field_data *field_data) { switch (show) { case IMA_SHOW_ASCII: ima_show_template_data_ascii(m, show, datafmt, field_data); break; case IMA_SHOW_BINARY: case IMA_SHOW_BINARY_NO_FIELD_LEN: case IMA_SHOW_BINARY_OLD_STRING_FMT: ima_show_template_data_binary(m, show, datafmt, field_data); break; default: break; } } void ima_show_template_digest(struct seq_file *m, enum ima_show_type show, struct ima_field_data *field_data) { ima_show_template_field_data(m, show, DATA_FMT_DIGEST, field_data); } void ima_show_template_digest_ng(struct seq_file *m, enum ima_show_type show, struct ima_field_data *field_data) { ima_show_template_field_data(m, show, DATA_FMT_DIGEST_WITH_ALGO, field_data); } void ima_show_template_digest_ngv2(struct seq_file *m, enum ima_show_type show, struct ima_field_data *field_data) { ima_show_template_field_data(m, show, DATA_FMT_DIGEST_WITH_TYPE_AND_ALGO, field_data); } void ima_show_template_string(struct seq_file *m, enum ima_show_type show, struct ima_field_data *field_data) { ima_show_template_field_data(m, show, DATA_FMT_STRING, field_data); } void ima_show_template_sig(struct seq_file *m, enum ima_show_type show, struct ima_field_data *field_data) { ima_show_template_field_data(m, show, DATA_FMT_HEX, field_data); } void ima_show_template_buf(struct seq_file *m, enum ima_show_type show, struct ima_field_data *field_data) { ima_show_template_field_data(m, show, DATA_FMT_HEX, field_data); } void ima_show_template_uint(struct seq_file *m, enum ima_show_type show, struct ima_field_data *field_data) { ima_show_template_field_data(m, show, DATA_FMT_UINT, field_data); } /** * ima_parse_buf() - Parses lengths and data from an input buffer * @bufstartp: Buffer start address. * @bufendp: Buffer end address. * @bufcurp: Pointer to remaining (non-parsed) data. * @maxfields: Length of fields array. * @fields: Array containing lengths and pointers of parsed data. * @curfields: Number of array items containing parsed data. * @len_mask: Bitmap (if bit is set, data length should not be parsed). * @enforce_mask: Check if curfields == maxfields and/or bufcurp == bufendp. * @bufname: String identifier of the input buffer. * * Return: 0 on success, -EINVAL on error. */ int ima_parse_buf(void *bufstartp, void *bufendp, void **bufcurp, int maxfields, struct ima_field_data *fields, int *curfields, unsigned long *len_mask, int enforce_mask, char *bufname) { void *bufp = bufstartp; int i; for (i = 0; i < maxfields; i++) { if (len_mask == NULL || !test_bit(i, len_mask)) { if (bufp > (bufendp - sizeof(u32))) break; if (ima_canonical_fmt) fields[i].len = le32_to_cpu(*(__le32 *)bufp); else fields[i].len = *(u32 *)bufp; bufp += sizeof(u32); } if (bufp > (bufendp - fields[i].len)) break; fields[i].data = bufp; bufp += fields[i].len; } if ((enforce_mask & ENFORCE_FIELDS) && i != maxfields) { pr_err("%s: nr of fields mismatch: expected: %d, current: %d\n", bufname, maxfields, i); return -EINVAL; } if ((enforce_mask & ENFORCE_BUFEND) && bufp != bufendp) { pr_err("%s: buf end mismatch: expected: %p, current: %p\n", bufname, bufendp, bufp); return -EINVAL; } if (curfields) *curfields = i; if (bufcurp) *bufcurp = bufp; return 0; } static int ima_eventdigest_init_common(const u8 *digest, u32 digestsize, u8 digest_type, u8 hash_algo, struct ima_field_data *field_data) { /* * digest formats: * - DATA_FMT_DIGEST: digest * - DATA_FMT_DIGEST_WITH_ALGO: <hash algo> + ':' + '\0' + digest, * - DATA_FMT_DIGEST_WITH_TYPE_AND_ALGO: * <digest type> + ':' + <hash algo> + ':' + '\0' + digest, * * where 'DATA_FMT_DIGEST' is the original digest format ('d') * with a hash size limitation of 20 bytes, * where <digest type> is either "ima" or "verity", * where <hash algo> is the hash_algo_name[] string. */ u8 buffer[DIGEST_TYPE_NAME_LEN_MAX + CRYPTO_MAX_ALG_NAME + 2 + IMA_MAX_DIGEST_SIZE] = { 0 }; enum data_formats fmt = DATA_FMT_DIGEST; u32 offset = 0; if (digest_type < DIGEST_TYPE__LAST && hash_algo < HASH_ALGO__LAST) { fmt = DATA_FMT_DIGEST_WITH_TYPE_AND_ALGO; offset += 1 + sprintf(buffer, "%s:%s:", digest_type_name[digest_type], hash_algo_name[hash_algo]); } else if (hash_algo < HASH_ALGO__LAST) { fmt = DATA_FMT_DIGEST_WITH_ALGO; offset += 1 + sprintf(buffer, "%s:", hash_algo_name[hash_algo]); } if (digest) memcpy(buffer + offset, digest, digestsize); else /* * If digest is NULL, the event being recorded is a violation. * Make room for the digest by increasing the offset by the * hash algorithm digest size. */ offset += hash_digest_size[hash_algo]; return ima_write_template_field_data(buffer, offset + digestsize, fmt, field_data); } /* * This function writes the digest of an event (with size limit). */ int ima_eventdigest_init(struct ima_event_data *event_data, struct ima_field_data *field_data) { struct ima_max_digest_data hash; struct ima_digest_data *hash_hdr = container_of(&hash.hdr, struct ima_digest_data, hdr); u8 *cur_digest = NULL; u32 cur_digestsize = 0; struct inode *inode; int result; memset(&hash, 0, sizeof(hash)); if (event_data->violation) /* recording a violation. */ goto out; if (ima_template_hash_algo_allowed(event_data->iint->ima_hash->algo)) { cur_digest = event_data->iint->ima_hash->digest; cur_digestsize = event_data->iint->ima_hash->length; goto out; } if ((const char *)event_data->filename == boot_aggregate_name) { if (ima_tpm_chip) { hash.hdr.algo = HASH_ALGO_SHA1; result = ima_calc_boot_aggregate(hash_hdr); /* algo can change depending on available PCR banks */ if (!result && hash.hdr.algo != HASH_ALGO_SHA1) result = -EINVAL; if (result < 0) memset(&hash, 0, sizeof(hash)); } cur_digest = hash_hdr->digest; cur_digestsize = hash_digest_size[HASH_ALGO_SHA1]; goto out; } if (!event_data->file) /* missing info to re-calculate the digest */ return -EINVAL; inode = file_inode(event_data->file); hash.hdr.algo = ima_template_hash_algo_allowed(ima_hash_algo) ? ima_hash_algo : HASH_ALGO_SHA1; result = ima_calc_file_hash(event_data->file, hash_hdr); if (result) { integrity_audit_msg(AUDIT_INTEGRITY_DATA, inode, event_data->filename, "collect_data", "failed", result, 0); return result; } cur_digest = hash_hdr->digest; cur_digestsize = hash.hdr.length; out: return ima_eventdigest_init_common(cur_digest, cur_digestsize, DIGEST_TYPE__LAST, HASH_ALGO__LAST, field_data); } /* * This function writes the digest of an event (without size limit). */ int ima_eventdigest_ng_init(struct ima_event_data *event_data, struct ima_field_data *field_data) { u8 *cur_digest = NULL, hash_algo = ima_hash_algo; u32 cur_digestsize = 0; if (event_data->violation) /* recording a violation. */ goto out; cur_digest = event_data->iint->ima_hash->digest; cur_digestsize = event_data->iint->ima_hash->length; hash_algo = event_data->iint->ima_hash->algo; out: return ima_eventdigest_init_common(cur_digest, cur_digestsize, DIGEST_TYPE__LAST, hash_algo, field_data); } /* * This function writes the digest of an event (without size limit), * prefixed with both the digest type and hash algorithm. */ int ima_eventdigest_ngv2_init(struct ima_event_data *event_data, struct ima_field_data *field_data) { u8 *cur_digest = NULL, hash_algo = ima_hash_algo; u32 cur_digestsize = 0; u8 digest_type = DIGEST_TYPE_IMA; if (event_data->violation) /* recording a violation. */ goto out; cur_digest = event_data->iint->ima_hash->digest; cur_digestsize = event_data->iint->ima_hash->length; hash_algo = event_data->iint->ima_hash->algo; if (event_data->iint->flags & IMA_VERITY_REQUIRED) digest_type = DIGEST_TYPE_VERITY; out: return ima_eventdigest_init_common(cur_digest, cur_digestsize, digest_type, hash_algo, field_data); } /* * This function writes the digest of the file which is expected to match the * digest contained in the file's appended signature. */ int ima_eventdigest_modsig_init(struct ima_event_data *event_data, struct ima_field_data *field_data) { enum hash_algo hash_algo; const u8 *cur_digest; u32 cur_digestsize; if (!event_data->modsig) return 0; if (event_data->violation) { /* Recording a violation. */ hash_algo = HASH_ALGO_SHA1; cur_digest = NULL; cur_digestsize = 0; } else { int rc; rc = ima_get_modsig_digest(event_data->modsig, &hash_algo, &cur_digest, &cur_digestsize); if (rc) return rc; else if (hash_algo == HASH_ALGO__LAST || cur_digestsize == 0) /* There was some error collecting the digest. */ return -EINVAL; } return ima_eventdigest_init_common(cur_digest, cur_digestsize, DIGEST_TYPE__LAST, hash_algo, field_data); } static int ima_eventname_init_common(struct ima_event_data *event_data, struct ima_field_data *field_data, bool size_limit) { const char *cur_filename = NULL; struct name_snapshot filename; u32 cur_filename_len = 0; bool snapshot = false; int ret; BUG_ON(event_data->filename == NULL && event_data->file == NULL); if (event_data->filename) { cur_filename = event_data->filename; cur_filename_len = strlen(event_data->filename); if (!size_limit || cur_filename_len <= IMA_EVENT_NAME_LEN_MAX) goto out; } if (event_data->file) { take_dentry_name_snapshot(&filename, event_data->file->f_path.dentry); snapshot = true; cur_filename = filename.name.name; cur_filename_len = strlen(cur_filename); } else /* * Truncate filename if the latter is too long and * the file descriptor is not available. */ cur_filename_len = IMA_EVENT_NAME_LEN_MAX; out: ret = ima_write_template_field_data(cur_filename, cur_filename_len, DATA_FMT_STRING, field_data); if (snapshot) release_dentry_name_snapshot(&filename); return ret; } /* * This function writes the name of an event (with size limit). */ int ima_eventname_init(struct ima_event_data *event_data, struct ima_field_data *field_data) { return ima_eventname_init_common(event_data, field_data, true); } /* * This function writes the name of an event (without size limit). */ int ima_eventname_ng_init(struct ima_event_data *event_data, struct ima_field_data *field_data) { return ima_eventname_init_common(event_data, field_data, false); } /* * ima_eventsig_init - include the file signature as part of the template data */ int ima_eventsig_init(struct ima_event_data *event_data, struct ima_field_data *field_data) { struct evm_ima_xattr_data *xattr_value = event_data->xattr_value; if (!xattr_value || (xattr_value->type != EVM_IMA_XATTR_DIGSIG && xattr_value->type != IMA_VERITY_DIGSIG)) return ima_eventevmsig_init(event_data, field_data); return ima_write_template_field_data(xattr_value, event_data->xattr_len, DATA_FMT_HEX, field_data); } /* * ima_eventbuf_init - include the buffer(kexec-cmldine) as part of the * template data. */ int ima_eventbuf_init(struct ima_event_data *event_data, struct ima_field_data *field_data) { if ((!event_data->buf) || (event_data->buf_len == 0)) return 0; return ima_write_template_field_data(event_data->buf, event_data->buf_len, DATA_FMT_HEX, field_data); } /* * ima_eventmodsig_init - include the appended file signature as part of the * template data */ int ima_eventmodsig_init(struct ima_event_data *event_data, struct ima_field_data *field_data) { const void *data; u32 data_len; int rc; if (!event_data->modsig) return 0; /* * modsig is a runtime structure containing pointers. Get its raw data * instead. */ rc = ima_get_raw_modsig(event_data->modsig, &data, &data_len); if (rc) return rc; return ima_write_template_field_data(data, data_len, DATA_FMT_HEX, field_data); } /* * ima_eventevmsig_init - include the EVM portable signature as part of the * template data */ int ima_eventevmsig_init(struct ima_event_data *event_data, struct ima_field_data *field_data) { struct evm_ima_xattr_data *xattr_data = NULL; int rc = 0; if (!event_data->file) return 0; rc = vfs_getxattr_alloc(&nop_mnt_idmap, file_dentry(event_data->file), XATTR_NAME_EVM, (char **)&xattr_data, 0, GFP_NOFS); if (rc <= 0 || xattr_data->type != EVM_XATTR_PORTABLE_DIGSIG) { rc = 0; goto out; } rc = ima_write_template_field_data((char *)xattr_data, rc, DATA_FMT_HEX, field_data); out: kfree(xattr_data); return rc; } static int ima_eventinodedac_init_common(struct ima_event_data *event_data, struct ima_field_data *field_data, bool get_uid) { unsigned int id; if (!event_data->file) return 0; if (get_uid) id = i_uid_read(file_inode(event_data->file)); else id = i_gid_read(file_inode(event_data->file)); if (ima_canonical_fmt) { if (sizeof(id) == sizeof(u16)) id = (__force u16)cpu_to_le16(id); else id = (__force u32)cpu_to_le32(id); } return ima_write_template_field_data((void *)&id, sizeof(id), DATA_FMT_UINT, field_data); } /* * ima_eventinodeuid_init - include the inode UID as part of the template * data */ int ima_eventinodeuid_init(struct ima_event_data *event_data, struct ima_field_data *field_data) { return ima_eventinodedac_init_common(event_data, field_data, true); } /* * ima_eventinodegid_init - include the inode GID as part of the template * data */ int ima_eventinodegid_init(struct ima_event_data *event_data, struct ima_field_data *field_data) { return ima_eventinodedac_init_common(event_data, field_data, false); } /* * ima_eventinodemode_init - include the inode mode as part of the template * data */ int ima_eventinodemode_init(struct ima_event_data *event_data, struct ima_field_data *field_data) { struct inode *inode; u16 mode; if (!event_data->file) return 0; inode = file_inode(event_data->file); mode = inode->i_mode; if (ima_canonical_fmt) mode = (__force u16)cpu_to_le16(mode); return ima_write_template_field_data((char *)&mode, sizeof(mode), DATA_FMT_UINT, field_data); } static int ima_eventinodexattrs_init_common(struct ima_event_data *event_data, struct ima_field_data *field_data, char type) { u8 *buffer = NULL; int rc; if (!event_data->file) return 0; rc = evm_read_protected_xattrs(file_dentry(event_data->file), NULL, 0, type, ima_canonical_fmt); if (rc < 0) return 0; buffer = kmalloc(rc, GFP_KERNEL); if (!buffer) return 0; rc = evm_read_protected_xattrs(file_dentry(event_data->file), buffer, rc, type, ima_canonical_fmt); if (rc < 0) { rc = 0; goto out; } rc = ima_write_template_field_data((char *)buffer, rc, DATA_FMT_HEX, field_data); out: kfree(buffer); return rc; } /* * ima_eventinodexattrnames_init - include a list of xattr names as part of the * template data */ int ima_eventinodexattrnames_init(struct ima_event_data *event_data, struct ima_field_data *field_data) { return ima_eventinodexattrs_init_common(event_data, field_data, 'n'); } /* * ima_eventinodexattrlengths_init - include a list of xattr lengths as part of * the template data */ int ima_eventinodexattrlengths_init(struct ima_event_data *event_data, struct ima_field_data *field_data) { return ima_eventinodexattrs_init_common(event_data, field_data, 'l'); } /* * ima_eventinodexattrvalues_init - include a list of xattr values as part of * the template data */ int ima_eventinodexattrvalues_init(struct ima_event_data *event_data, struct ima_field_data *field_data) { return ima_eventinodexattrs_init_common(event_data, field_data, 'v'); } |
| 27 27 26 26 25 25 26 26 26 27 18 26 20 6 6 6 6 6 11 10 9 1 5 6 5 4 2 1 1 1 11 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 | /* * Routines to compress and uncompress tcp packets (for transmission * over low speed serial lines). * * Copyright (c) 1989 Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms are permitted * provided that the above copyright notice and this paragraph are * duplicated in all such forms and that any documentation, * advertising materials, and other materials related to such * distribution and use acknowledge that the software was developed * by the University of California, Berkeley. The name of the * University may not be used to endorse or promote products derived * from this software without specific prior written permission. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * * Van Jacobson (van@helios.ee.lbl.gov), Dec 31, 1989: * - Initial distribution. * * * modified for KA9Q Internet Software Package by * Katie Stevens (dkstevens@ucdavis.edu) * University of California, Davis * Computing Services * - 01-31-90 initial adaptation (from 1.19) * PPP.05 02-15-90 [ks] * PPP.08 05-02-90 [ks] use PPP protocol field to signal compression * PPP.15 09-90 [ks] improve mbuf handling * PPP.16 11-02 [karn] substantially rewritten to use NOS facilities * * - Feb 1991 Bill_Simpson@um.cc.umich.edu * variable number of conversation slots * allow zero or one slots * separate routines * status display * - Jul 1994 Dmitry Gorodchanin * Fixes for memory leaks. * - Oct 1994 Dmitry Gorodchanin * Modularization. * - Jan 1995 Bjorn Ekwall * Use ip_fast_csum from ip.h * - July 1995 Christos A. Polyzols * Spotted bug in tcp option checking * * * This module is a difficult issue. It's clearly inet code but it's also clearly * driver code belonging close to PPP and SLIP */ #include <linux/module.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/kernel.h> #include <net/slhc_vj.h> #ifdef CONFIG_INET /* Entire module is for IP only */ #include <linux/mm.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/termios.h> #include <linux/in.h> #include <linux/fcntl.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <net/ip.h> #include <net/protocol.h> #include <net/icmp.h> #include <net/tcp.h> #include <linux/skbuff.h> #include <net/sock.h> #include <linux/timer.h> #include <linux/uaccess.h> #include <net/checksum.h> #include <asm/unaligned.h> static unsigned char *encode(unsigned char *cp, unsigned short n); static long decode(unsigned char **cpp); static unsigned char * put16(unsigned char *cp, unsigned short x); static unsigned short pull16(unsigned char **cpp); /* Allocate compression data structure * slots must be in range 0 to 255 (zero meaning no compression) * Returns pointer to structure or ERR_PTR() on error. */ struct slcompress * slhc_init(int rslots, int tslots) { short i; struct cstate *ts; struct slcompress *comp; if (rslots < 0 || rslots > 255 || tslots < 0 || tslots > 255) return ERR_PTR(-EINVAL); comp = kzalloc(sizeof(struct slcompress), GFP_KERNEL); if (! comp) goto out_fail; if (rslots > 0) { size_t rsize = rslots * sizeof(struct cstate); comp->rstate = kzalloc(rsize, GFP_KERNEL); if (! comp->rstate) goto out_free; comp->rslot_limit = rslots - 1; } if (tslots > 0) { size_t tsize = tslots * sizeof(struct cstate); comp->tstate = kzalloc(tsize, GFP_KERNEL); if (! comp->tstate) goto out_free2; comp->tslot_limit = tslots - 1; } comp->xmit_oldest = 0; comp->xmit_current = 255; comp->recv_current = 255; /* * don't accept any packets with implicit index until we get * one with an explicit index. Otherwise the uncompress code * will try to use connection 255, which is almost certainly * out of range */ comp->flags |= SLF_TOSS; if ( tslots > 0 ) { ts = comp->tstate; for(i = comp->tslot_limit; i > 0; --i){ ts[i].cs_this = i; ts[i].next = &(ts[i - 1]); } ts[0].next = &(ts[comp->tslot_limit]); ts[0].cs_this = 0; } return comp; out_free2: kfree(comp->rstate); out_free: kfree(comp); out_fail: return ERR_PTR(-ENOMEM); } /* Free a compression data structure */ void slhc_free(struct slcompress *comp) { if ( IS_ERR_OR_NULL(comp) ) return; if ( comp->tstate != NULLSLSTATE ) kfree( comp->tstate ); if ( comp->rstate != NULLSLSTATE ) kfree( comp->rstate ); kfree( comp ); } /* Put a short in host order into a char array in network order */ static inline unsigned char * put16(unsigned char *cp, unsigned short x) { *cp++ = x >> 8; *cp++ = x; return cp; } /* Encode a number */ static unsigned char * encode(unsigned char *cp, unsigned short n) { if(n >= 256 || n == 0){ *cp++ = 0; cp = put16(cp,n); } else { *cp++ = n; } return cp; } /* Pull a 16-bit integer in host order from buffer in network byte order */ static unsigned short pull16(unsigned char **cpp) { short rval; rval = *(*cpp)++; rval <<= 8; rval |= *(*cpp)++; return rval; } /* Decode a number */ static long decode(unsigned char **cpp) { int x; x = *(*cpp)++; if(x == 0){ return pull16(cpp) & 0xffff; /* pull16 returns -1 on error */ } else { return x & 0xff; /* -1 if PULLCHAR returned error */ } } /* * icp and isize are the original packet. * ocp is a place to put a copy if necessary. * cpp is initially a pointer to icp. If the copy is used, * change it to ocp. */ int slhc_compress(struct slcompress *comp, unsigned char *icp, int isize, unsigned char *ocp, unsigned char **cpp, int compress_cid) { struct cstate *ocs = &(comp->tstate[comp->xmit_oldest]); struct cstate *lcs = ocs; struct cstate *cs = lcs->next; unsigned long deltaS, deltaA; short changes = 0; int nlen, hlen; unsigned char new_seq[16]; unsigned char *cp = new_seq; struct iphdr *ip; struct tcphdr *th, *oth; __sum16 csum; /* * Don't play with runt packets. */ if(isize<sizeof(struct iphdr)) return isize; ip = (struct iphdr *) icp; if (ip->version != 4 || ip->ihl < 5) return isize; /* Bail if this packet isn't TCP, or is an IP fragment */ if (ip->protocol != IPPROTO_TCP || (ntohs(ip->frag_off) & 0x3fff)) { /* Send as regular IP */ if(ip->protocol != IPPROTO_TCP) comp->sls_o_nontcp++; else comp->sls_o_tcp++; return isize; } nlen = ip->ihl * 4; if (isize < nlen + sizeof(*th)) return isize; th = (struct tcphdr *)(icp + nlen); if (th->doff < sizeof(struct tcphdr) / 4) return isize; hlen = nlen + th->doff * 4; /* Bail if the TCP packet isn't `compressible' (i.e., ACK isn't set or * some other control bit is set). Also uncompressible if * it's a runt. */ if(hlen > isize || th->syn || th->fin || th->rst || ! (th->ack)){ /* TCP connection stuff; send as regular IP */ comp->sls_o_tcp++; return isize; } /* * Packet is compressible -- we're going to send either a * COMPRESSED_TCP or UNCOMPRESSED_TCP packet. Either way, * we need to locate (or create) the connection state. * * States are kept in a circularly linked list with * xmit_oldest pointing to the end of the list. The * list is kept in lru order by moving a state to the * head of the list whenever it is referenced. Since * the list is short and, empirically, the connection * we want is almost always near the front, we locate * states via linear search. If we don't find a state * for the datagram, the oldest state is (re-)used. */ for ( ; ; ) { if( ip->saddr == cs->cs_ip.saddr && ip->daddr == cs->cs_ip.daddr && th->source == cs->cs_tcp.source && th->dest == cs->cs_tcp.dest) goto found; /* if current equal oldest, at end of list */ if ( cs == ocs ) break; lcs = cs; cs = cs->next; comp->sls_o_searches++; } /* * Didn't find it -- re-use oldest cstate. Send an * uncompressed packet that tells the other side what * connection number we're using for this conversation. * * Note that since the state list is circular, the oldest * state points to the newest and we only need to set * xmit_oldest to update the lru linkage. */ comp->sls_o_misses++; comp->xmit_oldest = lcs->cs_this; goto uncompressed; found: /* * Found it -- move to the front on the connection list. */ if(lcs == ocs) { /* found at most recently used */ } else if (cs == ocs) { /* found at least recently used */ comp->xmit_oldest = lcs->cs_this; } else { /* more than 2 elements */ lcs->next = cs->next; cs->next = ocs->next; ocs->next = cs; } /* * Make sure that only what we expect to change changed. * Check the following: * IP protocol version, header length & type of service. * The "Don't fragment" bit. * The time-to-live field. * The TCP header length. * IP options, if any. * TCP options, if any. * If any of these things are different between the previous & * current datagram, we send the current datagram `uncompressed'. */ oth = &cs->cs_tcp; if(ip->version != cs->cs_ip.version || ip->ihl != cs->cs_ip.ihl || ip->tos != cs->cs_ip.tos || (ip->frag_off & htons(0x4000)) != (cs->cs_ip.frag_off & htons(0x4000)) || ip->ttl != cs->cs_ip.ttl || th->doff != cs->cs_tcp.doff || (ip->ihl > 5 && memcmp(ip+1,cs->cs_ipopt,((ip->ihl)-5)*4) != 0) || (th->doff > 5 && memcmp(th+1,cs->cs_tcpopt,((th->doff)-5)*4) != 0)){ goto uncompressed; } /* * Figure out which of the changing fields changed. The * receiver expects changes in the order: urgent, window, * ack, seq (the order minimizes the number of temporaries * needed in this section of code). */ if(th->urg){ deltaS = ntohs(th->urg_ptr); cp = encode(cp,deltaS); changes |= NEW_U; } else if(th->urg_ptr != oth->urg_ptr){ /* argh! URG not set but urp changed -- a sensible * implementation should never do this but RFC793 * doesn't prohibit the change so we have to deal * with it. */ goto uncompressed; } if((deltaS = ntohs(th->window) - ntohs(oth->window)) != 0){ cp = encode(cp,deltaS); changes |= NEW_W; } if((deltaA = ntohl(th->ack_seq) - ntohl(oth->ack_seq)) != 0L){ if(deltaA > 0x0000ffff) goto uncompressed; cp = encode(cp,deltaA); changes |= NEW_A; } if((deltaS = ntohl(th->seq) - ntohl(oth->seq)) != 0L){ if(deltaS > 0x0000ffff) goto uncompressed; cp = encode(cp,deltaS); changes |= NEW_S; } switch(changes){ case 0: /* Nothing changed. If this packet contains data and the * last one didn't, this is probably a data packet following * an ack (normal on an interactive connection) and we send * it compressed. Otherwise it's probably a retransmit, * retransmitted ack or window probe. Send it uncompressed * in case the other side missed the compressed version. */ if(ip->tot_len != cs->cs_ip.tot_len && ntohs(cs->cs_ip.tot_len) == hlen) break; goto uncompressed; case SPECIAL_I: case SPECIAL_D: /* actual changes match one of our special case encodings -- * send packet uncompressed. */ goto uncompressed; case NEW_S|NEW_A: if(deltaS == deltaA && deltaS == ntohs(cs->cs_ip.tot_len) - hlen){ /* special case for echoed terminal traffic */ changes = SPECIAL_I; cp = new_seq; } break; case NEW_S: if(deltaS == ntohs(cs->cs_ip.tot_len) - hlen){ /* special case for data xfer */ changes = SPECIAL_D; cp = new_seq; } break; } deltaS = ntohs(ip->id) - ntohs(cs->cs_ip.id); if(deltaS != 1){ cp = encode(cp,deltaS); changes |= NEW_I; } if(th->psh) changes |= TCP_PUSH_BIT; /* Grab the cksum before we overwrite it below. Then update our * state with this packet's header. */ csum = th->check; memcpy(&cs->cs_ip,ip,20); memcpy(&cs->cs_tcp,th,20); /* We want to use the original packet as our compressed packet. * (cp - new_seq) is the number of bytes we need for compressed * sequence numbers. In addition we need one byte for the change * mask, one for the connection id and two for the tcp checksum. * So, (cp - new_seq) + 4 bytes of header are needed. */ deltaS = cp - new_seq; if(compress_cid == 0 || comp->xmit_current != cs->cs_this){ cp = ocp; *cpp = ocp; *cp++ = changes | NEW_C; *cp++ = cs->cs_this; comp->xmit_current = cs->cs_this; } else { cp = ocp; *cpp = ocp; *cp++ = changes; } *(__sum16 *)cp = csum; cp += 2; /* deltaS is now the size of the change section of the compressed header */ memcpy(cp,new_seq,deltaS); /* Write list of deltas */ memcpy(cp+deltaS,icp+hlen,isize-hlen); comp->sls_o_compressed++; ocp[0] |= SL_TYPE_COMPRESSED_TCP; return isize - hlen + deltaS + (cp - ocp); /* Update connection state cs & send uncompressed packet (i.e., * a regular ip/tcp packet but with the 'conversation id' we hope * to use on future compressed packets in the protocol field). */ uncompressed: memcpy(&cs->cs_ip,ip,20); memcpy(&cs->cs_tcp,th,20); if (ip->ihl > 5) memcpy(cs->cs_ipopt, ip+1, ((ip->ihl) - 5) * 4); if (th->doff > 5) memcpy(cs->cs_tcpopt, th+1, ((th->doff) - 5) * 4); comp->xmit_current = cs->cs_this; comp->sls_o_uncompressed++; memcpy(ocp, icp, isize); *cpp = ocp; ocp[9] = cs->cs_this; ocp[0] |= SL_TYPE_UNCOMPRESSED_TCP; return isize; } int slhc_uncompress(struct slcompress *comp, unsigned char *icp, int isize) { int changes; long x; struct tcphdr *thp; struct iphdr *ip; struct cstate *cs; int len, hdrlen; unsigned char *cp = icp; /* We've got a compressed packet; read the change byte */ comp->sls_i_compressed++; if(isize < 3){ comp->sls_i_error++; return 0; } changes = *cp++; if(changes & NEW_C){ /* Make sure the state index is in range, then grab the state. * If we have a good state index, clear the 'discard' flag. */ x = *cp++; /* Read conn index */ if(x < 0 || x > comp->rslot_limit) goto bad; /* Check if the cstate is initialized */ if (!comp->rstate[x].initialized) goto bad; comp->flags &=~ SLF_TOSS; comp->recv_current = x; } else { /* this packet has an implicit state index. If we've * had a line error since the last time we got an * explicit state index, we have to toss the packet. */ if(comp->flags & SLF_TOSS){ comp->sls_i_tossed++; return 0; } } cs = &comp->rstate[comp->recv_current]; thp = &cs->cs_tcp; ip = &cs->cs_ip; thp->check = *(__sum16 *)cp; cp += 2; thp->psh = (changes & TCP_PUSH_BIT) ? 1 : 0; /* * we can use the same number for the length of the saved header and * the current one, because the packet wouldn't have been sent * as compressed unless the options were the same as the previous one */ hdrlen = ip->ihl * 4 + thp->doff * 4; switch(changes & SPECIALS_MASK){ case SPECIAL_I: /* Echoed terminal traffic */ { short i; i = ntohs(ip->tot_len) - hdrlen; thp->ack_seq = htonl( ntohl(thp->ack_seq) + i); thp->seq = htonl( ntohl(thp->seq) + i); } break; case SPECIAL_D: /* Unidirectional data */ thp->seq = htonl( ntohl(thp->seq) + ntohs(ip->tot_len) - hdrlen); break; default: if(changes & NEW_U){ thp->urg = 1; if((x = decode(&cp)) == -1) { goto bad; } thp->urg_ptr = htons(x); } else thp->urg = 0; if(changes & NEW_W){ if((x = decode(&cp)) == -1) { goto bad; } thp->window = htons( ntohs(thp->window) + x); } if(changes & NEW_A){ if((x = decode(&cp)) == -1) { goto bad; } thp->ack_seq = htonl( ntohl(thp->ack_seq) + x); } if(changes & NEW_S){ if((x = decode(&cp)) == -1) { goto bad; } thp->seq = htonl( ntohl(thp->seq) + x); } break; } if(changes & NEW_I){ if((x = decode(&cp)) == -1) { goto bad; } ip->id = htons (ntohs (ip->id) + x); } else ip->id = htons (ntohs (ip->id) + 1); /* * At this point, cp points to the first byte of data in the * packet. Put the reconstructed TCP and IP headers back on the * packet. Recalculate IP checksum (but not TCP checksum). */ len = isize - (cp - icp); if (len < 0) goto bad; len += hdrlen; ip->tot_len = htons(len); ip->check = 0; memmove(icp + hdrlen, cp, len - hdrlen); cp = icp; memcpy(cp, ip, 20); cp += 20; if (ip->ihl > 5) { memcpy(cp, cs->cs_ipopt, (ip->ihl - 5) * 4); cp += (ip->ihl - 5) * 4; } put_unaligned(ip_fast_csum(icp, ip->ihl), &((struct iphdr *)icp)->check); memcpy(cp, thp, 20); cp += 20; if (thp->doff > 5) { memcpy(cp, cs->cs_tcpopt, ((thp->doff) - 5) * 4); cp += ((thp->doff) - 5) * 4; } return len; bad: comp->sls_i_error++; return slhc_toss( comp ); } int slhc_remember(struct slcompress *comp, unsigned char *icp, int isize) { struct cstate *cs; unsigned ihl; unsigned char index; if(isize < 20) { /* The packet is shorter than a legal IP header */ comp->sls_i_runt++; return slhc_toss( comp ); } /* Peek at the IP header's IHL field to find its length */ ihl = icp[0] & 0xf; if(ihl < 20 / 4){ /* The IP header length field is too small */ comp->sls_i_runt++; return slhc_toss( comp ); } index = icp[9]; icp[9] = IPPROTO_TCP; if (ip_fast_csum(icp, ihl)) { /* Bad IP header checksum; discard */ comp->sls_i_badcheck++; return slhc_toss( comp ); } if(index > comp->rslot_limit) { comp->sls_i_error++; return slhc_toss(comp); } /* Update local state */ cs = &comp->rstate[comp->recv_current = index]; comp->flags &=~ SLF_TOSS; memcpy(&cs->cs_ip,icp,20); memcpy(&cs->cs_tcp,icp + ihl*4,20); if (ihl > 5) memcpy(cs->cs_ipopt, icp + sizeof(struct iphdr), (ihl - 5) * 4); if (cs->cs_tcp.doff > 5) memcpy(cs->cs_tcpopt, icp + ihl*4 + sizeof(struct tcphdr), (cs->cs_tcp.doff - 5) * 4); cs->cs_hsize = ihl*2 + cs->cs_tcp.doff*2; cs->initialized = true; /* Put headers back on packet * Neither header checksum is recalculated */ comp->sls_i_uncompressed++; return isize; } int slhc_toss(struct slcompress *comp) { if ( comp == NULLSLCOMPR ) return 0; comp->flags |= SLF_TOSS; return 0; } #else /* CONFIG_INET */ int slhc_toss(struct slcompress *comp) { printk(KERN_DEBUG "Called IP function on non IP-system: slhc_toss"); return -EINVAL; } int slhc_uncompress(struct slcompress *comp, unsigned char *icp, int isize) { printk(KERN_DEBUG "Called IP function on non IP-system: slhc_uncompress"); return -EINVAL; } int slhc_compress(struct slcompress *comp, unsigned char *icp, int isize, unsigned char *ocp, unsigned char **cpp, int compress_cid) { printk(KERN_DEBUG "Called IP function on non IP-system: slhc_compress"); return -EINVAL; } int slhc_remember(struct slcompress *comp, unsigned char *icp, int isize) { printk(KERN_DEBUG "Called IP function on non IP-system: slhc_remember"); return -EINVAL; } void slhc_free(struct slcompress *comp) { printk(KERN_DEBUG "Called IP function on non IP-system: slhc_free"); } struct slcompress * slhc_init(int rslots, int tslots) { printk(KERN_DEBUG "Called IP function on non IP-system: slhc_init"); return NULL; } #endif /* CONFIG_INET */ /* VJ header compression */ EXPORT_SYMBOL(slhc_init); EXPORT_SYMBOL(slhc_free); EXPORT_SYMBOL(slhc_remember); EXPORT_SYMBOL(slhc_compress); EXPORT_SYMBOL(slhc_uncompress); EXPORT_SYMBOL(slhc_toss); MODULE_DESCRIPTION("Compression helpers for SLIP (serial line)"); MODULE_LICENSE("Dual BSD/GPL"); |
| 276 276 276 276 276 1 1 1 276 276 1 1 241 43 28 43 275 274 274 273 273 266 265 15 16 276 276 276 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 | // SPDX-License-Identifier: GPL-2.0-only /* * Pluggable TCP upper layer protocol support. * * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved. * Copyright (c) 2016-2017, Dave Watson <davejwatson@fb.com>. All rights reserved. * */ #include <linux/module.h> #include <linux/mm.h> #include <linux/types.h> #include <linux/list.h> #include <linux/gfp.h> #include <net/tcp.h> static DEFINE_SPINLOCK(tcp_ulp_list_lock); static LIST_HEAD(tcp_ulp_list); /* Simple linear search, don't expect many entries! */ static struct tcp_ulp_ops *tcp_ulp_find(const char *name) { struct tcp_ulp_ops *e; list_for_each_entry_rcu(e, &tcp_ulp_list, list, lockdep_is_held(&tcp_ulp_list_lock)) { if (strcmp(e->name, name) == 0) return e; } return NULL; } static const struct tcp_ulp_ops *__tcp_ulp_find_autoload(const char *name) { const struct tcp_ulp_ops *ulp = NULL; rcu_read_lock(); ulp = tcp_ulp_find(name); #ifdef CONFIG_MODULES if (!ulp && capable(CAP_NET_ADMIN)) { rcu_read_unlock(); request_module("tcp-ulp-%s", name); rcu_read_lock(); ulp = tcp_ulp_find(name); } #endif if (!ulp || !try_module_get(ulp->owner)) ulp = NULL; rcu_read_unlock(); return ulp; } /* Attach new upper layer protocol to the list * of available protocols. */ int tcp_register_ulp(struct tcp_ulp_ops *ulp) { int ret = 0; spin_lock(&tcp_ulp_list_lock); if (tcp_ulp_find(ulp->name)) ret = -EEXIST; else list_add_tail_rcu(&ulp->list, &tcp_ulp_list); spin_unlock(&tcp_ulp_list_lock); return ret; } EXPORT_SYMBOL_GPL(tcp_register_ulp); void tcp_unregister_ulp(struct tcp_ulp_ops *ulp) { spin_lock(&tcp_ulp_list_lock); list_del_rcu(&ulp->list); spin_unlock(&tcp_ulp_list_lock); synchronize_rcu(); } EXPORT_SYMBOL_GPL(tcp_unregister_ulp); /* Build string with list of available upper layer protocl values */ void tcp_get_available_ulp(char *buf, size_t maxlen) { struct tcp_ulp_ops *ulp_ops; size_t offs = 0; *buf = '\0'; rcu_read_lock(); list_for_each_entry_rcu(ulp_ops, &tcp_ulp_list, list) { offs += snprintf(buf + offs, maxlen - offs, "%s%s", offs == 0 ? "" : " ", ulp_ops->name); if (WARN_ON_ONCE(offs >= maxlen)) break; } rcu_read_unlock(); } void tcp_update_ulp(struct sock *sk, struct proto *proto, void (*write_space)(struct sock *sk)) { struct inet_connection_sock *icsk = inet_csk(sk); if (icsk->icsk_ulp_ops->update) icsk->icsk_ulp_ops->update(sk, proto, write_space); } void tcp_cleanup_ulp(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); /* No sock_owned_by_me() check here as at the time the * stack calls this function, the socket is dead and * about to be destroyed. */ if (!icsk->icsk_ulp_ops) return; if (icsk->icsk_ulp_ops->release) icsk->icsk_ulp_ops->release(sk); module_put(icsk->icsk_ulp_ops->owner); icsk->icsk_ulp_ops = NULL; } static int __tcp_set_ulp(struct sock *sk, const struct tcp_ulp_ops *ulp_ops) { struct inet_connection_sock *icsk = inet_csk(sk); int err; err = -EEXIST; if (icsk->icsk_ulp_ops) goto out_err; if (sk->sk_socket) clear_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags); err = -ENOTCONN; if (!ulp_ops->clone && sk->sk_state == TCP_LISTEN) goto out_err; err = ulp_ops->init(sk); if (err) goto out_err; icsk->icsk_ulp_ops = ulp_ops; return 0; out_err: module_put(ulp_ops->owner); return err; } int tcp_set_ulp(struct sock *sk, const char *name) { const struct tcp_ulp_ops *ulp_ops; sock_owned_by_me(sk); ulp_ops = __tcp_ulp_find_autoload(name); if (!ulp_ops) return -ENOENT; return __tcp_set_ulp(sk, ulp_ops); } |
| 7872 7965 7967 442 357 87 24 7735 14 14 14 14 6 7754 7967 54 54 6323 717 718 714 2 1 1 1 717 719 7946 7813 7812 7813 9 7810 14 9 2 2 14 14 6 6 8562 8565 8540 59 14 6 56 4 4 56 8560 8568 6 1187 565 538 8559 722 155 611 885 36 8566 5213 2446 4408 8447 4329 1 8572 8566 4328 4292 8572 8571 8561 8562 6667 6774 7981 2 2855 7796 32 7800 32 7758 7757 3010 12 12 12 12 7760 30 13 30 7776 7780 7760 7754 7724 29 7722 2433 131 7754 7739 37 33 37 37 7738 7742 7742 3 3 7743 7737 854 4 4 7741 2229 7725 7726 7725 7729 7733 7731 5 38 202 202 200 24 176 176 176 176 202 26 26 26 192 8078 192 7346 5646 5638 5647 7354 4 7352 7351 1688 5 1 5428 5425 5278 5277 324 5428 5340 5342 3 5343 5341 5344 5194 327 328 329 5342 5344 882 881 1 880 880 880 2 2006 1372 2007 1555 1262 43 43 43 43 53 3 2 2 1 1 1 31 27 27 29 30 30 30 86 83 5 83 83 83 85 8 8 8 2 6 8 15 9 14 14 642 625 643 18 639 623 623 1 624 625 17 15 1 627 630 631 631 2839 2809 2572 201 27 2 26 26 26 201 201 201 200 201 6400 5746 5743 25 23 6 5731 5728 5729 5733 54 5730 7122 6918 5749 30 26 2841 427 426 810 5 2527 2531 53 53 53 991 914 989 913 913 32 7065 7041 1911 6912 7067 172 172 168 1977 972 746 60 60 60 3222 3225 3220 16 3 3 3215 67 3221 62 368 375 375 374 363 2026 2027 12 12 2029 16 16 2027 1262 2027 2027 2025 870 2001 756 756 2027 5 2027 1403 1356 84 1047 1402 1391 2011 342 2 339 2011 885 7119 7126 398 7035 2048 2030 7012 6767 6766 2537 425 7007 5926 2002 869 870 2033 351 342 80 7 77 77 77 340 340 340 12 11 12 256 254 8 8 2 6 6 252 1 4 3 3452 482 117 483 351 256 482 482 482 3 3 3454 7477 3412 8 3418 6867 6868 1633 6860 1659 7486 4210 14878 14876 14878 14383 13454 14880 525 320 525 7448 2466 2466 7463 2349 6532 640 7760 5097 7763 118 7757 7729 375 7737 7738 482 3127 7592 7734 7277 7280 7712 7706 1643 7276 7271 1721 127 1655 7271 10 5 7792 7795 82 7799 7780 143 7793 7794 7796 33 19 33 19 14 7780 5085 5085 1 3594 3423 3412 3414 3408 65 387 3 2 1 344 323 10 335 313 313 23 23 335 3543 10 7795 2343 6 2340 77 1 78 2372 12 12 2374 2343 2374 66 2225 2222 125 2221 2371 2404 157 2281 2280 9 2276 1 2271 2137 2280 1029 1029 1025 998 998 1026 1083 69 1029 1029 3 1027 1026 998 1027 276 654 12 1738 1744 1741 1739 1736 1738 1734 1734 1734 1506 1508 9 1500 1497 202 202 202 199 382 1 383 368 291 290 278 61 230 200 1 1 201 201 201 201 2106 11 11 1 11 316 316 316 315 315 313 314 314 313 312 311 311 316 102 231 311 311 974 974 56 974 813 974 973 194 193 14 182 183 18 166 166 194 177 194 179 22 177 192 276 1368 260 263 262 260 260 257 49 49 49 49 42 3721 3720 5868 5870 202 198 3566 3621 3621 2525 5865 5847 4 3 5870 130 130 129 130 128 1 632 632 622 1 64 64 1 13 61 47 9 38 38 38 21 2555 2552 2554 2314 2554 327 4 4 2555 2315 1065 2315 640 63 640 638 20 2315 64 59 12 2272 2270 103 2271 94 1 2211 558 557 553 702 112 94 5907 92 2 92 5851 5248 3 5250 5247 1753 897 858 897 896 2553 1773 896 1753 2555 2473 681 2472 9 2500 1750 1773 2473 559 5440 354 5444 486 5794 5447 5795 5448 5792 859 858 851 5788 5781 559 5436 144 5783 5758 5754 5597 5596 130 840 839 134 164 163 163 163 163 160 163 160 160 158 160 157 157 156 2 9 8 8 7 8 91 83 83 20 5987 5987 9 5980 93 5934 5938 5909 5596 5795 5802 216 5638 1221 5939 1220 5975 5975 51 5925 5928 5927 21 21 21 21 20 20 20 20 891 5 76 846 845 845 842 828 815 890 133 154 153 156 57 708 5 107 107 107 107 21 106 107 107 107 106 103 174 174 174 25 160 160 158 110 105 20 30 59 173 95 82 640 640 639 637 635 636 611 602 602 181 552 552 549 532 1 599 385 245 245 66 66 64 64 64 7 63 63 63 57 6 59 64 76 76 68 66 67 2 64 64 63 64 65 66 74 57 57 180 180 179 179 179 179 178 1 178 174 1 177 174 174 174 63 63 50 49 48 4 45 39 39 44 37 46 46 48 1 61 3 2 46 45 22 23 40 40 52 53 53 52 52 49 64 2 62 12 53 52 53 1 64 39 25 60 60 59 59 59 59 59 55 55 29 42 55 1 55 49 29 38 49 59 59 4 59 47 3 44 43 43 40 40 39 43 58 29 32 226 222 218 46 25 219 218 217 42 18 41 4 217 216 216 216 216 17 21 18 191 3 6 184 215 215 214 214 190 1 214 29 1 214 203 19 3 19 1 203 186 22 211 197 211 40 211 203 203 211 223 143 141 140 141 120 120 118 118 1 117 116 116 86 114 105 3 100 101 22 22 12 100 69 70 99 11 98 97 92 97 112 113 112 113 117 1 139 81 9 55 18 18 18 18 19 19 10 9 9 9 9 9 7 9 9 9 9 11 7 7 2 11 10 10 11 12 7 7 7 7 6 6 6 7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 | // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/namei.c * * Copyright (C) 1991, 1992 Linus Torvalds */ /* * Some corrections by tytso. */ /* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname * lookup logic. */ /* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture. */ #include <linux/init.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/wordpart.h> #include <linux/fs.h> #include <linux/filelock.h> #include <linux/namei.h> #include <linux/pagemap.h> #include <linux/sched/mm.h> #include <linux/fsnotify.h> #include <linux/personality.h> #include <linux/security.h> #include <linux/syscalls.h> #include <linux/mount.h> #include <linux/audit.h> #include <linux/capability.h> #include <linux/file.h> #include <linux/fcntl.h> #include <linux/device_cgroup.h> #include <linux/fs_struct.h> #include <linux/posix_acl.h> #include <linux/hash.h> #include <linux/bitops.h> #include <linux/init_task.h> #include <linux/uaccess.h> #include "internal.h" #include "mount.h" /* [Feb-1997 T. Schoebel-Theuer] * Fundamental changes in the pathname lookup mechanisms (namei) * were necessary because of omirr. The reason is that omirr needs * to know the _real_ pathname, not the user-supplied one, in case * of symlinks (and also when transname replacements occur). * * The new code replaces the old recursive symlink resolution with * an iterative one (in case of non-nested symlink chains). It does * this with calls to <fs>_follow_link(). * As a side effect, dir_namei(), _namei() and follow_link() are now * replaced with a single function lookup_dentry() that can handle all * the special cases of the former code. * * With the new dcache, the pathname is stored at each inode, at least as * long as the refcount of the inode is positive. As a side effect, the * size of the dcache depends on the inode cache and thus is dynamic. * * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink * resolution to correspond with current state of the code. * * Note that the symlink resolution is not *completely* iterative. * There is still a significant amount of tail- and mid- recursion in * the algorithm. Also, note that <fs>_readlink() is not used in * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink() * may return different results than <fs>_follow_link(). Many virtual * filesystems (including /proc) exhibit this behavior. */ /* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation: * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL * and the name already exists in form of a symlink, try to create the new * name indicated by the symlink. The old code always complained that the * name already exists, due to not following the symlink even if its target * is nonexistent. The new semantics affects also mknod() and link() when * the name is a symlink pointing to a non-existent name. * * I don't know which semantics is the right one, since I have no access * to standards. But I found by trial that HP-UX 9.0 has the full "new" * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the * "old" one. Personally, I think the new semantics is much more logical. * Note that "ln old new" where "new" is a symlink pointing to a non-existing * file does succeed in both HP-UX and SunOs, but not in Solaris * and in the old Linux semantics. */ /* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink * semantics. See the comments in "open_namei" and "do_link" below. * * [10-Sep-98 Alan Modra] Another symlink change. */ /* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks: * inside the path - always follow. * in the last component in creation/removal/renaming - never follow. * if LOOKUP_FOLLOW passed - follow. * if the pathname has trailing slashes - follow. * otherwise - don't follow. * (applied in that order). * * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT * restored for 2.4. This is the last surviving part of old 4.2BSD bug. * During the 2.4 we need to fix the userland stuff depending on it - * hopefully we will be able to get rid of that wart in 2.5. So far only * XEmacs seems to be relying on it... */ /* * [Sep 2001 AV] Single-semaphore locking scheme (kudos to David Holland) * implemented. Let's see if raised priority of ->s_vfs_rename_mutex gives * any extra contention... */ /* In order to reduce some races, while at the same time doing additional * checking and hopefully speeding things up, we copy filenames to the * kernel data space before using them.. * * POSIX.1 2.4: an empty pathname is invalid (ENOENT). * PATH_MAX includes the nul terminator --RR. */ #define EMBEDDED_NAME_MAX (PATH_MAX - offsetof(struct filename, iname)) struct filename * getname_flags(const char __user *filename, int flags) { struct filename *result; char *kname; int len; result = audit_reusename(filename); if (result) return result; result = __getname(); if (unlikely(!result)) return ERR_PTR(-ENOMEM); /* * First, try to embed the struct filename inside the names_cache * allocation */ kname = (char *)result->iname; result->name = kname; len = strncpy_from_user(kname, filename, EMBEDDED_NAME_MAX); /* * Handle both empty path and copy failure in one go. */ if (unlikely(len <= 0)) { if (unlikely(len < 0)) { __putname(result); return ERR_PTR(len); } /* The empty path is special. */ if (!(flags & LOOKUP_EMPTY)) { __putname(result); return ERR_PTR(-ENOENT); } } /* * Uh-oh. We have a name that's approaching PATH_MAX. Allocate a * separate struct filename so we can dedicate the entire * names_cache allocation for the pathname, and re-do the copy from * userland. */ if (unlikely(len == EMBEDDED_NAME_MAX)) { const size_t size = offsetof(struct filename, iname[1]); kname = (char *)result; /* * size is chosen that way we to guarantee that * result->iname[0] is within the same object and that * kname can't be equal to result->iname, no matter what. */ result = kzalloc(size, GFP_KERNEL); if (unlikely(!result)) { __putname(kname); return ERR_PTR(-ENOMEM); } result->name = kname; len = strncpy_from_user(kname, filename, PATH_MAX); if (unlikely(len < 0)) { __putname(kname); kfree(result); return ERR_PTR(len); } /* The empty path is special. */ if (unlikely(!len) && !(flags & LOOKUP_EMPTY)) { __putname(kname); kfree(result); return ERR_PTR(-ENOENT); } if (unlikely(len == PATH_MAX)) { __putname(kname); kfree(result); return ERR_PTR(-ENAMETOOLONG); } } atomic_set(&result->refcnt, 1); result->uptr = filename; result->aname = NULL; audit_getname(result); return result; } struct filename * getname_uflags(const char __user *filename, int uflags) { int flags = (uflags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0; return getname_flags(filename, flags); } struct filename * getname(const char __user * filename) { return getname_flags(filename, 0); } struct filename * getname_kernel(const char * filename) { struct filename *result; int len = strlen(filename) + 1; result = __getname(); if (unlikely(!result)) return ERR_PTR(-ENOMEM); if (len <= EMBEDDED_NAME_MAX) { result->name = (char *)result->iname; } else if (len <= PATH_MAX) { const size_t size = offsetof(struct filename, iname[1]); struct filename *tmp; tmp = kmalloc(size, GFP_KERNEL); if (unlikely(!tmp)) { __putname(result); return ERR_PTR(-ENOMEM); } tmp->name = (char *)result; result = tmp; } else { __putname(result); return ERR_PTR(-ENAMETOOLONG); } memcpy((char *)result->name, filename, len); result->uptr = NULL; result->aname = NULL; atomic_set(&result->refcnt, 1); audit_getname(result); return result; } EXPORT_SYMBOL(getname_kernel); void putname(struct filename *name) { if (IS_ERR(name)) return; if (WARN_ON_ONCE(!atomic_read(&name->refcnt))) return; if (!atomic_dec_and_test(&name->refcnt)) return; if (name->name != name->iname) { __putname(name->name); kfree(name); } else __putname(name); } EXPORT_SYMBOL(putname); /** * check_acl - perform ACL permission checking * @idmap: idmap of the mount the inode was found from * @inode: inode to check permissions on * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...) * * This function performs the ACL permission checking. Since this function * retrieve POSIX acls it needs to know whether it is called from a blocking or * non-blocking context and thus cares about the MAY_NOT_BLOCK bit. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. */ static int check_acl(struct mnt_idmap *idmap, struct inode *inode, int mask) { #ifdef CONFIG_FS_POSIX_ACL struct posix_acl *acl; if (mask & MAY_NOT_BLOCK) { acl = get_cached_acl_rcu(inode, ACL_TYPE_ACCESS); if (!acl) return -EAGAIN; /* no ->get_inode_acl() calls in RCU mode... */ if (is_uncached_acl(acl)) return -ECHILD; return posix_acl_permission(idmap, inode, acl, mask); } acl = get_inode_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl) { int error = posix_acl_permission(idmap, inode, acl, mask); posix_acl_release(acl); return error; } #endif return -EAGAIN; } /** * acl_permission_check - perform basic UNIX permission checking * @idmap: idmap of the mount the inode was found from * @inode: inode to check permissions on * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...) * * This function performs the basic UNIX permission checking. Since this * function may retrieve POSIX acls it needs to know whether it is called from a * blocking or non-blocking context and thus cares about the MAY_NOT_BLOCK bit. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. */ static int acl_permission_check(struct mnt_idmap *idmap, struct inode *inode, int mask) { unsigned int mode = inode->i_mode; vfsuid_t vfsuid; /* Are we the owner? If so, ACL's don't matter */ vfsuid = i_uid_into_vfsuid(idmap, inode); if (likely(vfsuid_eq_kuid(vfsuid, current_fsuid()))) { mask &= 7; mode >>= 6; return (mask & ~mode) ? -EACCES : 0; } /* Do we have ACL's? */ if (IS_POSIXACL(inode) && (mode & S_IRWXG)) { int error = check_acl(idmap, inode, mask); if (error != -EAGAIN) return error; } /* Only RWX matters for group/other mode bits */ mask &= 7; /* * Are the group permissions different from * the other permissions in the bits we care * about? Need to check group ownership if so. */ if (mask & (mode ^ (mode >> 3))) { vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode); if (vfsgid_in_group_p(vfsgid)) mode >>= 3; } /* Bits in 'mode' clear that we require? */ return (mask & ~mode) ? -EACCES : 0; } /** * generic_permission - check for access rights on a Posix-like filesystem * @idmap: idmap of the mount the inode was found from * @inode: inode to check access rights for * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC, * %MAY_NOT_BLOCK ...) * * Used to check for read/write/execute permissions on a file. * We use "fsuid" for this, letting us set arbitrary permissions * for filesystem access without changing the "normal" uids which * are used for other things. * * generic_permission is rcu-walk aware. It returns -ECHILD in case an rcu-walk * request cannot be satisfied (eg. requires blocking or too much complexity). * It would then be called again in ref-walk mode. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. */ int generic_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { int ret; /* * Do the basic permission checks. */ ret = acl_permission_check(idmap, inode, mask); if (ret != -EACCES) return ret; if (S_ISDIR(inode->i_mode)) { /* DACs are overridable for directories */ if (!(mask & MAY_WRITE)) if (capable_wrt_inode_uidgid(idmap, inode, CAP_DAC_READ_SEARCH)) return 0; if (capable_wrt_inode_uidgid(idmap, inode, CAP_DAC_OVERRIDE)) return 0; return -EACCES; } /* * Searching includes executable on directories, else just read. */ mask &= MAY_READ | MAY_WRITE | MAY_EXEC; if (mask == MAY_READ) if (capable_wrt_inode_uidgid(idmap, inode, CAP_DAC_READ_SEARCH)) return 0; /* * Read/write DACs are always overridable. * Executable DACs are overridable when there is * at least one exec bit set. */ if (!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO)) if (capable_wrt_inode_uidgid(idmap, inode, CAP_DAC_OVERRIDE)) return 0; return -EACCES; } EXPORT_SYMBOL(generic_permission); /** * do_inode_permission - UNIX permission checking * @idmap: idmap of the mount the inode was found from * @inode: inode to check permissions on * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...) * * We _really_ want to just do "generic_permission()" without * even looking at the inode->i_op values. So we keep a cache * flag in inode->i_opflags, that says "this has not special * permission function, use the fast case". */ static inline int do_inode_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { if (unlikely(!(inode->i_opflags & IOP_FASTPERM))) { if (likely(inode->i_op->permission)) return inode->i_op->permission(idmap, inode, mask); /* This gets set once for the inode lifetime */ spin_lock(&inode->i_lock); inode->i_opflags |= IOP_FASTPERM; spin_unlock(&inode->i_lock); } return generic_permission(idmap, inode, mask); } /** * sb_permission - Check superblock-level permissions * @sb: Superblock of inode to check permission on * @inode: Inode to check permission on * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) * * Separate out file-system wide checks from inode-specific permission checks. */ static int sb_permission(struct super_block *sb, struct inode *inode, int mask) { if (unlikely(mask & MAY_WRITE)) { umode_t mode = inode->i_mode; /* Nobody gets write access to a read-only fs. */ if (sb_rdonly(sb) && (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) return -EROFS; } return 0; } /** * inode_permission - Check for access rights to a given inode * @idmap: idmap of the mount the inode was found from * @inode: Inode to check permission on * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) * * Check for read/write/execute permissions on an inode. We use fs[ug]id for * this, letting us set arbitrary permissions for filesystem access without * changing the "normal" UIDs which are used for other things. * * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask. */ int inode_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { int retval; retval = sb_permission(inode->i_sb, inode, mask); if (retval) return retval; if (unlikely(mask & MAY_WRITE)) { /* * Nobody gets write access to an immutable file. */ if (IS_IMMUTABLE(inode)) return -EPERM; /* * Updating mtime will likely cause i_uid and i_gid to be * written back improperly if their true value is unknown * to the vfs. */ if (HAS_UNMAPPED_ID(idmap, inode)) return -EACCES; } retval = do_inode_permission(idmap, inode, mask); if (retval) return retval; retval = devcgroup_inode_permission(inode, mask); if (retval) return retval; return security_inode_permission(inode, mask); } EXPORT_SYMBOL(inode_permission); /** * path_get - get a reference to a path * @path: path to get the reference to * * Given a path increment the reference count to the dentry and the vfsmount. */ void path_get(const struct path *path) { mntget(path->mnt); dget(path->dentry); } EXPORT_SYMBOL(path_get); /** * path_put - put a reference to a path * @path: path to put the reference to * * Given a path decrement the reference count to the dentry and the vfsmount. */ void path_put(const struct path *path) { dput(path->dentry); mntput(path->mnt); } EXPORT_SYMBOL(path_put); #define EMBEDDED_LEVELS 2 struct nameidata { struct path path; struct qstr last; struct path root; struct inode *inode; /* path.dentry.d_inode */ unsigned int flags, state; unsigned seq, next_seq, m_seq, r_seq; int last_type; unsigned depth; int total_link_count; struct saved { struct path link; struct delayed_call done; const char *name; unsigned seq; } *stack, internal[EMBEDDED_LEVELS]; struct filename *name; struct nameidata *saved; unsigned root_seq; int dfd; vfsuid_t dir_vfsuid; umode_t dir_mode; } __randomize_layout; #define ND_ROOT_PRESET 1 #define ND_ROOT_GRABBED 2 #define ND_JUMPED 4 static void __set_nameidata(struct nameidata *p, int dfd, struct filename *name) { struct nameidata *old = current->nameidata; p->stack = p->internal; p->depth = 0; p->dfd = dfd; p->name = name; p->path.mnt = NULL; p->path.dentry = NULL; p->total_link_count = old ? old->total_link_count : 0; p->saved = old; current->nameidata = p; } static inline void set_nameidata(struct nameidata *p, int dfd, struct filename *name, const struct path *root) { __set_nameidata(p, dfd, name); p->state = 0; if (unlikely(root)) { p->state = ND_ROOT_PRESET; p->root = *root; } } static void restore_nameidata(void) { struct nameidata *now = current->nameidata, *old = now->saved; current->nameidata = old; if (old) old->total_link_count = now->total_link_count; if (now->stack != now->internal) kfree(now->stack); } static bool nd_alloc_stack(struct nameidata *nd) { struct saved *p; p= kmalloc_array(MAXSYMLINKS, sizeof(struct saved), nd->flags & LOOKUP_RCU ? GFP_ATOMIC : GFP_KERNEL); if (unlikely(!p)) return false; memcpy(p, nd->internal, sizeof(nd->internal)); nd->stack = p; return true; } /** * path_connected - Verify that a dentry is below mnt.mnt_root * @mnt: The mountpoint to check. * @dentry: The dentry to check. * * Rename can sometimes move a file or directory outside of a bind * mount, path_connected allows those cases to be detected. */ static bool path_connected(struct vfsmount *mnt, struct dentry *dentry) { struct super_block *sb = mnt->mnt_sb; /* Bind mounts can have disconnected paths */ if (mnt->mnt_root == sb->s_root) return true; return is_subdir(dentry, mnt->mnt_root); } static void drop_links(struct nameidata *nd) { int i = nd->depth; while (i--) { struct saved *last = nd->stack + i; do_delayed_call(&last->done); clear_delayed_call(&last->done); } } static void leave_rcu(struct nameidata *nd) { nd->flags &= ~LOOKUP_RCU; nd->seq = nd->next_seq = 0; rcu_read_unlock(); } static void terminate_walk(struct nameidata *nd) { drop_links(nd); if (!(nd->flags & LOOKUP_RCU)) { int i; path_put(&nd->path); for (i = 0; i < nd->depth; i++) path_put(&nd->stack[i].link); if (nd->state & ND_ROOT_GRABBED) { path_put(&nd->root); nd->state &= ~ND_ROOT_GRABBED; } } else { leave_rcu(nd); } nd->depth = 0; nd->path.mnt = NULL; nd->path.dentry = NULL; } /* path_put is needed afterwards regardless of success or failure */ static bool __legitimize_path(struct path *path, unsigned seq, unsigned mseq) { int res = __legitimize_mnt(path->mnt, mseq); if (unlikely(res)) { if (res > 0) path->mnt = NULL; path->dentry = NULL; return false; } if (unlikely(!lockref_get_not_dead(&path->dentry->d_lockref))) { path->dentry = NULL; return false; } return !read_seqcount_retry(&path->dentry->d_seq, seq); } static inline bool legitimize_path(struct nameidata *nd, struct path *path, unsigned seq) { return __legitimize_path(path, seq, nd->m_seq); } static bool legitimize_links(struct nameidata *nd) { int i; if (unlikely(nd->flags & LOOKUP_CACHED)) { drop_links(nd); nd->depth = 0; return false; } for (i = 0; i < nd->depth; i++) { struct saved *last = nd->stack + i; if (unlikely(!legitimize_path(nd, &last->link, last->seq))) { drop_links(nd); nd->depth = i + 1; return false; } } return true; } static bool legitimize_root(struct nameidata *nd) { /* Nothing to do if nd->root is zero or is managed by the VFS user. */ if (!nd->root.mnt || (nd->state & ND_ROOT_PRESET)) return true; nd->state |= ND_ROOT_GRABBED; return legitimize_path(nd, &nd->root, nd->root_seq); } /* * Path walking has 2 modes, rcu-walk and ref-walk (see * Documentation/filesystems/path-lookup.txt). In situations when we can't * continue in RCU mode, we attempt to drop out of rcu-walk mode and grab * normal reference counts on dentries and vfsmounts to transition to ref-walk * mode. Refcounts are grabbed at the last known good point before rcu-walk * got stuck, so ref-walk may continue from there. If this is not successful * (eg. a seqcount has changed), then failure is returned and it's up to caller * to restart the path walk from the beginning in ref-walk mode. */ /** * try_to_unlazy - try to switch to ref-walk mode. * @nd: nameidata pathwalk data * Returns: true on success, false on failure * * try_to_unlazy attempts to legitimize the current nd->path and nd->root * for ref-walk mode. * Must be called from rcu-walk context. * Nothing should touch nameidata between try_to_unlazy() failure and * terminate_walk(). */ static bool try_to_unlazy(struct nameidata *nd) { struct dentry *parent = nd->path.dentry; BUG_ON(!(nd->flags & LOOKUP_RCU)); if (unlikely(!legitimize_links(nd))) goto out1; if (unlikely(!legitimize_path(nd, &nd->path, nd->seq))) goto out; if (unlikely(!legitimize_root(nd))) goto out; leave_rcu(nd); BUG_ON(nd->inode != parent->d_inode); return true; out1: nd->path.mnt = NULL; nd->path.dentry = NULL; out: leave_rcu(nd); return false; } /** * try_to_unlazy_next - try to switch to ref-walk mode. * @nd: nameidata pathwalk data * @dentry: next dentry to step into * Returns: true on success, false on failure * * Similar to try_to_unlazy(), but here we have the next dentry already * picked by rcu-walk and want to legitimize that in addition to the current * nd->path and nd->root for ref-walk mode. Must be called from rcu-walk context. * Nothing should touch nameidata between try_to_unlazy_next() failure and * terminate_walk(). */ static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry) { int res; BUG_ON(!(nd->flags & LOOKUP_RCU)); if (unlikely(!legitimize_links(nd))) goto out2; res = __legitimize_mnt(nd->path.mnt, nd->m_seq); if (unlikely(res)) { if (res > 0) goto out2; goto out1; } if (unlikely(!lockref_get_not_dead(&nd->path.dentry->d_lockref))) goto out1; /* * We need to move both the parent and the dentry from the RCU domain * to be properly refcounted. And the sequence number in the dentry * validates *both* dentry counters, since we checked the sequence * number of the parent after we got the child sequence number. So we * know the parent must still be valid if the child sequence number is */ if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) goto out; if (read_seqcount_retry(&dentry->d_seq, nd->next_seq)) goto out_dput; /* * Sequence counts matched. Now make sure that the root is * still valid and get it if required. */ if (unlikely(!legitimize_root(nd))) goto out_dput; leave_rcu(nd); return true; out2: nd->path.mnt = NULL; out1: nd->path.dentry = NULL; out: leave_rcu(nd); return false; out_dput: leave_rcu(nd); dput(dentry); return false; } static inline int d_revalidate(struct dentry *dentry, unsigned int flags) { if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) return dentry->d_op->d_revalidate(dentry, flags); else return 1; } /** * complete_walk - successful completion of path walk * @nd: pointer nameidata * * If we had been in RCU mode, drop out of it and legitimize nd->path. * Revalidate the final result, unless we'd already done that during * the path walk or the filesystem doesn't ask for it. Return 0 on * success, -error on failure. In case of failure caller does not * need to drop nd->path. */ static int complete_walk(struct nameidata *nd) { struct dentry *dentry = nd->path.dentry; int status; if (nd->flags & LOOKUP_RCU) { /* * We don't want to zero nd->root for scoped-lookups or * externally-managed nd->root. */ if (!(nd->state & ND_ROOT_PRESET)) if (!(nd->flags & LOOKUP_IS_SCOPED)) nd->root.mnt = NULL; nd->flags &= ~LOOKUP_CACHED; if (!try_to_unlazy(nd)) return -ECHILD; } if (unlikely(nd->flags & LOOKUP_IS_SCOPED)) { /* * While the guarantee of LOOKUP_IS_SCOPED is (roughly) "don't * ever step outside the root during lookup" and should already * be guaranteed by the rest of namei, we want to avoid a namei * BUG resulting in userspace being given a path that was not * scoped within the root at some point during the lookup. * * So, do a final sanity-check to make sure that in the * worst-case scenario (a complete bypass of LOOKUP_IS_SCOPED) * we won't silently return an fd completely outside of the * requested root to userspace. * * Userspace could move the path outside the root after this * check, but as discussed elsewhere this is not a concern (the * resolved file was inside the root at some point). */ if (!path_is_under(&nd->path, &nd->root)) return -EXDEV; } if (likely(!(nd->state & ND_JUMPED))) return 0; if (likely(!(dentry->d_flags & DCACHE_OP_WEAK_REVALIDATE))) return 0; status = dentry->d_op->d_weak_revalidate(dentry, nd->flags); if (status > 0) return 0; if (!status) status = -ESTALE; return status; } static int set_root(struct nameidata *nd) { struct fs_struct *fs = current->fs; /* * Jumping to the real root in a scoped-lookup is a BUG in namei, but we * still have to ensure it doesn't happen because it will cause a breakout * from the dirfd. */ if (WARN_ON(nd->flags & LOOKUP_IS_SCOPED)) return -ENOTRECOVERABLE; if (nd->flags & LOOKUP_RCU) { unsigned seq; do { seq = read_seqcount_begin(&fs->seq); nd->root = fs->root; nd->root_seq = __read_seqcount_begin(&nd->root.dentry->d_seq); } while (read_seqcount_retry(&fs->seq, seq)); } else { get_fs_root(fs, &nd->root); nd->state |= ND_ROOT_GRABBED; } return 0; } static int nd_jump_root(struct nameidata *nd) { if (unlikely(nd->flags & LOOKUP_BENEATH)) return -EXDEV; if (unlikely(nd->flags & LOOKUP_NO_XDEV)) { /* Absolute path arguments to path_init() are allowed. */ if (nd->path.mnt != NULL && nd->path.mnt != nd->root.mnt) return -EXDEV; } if (!nd->root.mnt) { int error = set_root(nd); if (error) return error; } if (nd->flags & LOOKUP_RCU) { struct dentry *d; nd->path = nd->root; d = nd->path.dentry; nd->inode = d->d_inode; nd->seq = nd->root_seq; if (read_seqcount_retry(&d->d_seq, nd->seq)) return -ECHILD; } else { path_put(&nd->path); nd->path = nd->root; path_get(&nd->path); nd->inode = nd->path.dentry->d_inode; } nd->state |= ND_JUMPED; return 0; } /* * Helper to directly jump to a known parsed path from ->get_link, * caller must have taken a reference to path beforehand. */ int nd_jump_link(const struct path *path) { int error = -ELOOP; struct nameidata *nd = current->nameidata; if (unlikely(nd->flags & LOOKUP_NO_MAGICLINKS)) goto err; error = -EXDEV; if (unlikely(nd->flags & LOOKUP_NO_XDEV)) { if (nd->path.mnt != path->mnt) goto err; } /* Not currently safe for scoped-lookups. */ if (unlikely(nd->flags & LOOKUP_IS_SCOPED)) goto err; path_put(&nd->path); nd->path = *path; nd->inode = nd->path.dentry->d_inode; nd->state |= ND_JUMPED; return 0; err: path_put(path); return error; } static inline void put_link(struct nameidata *nd) { struct saved *last = nd->stack + --nd->depth; do_delayed_call(&last->done); if (!(nd->flags & LOOKUP_RCU)) path_put(&last->link); } static int sysctl_protected_symlinks __read_mostly; static int sysctl_protected_hardlinks __read_mostly; static int sysctl_protected_fifos __read_mostly; static int sysctl_protected_regular __read_mostly; #ifdef CONFIG_SYSCTL static struct ctl_table namei_sysctls[] = { { .procname = "protected_symlinks", .data = &sysctl_protected_symlinks, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, { .procname = "protected_hardlinks", .data = &sysctl_protected_hardlinks, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, { .procname = "protected_fifos", .data = &sysctl_protected_fifos, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_TWO, }, { .procname = "protected_regular", .data = &sysctl_protected_regular, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_TWO, }, }; static int __init init_fs_namei_sysctls(void) { register_sysctl_init("fs", namei_sysctls); return 0; } fs_initcall(init_fs_namei_sysctls); #endif /* CONFIG_SYSCTL */ /** * may_follow_link - Check symlink following for unsafe situations * @nd: nameidata pathwalk data * @inode: Used for idmapping. * * In the case of the sysctl_protected_symlinks sysctl being enabled, * CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is * in a sticky world-writable directory. This is to protect privileged * processes from failing races against path names that may change out * from under them by way of other users creating malicious symlinks. * It will permit symlinks to be followed only when outside a sticky * world-writable directory, or when the uid of the symlink and follower * match, or when the directory owner matches the symlink's owner. * * Returns 0 if following the symlink is allowed, -ve on error. */ static inline int may_follow_link(struct nameidata *nd, const struct inode *inode) { struct mnt_idmap *idmap; vfsuid_t vfsuid; if (!sysctl_protected_symlinks) return 0; idmap = mnt_idmap(nd->path.mnt); vfsuid = i_uid_into_vfsuid(idmap, inode); /* Allowed if owner and follower match. */ if (vfsuid_eq_kuid(vfsuid, current_fsuid())) return 0; /* Allowed if parent directory not sticky and world-writable. */ if ((nd->dir_mode & (S_ISVTX|S_IWOTH)) != (S_ISVTX|S_IWOTH)) return 0; /* Allowed if parent directory and link owner match. */ if (vfsuid_valid(nd->dir_vfsuid) && vfsuid_eq(nd->dir_vfsuid, vfsuid)) return 0; if (nd->flags & LOOKUP_RCU) return -ECHILD; audit_inode(nd->name, nd->stack[0].link.dentry, 0); audit_log_path_denied(AUDIT_ANOM_LINK, "follow_link"); return -EACCES; } /** * safe_hardlink_source - Check for safe hardlink conditions * @idmap: idmap of the mount the inode was found from * @inode: the source inode to hardlink from * * Return false if at least one of the following conditions: * - inode is not a regular file * - inode is setuid * - inode is setgid and group-exec * - access failure for read and write * * Otherwise returns true. */ static bool safe_hardlink_source(struct mnt_idmap *idmap, struct inode *inode) { umode_t mode = inode->i_mode; /* Special files should not get pinned to the filesystem. */ if (!S_ISREG(mode)) return false; /* Setuid files should not get pinned to the filesystem. */ if (mode & S_ISUID) return false; /* Executable setgid files should not get pinned to the filesystem. */ if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) return false; /* Hardlinking to unreadable or unwritable sources is dangerous. */ if (inode_permission(idmap, inode, MAY_READ | MAY_WRITE)) return false; return true; } /** * may_linkat - Check permissions for creating a hardlink * @idmap: idmap of the mount the inode was found from * @link: the source to hardlink from * * Block hardlink when all of: * - sysctl_protected_hardlinks enabled * - fsuid does not match inode * - hardlink source is unsafe (see safe_hardlink_source() above) * - not CAP_FOWNER in a namespace with the inode owner uid mapped * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. * * Returns 0 if successful, -ve on error. */ int may_linkat(struct mnt_idmap *idmap, const struct path *link) { struct inode *inode = link->dentry->d_inode; /* Inode writeback is not safe when the uid or gid are invalid. */ if (!vfsuid_valid(i_uid_into_vfsuid(idmap, inode)) || !vfsgid_valid(i_gid_into_vfsgid(idmap, inode))) return -EOVERFLOW; if (!sysctl_protected_hardlinks) return 0; /* Source inode owner (or CAP_FOWNER) can hardlink all they like, * otherwise, it must be a safe source. */ if (safe_hardlink_source(idmap, inode) || inode_owner_or_capable(idmap, inode)) return 0; audit_log_path_denied(AUDIT_ANOM_LINK, "linkat"); return -EPERM; } /** * may_create_in_sticky - Check whether an O_CREAT open in a sticky directory * should be allowed, or not, on files that already * exist. * @idmap: idmap of the mount the inode was found from * @nd: nameidata pathwalk data * @inode: the inode of the file to open * * Block an O_CREAT open of a FIFO (or a regular file) when: * - sysctl_protected_fifos (or sysctl_protected_regular) is enabled * - the file already exists * - we are in a sticky directory * - we don't own the file * - the owner of the directory doesn't own the file * - the directory is world writable * If the sysctl_protected_fifos (or sysctl_protected_regular) is set to 2 * the directory doesn't have to be world writable: being group writable will * be enough. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. * * Returns 0 if the open is allowed, -ve on error. */ static int may_create_in_sticky(struct mnt_idmap *idmap, struct nameidata *nd, struct inode *const inode) { umode_t dir_mode = nd->dir_mode; vfsuid_t dir_vfsuid = nd->dir_vfsuid, i_vfsuid; if (likely(!(dir_mode & S_ISVTX))) return 0; if (S_ISREG(inode->i_mode) && !sysctl_protected_regular) return 0; if (S_ISFIFO(inode->i_mode) && !sysctl_protected_fifos) return 0; i_vfsuid = i_uid_into_vfsuid(idmap, inode); if (vfsuid_eq(i_vfsuid, dir_vfsuid)) return 0; if (vfsuid_eq_kuid(i_vfsuid, current_fsuid())) return 0; if (likely(dir_mode & 0002)) { audit_log_path_denied(AUDIT_ANOM_CREAT, "sticky_create"); return -EACCES; } if (dir_mode & 0020) { if (sysctl_protected_fifos >= 2 && S_ISFIFO(inode->i_mode)) { audit_log_path_denied(AUDIT_ANOM_CREAT, "sticky_create_fifo"); return -EACCES; } if (sysctl_protected_regular >= 2 && S_ISREG(inode->i_mode)) { audit_log_path_denied(AUDIT_ANOM_CREAT, "sticky_create_regular"); return -EACCES; } } return 0; } /* * follow_up - Find the mountpoint of path's vfsmount * * Given a path, find the mountpoint of its source file system. * Replace @path with the path of the mountpoint in the parent mount. * Up is towards /. * * Return 1 if we went up a level and 0 if we were already at the * root. */ int follow_up(struct path *path) { struct mount *mnt = real_mount(path->mnt); struct mount *parent; struct dentry *mountpoint; read_seqlock_excl(&mount_lock); parent = mnt->mnt_parent; if (parent == mnt) { read_sequnlock_excl(&mount_lock); return 0; } mntget(&parent->mnt); mountpoint = dget(mnt->mnt_mountpoint); read_sequnlock_excl(&mount_lock); dput(path->dentry); path->dentry = mountpoint; mntput(path->mnt); path->mnt = &parent->mnt; return 1; } EXPORT_SYMBOL(follow_up); static bool choose_mountpoint_rcu(struct mount *m, const struct path *root, struct path *path, unsigned *seqp) { while (mnt_has_parent(m)) { struct dentry *mountpoint = m->mnt_mountpoint; m = m->mnt_parent; if (unlikely(root->dentry == mountpoint && root->mnt == &m->mnt)) break; if (mountpoint != m->mnt.mnt_root) { path->mnt = &m->mnt; path->dentry = mountpoint; *seqp = read_seqcount_begin(&mountpoint->d_seq); return true; } } return false; } static bool choose_mountpoint(struct mount *m, const struct path *root, struct path *path) { bool found; rcu_read_lock(); while (1) { unsigned seq, mseq = read_seqbegin(&mount_lock); found = choose_mountpoint_rcu(m, root, path, &seq); if (unlikely(!found)) { if (!read_seqretry(&mount_lock, mseq)) break; } else { if (likely(__legitimize_path(path, seq, mseq))) break; rcu_read_unlock(); path_put(path); rcu_read_lock(); } } rcu_read_unlock(); return found; } /* * Perform an automount * - return -EISDIR to tell follow_managed() to stop and return the path we * were called with. */ static int follow_automount(struct path *path, int *count, unsigned lookup_flags) { struct dentry *dentry = path->dentry; /* We don't want to mount if someone's just doing a stat - * unless they're stat'ing a directory and appended a '/' to * the name. * * We do, however, want to mount if someone wants to open or * create a file of any type under the mountpoint, wants to * traverse through the mountpoint or wants to open the * mounted directory. Also, autofs may mark negative dentries * as being automount points. These will need the attentions * of the daemon to instantiate them before they can be used. */ if (!(lookup_flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY | LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) && dentry->d_inode) return -EISDIR; if (count && (*count)++ >= MAXSYMLINKS) return -ELOOP; return finish_automount(dentry->d_op->d_automount(path), path); } /* * mount traversal - out-of-line part. One note on ->d_flags accesses - * dentries are pinned but not locked here, so negative dentry can go * positive right under us. Use of smp_load_acquire() provides a barrier * sufficient for ->d_inode and ->d_flags consistency. */ static int __traverse_mounts(struct path *path, unsigned flags, bool *jumped, int *count, unsigned lookup_flags) { struct vfsmount *mnt = path->mnt; bool need_mntput = false; int ret = 0; while (flags & DCACHE_MANAGED_DENTRY) { /* Allow the filesystem to manage the transit without i_mutex * being held. */ if (flags & DCACHE_MANAGE_TRANSIT) { ret = path->dentry->d_op->d_manage(path, false); flags = smp_load_acquire(&path->dentry->d_flags); if (ret < 0) break; } if (flags & DCACHE_MOUNTED) { // something's mounted on it.. struct vfsmount *mounted = lookup_mnt(path); if (mounted) { // ... in our namespace dput(path->dentry); if (need_mntput) mntput(path->mnt); path->mnt = mounted; path->dentry = dget(mounted->mnt_root); // here we know it's positive flags = path->dentry->d_flags; need_mntput = true; continue; } } if (!(flags & DCACHE_NEED_AUTOMOUNT)) break; // uncovered automount point ret = follow_automount(path, count, lookup_flags); flags = smp_load_acquire(&path->dentry->d_flags); if (ret < 0) break; } if (ret == -EISDIR) ret = 0; // possible if you race with several mount --move if (need_mntput && path->mnt == mnt) mntput(path->mnt); if (!ret && unlikely(d_flags_negative(flags))) ret = -ENOENT; *jumped = need_mntput; return ret; } static inline int traverse_mounts(struct path *path, bool *jumped, int *count, unsigned lookup_flags) { unsigned flags = smp_load_acquire(&path->dentry->d_flags); /* fastpath */ if (likely(!(flags & DCACHE_MANAGED_DENTRY))) { *jumped = false; if (unlikely(d_flags_negative(flags))) return -ENOENT; return 0; } return __traverse_mounts(path, flags, jumped, count, lookup_flags); } int follow_down_one(struct path *path) { struct vfsmount *mounted; mounted = lookup_mnt(path); if (mounted) { dput(path->dentry); mntput(path->mnt); path->mnt = mounted; path->dentry = dget(mounted->mnt_root); return 1; } return 0; } EXPORT_SYMBOL(follow_down_one); /* * Follow down to the covering mount currently visible to userspace. At each * point, the filesystem owning that dentry may be queried as to whether the * caller is permitted to proceed or not. */ int follow_down(struct path *path, unsigned int flags) { struct vfsmount *mnt = path->mnt; bool jumped; int ret = traverse_mounts(path, &jumped, NULL, flags); if (path->mnt != mnt) mntput(mnt); return ret; } EXPORT_SYMBOL(follow_down); /* * Try to skip to top of mountpoint pile in rcuwalk mode. Fail if * we meet a managed dentry that would need blocking. */ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path) { struct dentry *dentry = path->dentry; unsigned int flags = dentry->d_flags; if (likely(!(flags & DCACHE_MANAGED_DENTRY))) return true; if (unlikely(nd->flags & LOOKUP_NO_XDEV)) return false; for (;;) { /* * Don't forget we might have a non-mountpoint managed dentry * that wants to block transit. */ if (unlikely(flags & DCACHE_MANAGE_TRANSIT)) { int res = dentry->d_op->d_manage(path, true); if (res) return res == -EISDIR; flags = dentry->d_flags; } if (flags & DCACHE_MOUNTED) { struct mount *mounted = __lookup_mnt(path->mnt, dentry); if (mounted) { path->mnt = &mounted->mnt; dentry = path->dentry = mounted->mnt.mnt_root; nd->state |= ND_JUMPED; nd->next_seq = read_seqcount_begin(&dentry->d_seq); flags = dentry->d_flags; // makes sure that non-RCU pathwalk could reach // this state. if (read_seqretry(&mount_lock, nd->m_seq)) return false; continue; } if (read_seqretry(&mount_lock, nd->m_seq)) return false; } return !(flags & DCACHE_NEED_AUTOMOUNT); } } static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry, struct path *path) { bool jumped; int ret; path->mnt = nd->path.mnt; path->dentry = dentry; if (nd->flags & LOOKUP_RCU) { unsigned int seq = nd->next_seq; if (likely(__follow_mount_rcu(nd, path))) return 0; // *path and nd->next_seq might've been clobbered path->mnt = nd->path.mnt; path->dentry = dentry; nd->next_seq = seq; if (!try_to_unlazy_next(nd, dentry)) return -ECHILD; } ret = traverse_mounts(path, &jumped, &nd->total_link_count, nd->flags); if (jumped) { if (unlikely(nd->flags & LOOKUP_NO_XDEV)) ret = -EXDEV; else nd->state |= ND_JUMPED; } if (unlikely(ret)) { dput(path->dentry); if (path->mnt != nd->path.mnt) mntput(path->mnt); } return ret; } /* * This looks up the name in dcache and possibly revalidates the found dentry. * NULL is returned if the dentry does not exist in the cache. */ static struct dentry *lookup_dcache(const struct qstr *name, struct dentry *dir, unsigned int flags) { struct dentry *dentry = d_lookup(dir, name); if (dentry) { int error = d_revalidate(dentry, flags); if (unlikely(error <= 0)) { if (!error) d_invalidate(dentry); dput(dentry); return ERR_PTR(error); } } return dentry; } /* * Parent directory has inode locked exclusive. This is one * and only case when ->lookup() gets called on non in-lookup * dentries - as the matter of fact, this only gets called * when directory is guaranteed to have no in-lookup children * at all. */ struct dentry *lookup_one_qstr_excl(const struct qstr *name, struct dentry *base, unsigned int flags) { struct dentry *dentry = lookup_dcache(name, base, flags); struct dentry *old; struct inode *dir = base->d_inode; if (dentry) return dentry; /* Don't create child dentry for a dead directory. */ if (unlikely(IS_DEADDIR(dir))) return ERR_PTR(-ENOENT); dentry = d_alloc(base, name); if (unlikely(!dentry)) return ERR_PTR(-ENOMEM); old = dir->i_op->lookup(dir, dentry, flags); if (unlikely(old)) { dput(dentry); dentry = old; } return dentry; } EXPORT_SYMBOL(lookup_one_qstr_excl); static struct dentry *lookup_fast(struct nameidata *nd) { struct dentry *dentry, *parent = nd->path.dentry; int status = 1; /* * Rename seqlock is not required here because in the off chance * of a false negative due to a concurrent rename, the caller is * going to fall back to non-racy lookup. */ if (nd->flags & LOOKUP_RCU) { dentry = __d_lookup_rcu(parent, &nd->last, &nd->next_seq); if (unlikely(!dentry)) { if (!try_to_unlazy(nd)) return ERR_PTR(-ECHILD); return NULL; } /* * This sequence count validates that the parent had no * changes while we did the lookup of the dentry above. */ if (read_seqcount_retry(&parent->d_seq, nd->seq)) return ERR_PTR(-ECHILD); status = d_revalidate(dentry, nd->flags); if (likely(status > 0)) return dentry; if (!try_to_unlazy_next(nd, dentry)) return ERR_PTR(-ECHILD); if (status == -ECHILD) /* we'd been told to redo it in non-rcu mode */ status = d_revalidate(dentry, nd->flags); } else { dentry = __d_lookup(parent, &nd->last); if (unlikely(!dentry)) return NULL; status = d_revalidate(dentry, nd->flags); } if (unlikely(status <= 0)) { if (!status) d_invalidate(dentry); dput(dentry); return ERR_PTR(status); } return dentry; } /* Fast lookup failed, do it the slow way */ static struct dentry *__lookup_slow(const struct qstr *name, struct dentry *dir, unsigned int flags) { struct dentry *dentry, *old; struct inode *inode = dir->d_inode; DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); /* Don't go there if it's already dead */ if (unlikely(IS_DEADDIR(inode))) return ERR_PTR(-ENOENT); again: dentry = d_alloc_parallel(dir, name, &wq); if (IS_ERR(dentry)) return dentry; if (unlikely(!d_in_lookup(dentry))) { int error = d_revalidate(dentry, flags); if (unlikely(error <= 0)) { if (!error) { d_invalidate(dentry); dput(dentry); goto again; } dput(dentry); dentry = ERR_PTR(error); } } else { old = inode->i_op->lookup(inode, dentry, flags); d_lookup_done(dentry); if (unlikely(old)) { dput(dentry); dentry = old; } } return dentry; } static struct dentry *lookup_slow(const struct qstr *name, struct dentry *dir, unsigned int flags) { struct inode *inode = dir->d_inode; struct dentry *res; inode_lock_shared(inode); res = __lookup_slow(name, dir, flags); inode_unlock_shared(inode); return res; } static inline int may_lookup(struct mnt_idmap *idmap, struct nameidata *restrict nd) { int err, mask; mask = nd->flags & LOOKUP_RCU ? MAY_NOT_BLOCK : 0; err = inode_permission(idmap, nd->inode, mask | MAY_EXEC); if (likely(!err)) return 0; // If we failed, and we weren't in LOOKUP_RCU, it's final if (!(nd->flags & LOOKUP_RCU)) return err; // Drop out of RCU mode to make sure it wasn't transient if (!try_to_unlazy(nd)) return -ECHILD; // redo it all non-lazy if (err != -ECHILD) // hard error return err; return inode_permission(idmap, nd->inode, MAY_EXEC); } static int reserve_stack(struct nameidata *nd, struct path *link) { if (unlikely(nd->total_link_count++ >= MAXSYMLINKS)) return -ELOOP; if (likely(nd->depth != EMBEDDED_LEVELS)) return 0; if (likely(nd->stack != nd->internal)) return 0; if (likely(nd_alloc_stack(nd))) return 0; if (nd->flags & LOOKUP_RCU) { // we need to grab link before we do unlazy. And we can't skip // unlazy even if we fail to grab the link - cleanup needs it bool grabbed_link = legitimize_path(nd, link, nd->next_seq); if (!try_to_unlazy(nd) || !grabbed_link) return -ECHILD; if (nd_alloc_stack(nd)) return 0; } return -ENOMEM; } enum {WALK_TRAILING = 1, WALK_MORE = 2, WALK_NOFOLLOW = 4}; static const char *pick_link(struct nameidata *nd, struct path *link, struct inode *inode, int flags) { struct saved *last; const char *res; int error = reserve_stack(nd, link); if (unlikely(error)) { if (!(nd->flags & LOOKUP_RCU)) path_put(link); return ERR_PTR(error); } last = nd->stack + nd->depth++; last->link = *link; clear_delayed_call(&last->done); last->seq = nd->next_seq; if (flags & WALK_TRAILING) { error = may_follow_link(nd, inode); if (unlikely(error)) return ERR_PTR(error); } if (unlikely(nd->flags & LOOKUP_NO_SYMLINKS) || unlikely(link->mnt->mnt_flags & MNT_NOSYMFOLLOW)) return ERR_PTR(-ELOOP); if (!(nd->flags & LOOKUP_RCU)) { touch_atime(&last->link); cond_resched(); } else if (atime_needs_update(&last->link, inode)) { if (!try_to_unlazy(nd)) return ERR_PTR(-ECHILD); touch_atime(&last->link); } error = security_inode_follow_link(link->dentry, inode, nd->flags & LOOKUP_RCU); if (unlikely(error)) return ERR_PTR(error); res = READ_ONCE(inode->i_link); if (!res) { const char * (*get)(struct dentry *, struct inode *, struct delayed_call *); get = inode->i_op->get_link; if (nd->flags & LOOKUP_RCU) { res = get(NULL, inode, &last->done); if (res == ERR_PTR(-ECHILD) && try_to_unlazy(nd)) res = get(link->dentry, inode, &last->done); } else { res = get(link->dentry, inode, &last->done); } if (!res) goto all_done; if (IS_ERR(res)) return res; } if (*res == '/') { error = nd_jump_root(nd); if (unlikely(error)) return ERR_PTR(error); while (unlikely(*++res == '/')) ; } if (*res) return res; all_done: // pure jump put_link(nd); return NULL; } /* * Do we need to follow links? We _really_ want to be able * to do this check without having to look at inode->i_op, * so we keep a cache of "no, this doesn't need follow_link" * for the common case. * * NOTE: dentry must be what nd->next_seq had been sampled from. */ static const char *step_into(struct nameidata *nd, int flags, struct dentry *dentry) { struct path path; struct inode *inode; int err = handle_mounts(nd, dentry, &path); if (err < 0) return ERR_PTR(err); inode = path.dentry->d_inode; if (likely(!d_is_symlink(path.dentry)) || ((flags & WALK_TRAILING) && !(nd->flags & LOOKUP_FOLLOW)) || (flags & WALK_NOFOLLOW)) { /* not a symlink or should not follow */ if (nd->flags & LOOKUP_RCU) { if (read_seqcount_retry(&path.dentry->d_seq, nd->next_seq)) return ERR_PTR(-ECHILD); if (unlikely(!inode)) return ERR_PTR(-ENOENT); } else { dput(nd->path.dentry); if (nd->path.mnt != path.mnt) mntput(nd->path.mnt); } nd->path = path; nd->inode = inode; nd->seq = nd->next_seq; return NULL; } if (nd->flags & LOOKUP_RCU) { /* make sure that d_is_symlink above matches inode */ if (read_seqcount_retry(&path.dentry->d_seq, nd->next_seq)) return ERR_PTR(-ECHILD); } else { if (path.mnt == nd->path.mnt) mntget(path.mnt); } return pick_link(nd, &path, inode, flags); } static struct dentry *follow_dotdot_rcu(struct nameidata *nd) { struct dentry *parent, *old; if (path_equal(&nd->path, &nd->root)) goto in_root; if (unlikely(nd->path.dentry == nd->path.mnt->mnt_root)) { struct path path; unsigned seq; if (!choose_mountpoint_rcu(real_mount(nd->path.mnt), &nd->root, &path, &seq)) goto in_root; if (unlikely(nd->flags & LOOKUP_NO_XDEV)) return ERR_PTR(-ECHILD); nd->path = path; nd->inode = path.dentry->d_inode; nd->seq = seq; // makes sure that non-RCU pathwalk could reach this state if (read_seqretry(&mount_lock, nd->m_seq)) return ERR_PTR(-ECHILD); /* we know that mountpoint was pinned */ } old = nd->path.dentry; parent = old->d_parent; nd->next_seq = read_seqcount_begin(&parent->d_seq); // makes sure that non-RCU pathwalk could reach this state if (read_seqcount_retry(&old->d_seq, nd->seq)) return ERR_PTR(-ECHILD); if (unlikely(!path_connected(nd->path.mnt, parent))) return ERR_PTR(-ECHILD); return parent; in_root: if (read_seqretry(&mount_lock, nd->m_seq)) return ERR_PTR(-ECHILD); if (unlikely(nd->flags & LOOKUP_BENEATH)) return ERR_PTR(-ECHILD); nd->next_seq = nd->seq; return nd->path.dentry; } static struct dentry *follow_dotdot(struct nameidata *nd) { struct dentry *parent; if (path_equal(&nd->path, &nd->root)) goto in_root; if (unlikely(nd->path.dentry == nd->path.mnt->mnt_root)) { struct path path; if (!choose_mountpoint(real_mount(nd->path.mnt), &nd->root, &path)) goto in_root; path_put(&nd->path); nd->path = path; nd->inode = path.dentry->d_inode; if (unlikely(nd->flags & LOOKUP_NO_XDEV)) return ERR_PTR(-EXDEV); } /* rare case of legitimate dget_parent()... */ parent = dget_parent(nd->path.dentry); if (unlikely(!path_connected(nd->path.mnt, parent))) { dput(parent); return ERR_PTR(-ENOENT); } return parent; in_root: if (unlikely(nd->flags & LOOKUP_BENEATH)) return ERR_PTR(-EXDEV); return dget(nd->path.dentry); } static const char *handle_dots(struct nameidata *nd, int type) { if (type == LAST_DOTDOT) { const char *error = NULL; struct dentry *parent; if (!nd->root.mnt) { error = ERR_PTR(set_root(nd)); if (error) return error; } if (nd->flags & LOOKUP_RCU) parent = follow_dotdot_rcu(nd); else parent = follow_dotdot(nd); if (IS_ERR(parent)) return ERR_CAST(parent); error = step_into(nd, WALK_NOFOLLOW, parent); if (unlikely(error)) return error; if (unlikely(nd->flags & LOOKUP_IS_SCOPED)) { /* * If there was a racing rename or mount along our * path, then we can't be sure that ".." hasn't jumped * above nd->root (and so userspace should retry or use * some fallback). */ smp_rmb(); if (__read_seqcount_retry(&mount_lock.seqcount, nd->m_seq)) return ERR_PTR(-EAGAIN); if (__read_seqcount_retry(&rename_lock.seqcount, nd->r_seq)) return ERR_PTR(-EAGAIN); } } return NULL; } static const char *walk_component(struct nameidata *nd, int flags) { struct dentry *dentry; /* * "." and ".." are special - ".." especially so because it has * to be able to know about the current root directory and * parent relationships. */ if (unlikely(nd->last_type != LAST_NORM)) { if (!(flags & WALK_MORE) && nd->depth) put_link(nd); return handle_dots(nd, nd->last_type); } dentry = lookup_fast(nd); if (IS_ERR(dentry)) return ERR_CAST(dentry); if (unlikely(!dentry)) { dentry = lookup_slow(&nd->last, nd->path.dentry, nd->flags); if (IS_ERR(dentry)) return ERR_CAST(dentry); } if (!(flags & WALK_MORE) && nd->depth) put_link(nd); return step_into(nd, flags, dentry); } /* * We can do the critical dentry name comparison and hashing * operations one word at a time, but we are limited to: * * - Architectures with fast unaligned word accesses. We could * do a "get_unaligned()" if this helps and is sufficiently * fast. * * - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we * do not trap on the (extremely unlikely) case of a page * crossing operation. * * - Furthermore, we need an efficient 64-bit compile for the * 64-bit case in order to generate the "number of bytes in * the final mask". Again, that could be replaced with a * efficient population count instruction or similar. */ #ifdef CONFIG_DCACHE_WORD_ACCESS #include <asm/word-at-a-time.h> #ifdef HASH_MIX /* Architecture provides HASH_MIX and fold_hash() in <asm/hash.h> */ #elif defined(CONFIG_64BIT) /* * Register pressure in the mixing function is an issue, particularly * on 32-bit x86, but almost any function requires one state value and * one temporary. Instead, use a function designed for two state values * and no temporaries. * * This function cannot create a collision in only two iterations, so * we have two iterations to achieve avalanche. In those two iterations, * we have six layers of mixing, which is enough to spread one bit's * influence out to 2^6 = 64 state bits. * * Rotate constants are scored by considering either 64 one-bit input * deltas or 64*63/2 = 2016 two-bit input deltas, and finding the * probability of that delta causing a change to each of the 128 output * bits, using a sample of random initial states. * * The Shannon entropy of the computed probabilities is then summed * to produce a score. Ideally, any input change has a 50% chance of * toggling any given output bit. * * Mixing scores (in bits) for (12,45): * Input delta: 1-bit 2-bit * 1 round: 713.3 42542.6 * 2 rounds: 2753.7 140389.8 * 3 rounds: 5954.1 233458.2 * 4 rounds: 7862.6 256672.2 * Perfect: 8192 258048 * (64*128) (64*63/2 * 128) */ #define HASH_MIX(x, y, a) \ ( x ^= (a), \ y ^= x, x = rol64(x,12),\ x += y, y = rol64(y,45),\ y *= 9 ) /* * Fold two longs into one 32-bit hash value. This must be fast, but * latency isn't quite as critical, as there is a fair bit of additional * work done before the hash value is used. */ static inline unsigned int fold_hash(unsigned long x, unsigned long y) { y ^= x * GOLDEN_RATIO_64; y *= GOLDEN_RATIO_64; return y >> 32; } #else /* 32-bit case */ /* * Mixing scores (in bits) for (7,20): * Input delta: 1-bit 2-bit * 1 round: 330.3 9201.6 * 2 rounds: 1246.4 25475.4 * 3 rounds: 1907.1 31295.1 * 4 rounds: 2042.3 31718.6 * Perfect: 2048 31744 * (32*64) (32*31/2 * 64) */ #define HASH_MIX(x, y, a) \ ( x ^= (a), \ y ^= x, x = rol32(x, 7),\ x += y, y = rol32(y,20),\ y *= 9 ) static inline unsigned int fold_hash(unsigned long x, unsigned long y) { /* Use arch-optimized multiply if one exists */ return __hash_32(y ^ __hash_32(x)); } #endif /* * Return the hash of a string of known length. This is carfully * designed to match hash_name(), which is the more critical function. * In particular, we must end by hashing a final word containing 0..7 * payload bytes, to match the way that hash_name() iterates until it * finds the delimiter after the name. */ unsigned int full_name_hash(const void *salt, const char *name, unsigned int len) { unsigned long a, x = 0, y = (unsigned long)salt; for (;;) { if (!len) goto done; a = load_unaligned_zeropad(name); if (len < sizeof(unsigned long)) break; HASH_MIX(x, y, a); name += sizeof(unsigned long); len -= sizeof(unsigned long); } x ^= a & bytemask_from_count(len); done: return fold_hash(x, y); } EXPORT_SYMBOL(full_name_hash); /* Return the "hash_len" (hash and length) of a null-terminated string */ u64 hashlen_string(const void *salt, const char *name) { unsigned long a = 0, x = 0, y = (unsigned long)salt; unsigned long adata, mask, len; const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; len = 0; goto inside; do { HASH_MIX(x, y, a); len += sizeof(unsigned long); inside: a = load_unaligned_zeropad(name+len); } while (!has_zero(a, &adata, &constants)); adata = prep_zero_mask(a, adata, &constants); mask = create_zero_mask(adata); x ^= a & zero_bytemask(mask); return hashlen_create(fold_hash(x, y), len + find_zero(mask)); } EXPORT_SYMBOL(hashlen_string); /* * Calculate the length and hash of the path component, and * return the length as the result. */ static inline const char *hash_name(struct nameidata *nd, const char *name, unsigned long *lastword) { unsigned long a, b, x, y = (unsigned long)nd->path.dentry; unsigned long adata, bdata, mask, len; const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; /* * The first iteration is special, because it can result in * '.' and '..' and has no mixing other than the final fold. */ a = load_unaligned_zeropad(name); b = a ^ REPEAT_BYTE('/'); if (has_zero(a, &adata, &constants) | has_zero(b, &bdata, &constants)) { adata = prep_zero_mask(a, adata, &constants); bdata = prep_zero_mask(b, bdata, &constants); mask = create_zero_mask(adata | bdata); a &= zero_bytemask(mask); *lastword = a; len = find_zero(mask); nd->last.hash = fold_hash(a, y); nd->last.len = len; return name + len; } len = 0; x = 0; do { HASH_MIX(x, y, a); len += sizeof(unsigned long); a = load_unaligned_zeropad(name+len); b = a ^ REPEAT_BYTE('/'); } while (!(has_zero(a, &adata, &constants) | has_zero(b, &bdata, &constants))); adata = prep_zero_mask(a, adata, &constants); bdata = prep_zero_mask(b, bdata, &constants); mask = create_zero_mask(adata | bdata); a &= zero_bytemask(mask); x ^= a; len += find_zero(mask); *lastword = 0; // Multi-word components cannot be DOT or DOTDOT nd->last.hash = fold_hash(x, y); nd->last.len = len; return name + len; } /* * Note that the 'last' word is always zero-masked, but * was loaded as a possibly big-endian word. */ #ifdef __BIG_ENDIAN #define LAST_WORD_IS_DOT (0x2eul << (BITS_PER_LONG-8)) #define LAST_WORD_IS_DOTDOT (0x2e2eul << (BITS_PER_LONG-16)) #endif #else /* !CONFIG_DCACHE_WORD_ACCESS: Slow, byte-at-a-time version */ /* Return the hash of a string of known length */ unsigned int full_name_hash(const void *salt, const char *name, unsigned int len) { unsigned long hash = init_name_hash(salt); while (len--) hash = partial_name_hash((unsigned char)*name++, hash); return end_name_hash(hash); } EXPORT_SYMBOL(full_name_hash); /* Return the "hash_len" (hash and length) of a null-terminated string */ u64 hashlen_string(const void *salt, const char *name) { unsigned long hash = init_name_hash(salt); unsigned long len = 0, c; c = (unsigned char)*name; while (c) { len++; hash = partial_name_hash(c, hash); c = (unsigned char)name[len]; } return hashlen_create(end_name_hash(hash), len); } EXPORT_SYMBOL(hashlen_string); /* * We know there's a real path component here of at least * one character. */ static inline const char *hash_name(struct nameidata *nd, const char *name, unsigned long *lastword) { unsigned long hash = init_name_hash(nd->path.dentry); unsigned long len = 0, c, last = 0; c = (unsigned char)*name; do { last = (last << 8) + c; len++; hash = partial_name_hash(c, hash); c = (unsigned char)name[len]; } while (c && c != '/'); // This is reliable for DOT or DOTDOT, since the component // cannot contain NUL characters - top bits being zero means // we cannot have had any other pathnames. *lastword = last; nd->last.hash = end_name_hash(hash); nd->last.len = len; return name + len; } #endif #ifndef LAST_WORD_IS_DOT #define LAST_WORD_IS_DOT 0x2e #define LAST_WORD_IS_DOTDOT 0x2e2e #endif /* * Name resolution. * This is the basic name resolution function, turning a pathname into * the final dentry. We expect 'base' to be positive and a directory. * * Returns 0 and nd will have valid dentry and mnt on success. * Returns error and drops reference to input namei data on failure. */ static int link_path_walk(const char *name, struct nameidata *nd) { int depth = 0; // depth <= nd->depth int err; nd->last_type = LAST_ROOT; nd->flags |= LOOKUP_PARENT; if (IS_ERR(name)) return PTR_ERR(name); while (*name=='/') name++; if (!*name) { nd->dir_mode = 0; // short-circuit the 'hardening' idiocy return 0; } /* At this point we know we have a real path component. */ for(;;) { struct mnt_idmap *idmap; const char *link; unsigned long lastword; idmap = mnt_idmap(nd->path.mnt); err = may_lookup(idmap, nd); if (err) return err; nd->last.name = name; name = hash_name(nd, name, &lastword); switch(lastword) { case LAST_WORD_IS_DOTDOT: nd->last_type = LAST_DOTDOT; nd->state |= ND_JUMPED; break; case LAST_WORD_IS_DOT: nd->last_type = LAST_DOT; break; default: nd->last_type = LAST_NORM; nd->state &= ~ND_JUMPED; struct dentry *parent = nd->path.dentry; if (unlikely(parent->d_flags & DCACHE_OP_HASH)) { err = parent->d_op->d_hash(parent, &nd->last); if (err < 0) return err; } } if (!*name) goto OK; /* * If it wasn't NUL, we know it was '/'. Skip that * slash, and continue until no more slashes. */ do { name++; } while (unlikely(*name == '/')); if (unlikely(!*name)) { OK: /* pathname or trailing symlink, done */ if (!depth) { nd->dir_vfsuid = i_uid_into_vfsuid(idmap, nd->inode); nd->dir_mode = nd->inode->i_mode; nd->flags &= ~LOOKUP_PARENT; return 0; } /* last component of nested symlink */ name = nd->stack[--depth].name; link = walk_component(nd, 0); } else { /* not the last component */ link = walk_component(nd, WALK_MORE); } if (unlikely(link)) { if (IS_ERR(link)) return PTR_ERR(link); /* a symlink to follow */ nd->stack[depth++].name = name; name = link; continue; } if (unlikely(!d_can_lookup(nd->path.dentry))) { if (nd->flags & LOOKUP_RCU) { if (!try_to_unlazy(nd)) return -ECHILD; } return -ENOTDIR; } } } /* must be paired with terminate_walk() */ static const char *path_init(struct nameidata *nd, unsigned flags) { int error; const char *s = nd->name->name; /* LOOKUP_CACHED requires RCU, ask caller to retry */ if ((flags & (LOOKUP_RCU | LOOKUP_CACHED)) == LOOKUP_CACHED) return ERR_PTR(-EAGAIN); if (!*s) flags &= ~LOOKUP_RCU; if (flags & LOOKUP_RCU) rcu_read_lock(); else nd->seq = nd->next_seq = 0; nd->flags = flags; nd->state |= ND_JUMPED; nd->m_seq = __read_seqcount_begin(&mount_lock.seqcount); nd->r_seq = __read_seqcount_begin(&rename_lock.seqcount); smp_rmb(); if (nd->state & ND_ROOT_PRESET) { struct dentry *root = nd->root.dentry; struct inode *inode = root->d_inode; if (*s && unlikely(!d_can_lookup(root))) return ERR_PTR(-ENOTDIR); nd->path = nd->root; nd->inode = inode; if (flags & LOOKUP_RCU) { nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); nd->root_seq = nd->seq; } else { path_get(&nd->path); } return s; } nd->root.mnt = NULL; /* Absolute pathname -- fetch the root (LOOKUP_IN_ROOT uses nd->dfd). */ if (*s == '/' && !(flags & LOOKUP_IN_ROOT)) { error = nd_jump_root(nd); if (unlikely(error)) return ERR_PTR(error); return s; } /* Relative pathname -- get the starting-point it is relative to. */ if (nd->dfd == AT_FDCWD) { if (flags & LOOKUP_RCU) { struct fs_struct *fs = current->fs; unsigned seq; do { seq = read_seqcount_begin(&fs->seq); nd->path = fs->pwd; nd->inode = nd->path.dentry->d_inode; nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); } while (read_seqcount_retry(&fs->seq, seq)); } else { get_fs_pwd(current->fs, &nd->path); nd->inode = nd->path.dentry->d_inode; } } else { /* Caller must check execute permissions on the starting path component */ struct fd f = fdget_raw(nd->dfd); struct dentry *dentry; if (!f.file) return ERR_PTR(-EBADF); if (flags & LOOKUP_LINKAT_EMPTY) { if (f.file->f_cred != current_cred() && !ns_capable(f.file->f_cred->user_ns, CAP_DAC_READ_SEARCH)) { fdput(f); return ERR_PTR(-ENOENT); } } dentry = f.file->f_path.dentry; if (*s && unlikely(!d_can_lookup(dentry))) { fdput(f); return ERR_PTR(-ENOTDIR); } nd->path = f.file->f_path; if (flags & LOOKUP_RCU) { nd->inode = nd->path.dentry->d_inode; nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); } else { path_get(&nd->path); nd->inode = nd->path.dentry->d_inode; } fdput(f); } /* For scoped-lookups we need to set the root to the dirfd as well. */ if (flags & LOOKUP_IS_SCOPED) { nd->root = nd->path; if (flags & LOOKUP_RCU) { nd->root_seq = nd->seq; } else { path_get(&nd->root); nd->state |= ND_ROOT_GRABBED; } } return s; } static inline const char *lookup_last(struct nameidata *nd) { if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len]) nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; return walk_component(nd, WALK_TRAILING); } static int handle_lookup_down(struct nameidata *nd) { if (!(nd->flags & LOOKUP_RCU)) dget(nd->path.dentry); nd->next_seq = nd->seq; return PTR_ERR(step_into(nd, WALK_NOFOLLOW, nd->path.dentry)); } /* Returns 0 and nd will be valid on success; Returns error, otherwise. */ static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path) { const char *s = path_init(nd, flags); int err; if (unlikely(flags & LOOKUP_DOWN) && !IS_ERR(s)) { err = handle_lookup_down(nd); if (unlikely(err < 0)) s = ERR_PTR(err); } while (!(err = link_path_walk(s, nd)) && (s = lookup_last(nd)) != NULL) ; if (!err && unlikely(nd->flags & LOOKUP_MOUNTPOINT)) { err = handle_lookup_down(nd); nd->state &= ~ND_JUMPED; // no d_weak_revalidate(), please... } if (!err) err = complete_walk(nd); if (!err && nd->flags & LOOKUP_DIRECTORY) if (!d_can_lookup(nd->path.dentry)) err = -ENOTDIR; if (!err) { *path = nd->path; nd->path.mnt = NULL; nd->path.dentry = NULL; } terminate_walk(nd); return err; } int filename_lookup(int dfd, struct filename *name, unsigned flags, struct path *path, struct path *root) { int retval; struct nameidata nd; if (IS_ERR(name)) return PTR_ERR(name); set_nameidata(&nd, dfd, name, root); retval = path_lookupat(&nd, flags | LOOKUP_RCU, path); if (unlikely(retval == -ECHILD)) retval = path_lookupat(&nd, flags, path); if (unlikely(retval == -ESTALE)) retval = path_lookupat(&nd, flags | LOOKUP_REVAL, path); if (likely(!retval)) audit_inode(name, path->dentry, flags & LOOKUP_MOUNTPOINT ? AUDIT_INODE_NOEVAL : 0); restore_nameidata(); return retval; } /* Returns 0 and nd will be valid on success; Returns error, otherwise. */ static int path_parentat(struct nameidata *nd, unsigned flags, struct path *parent) { const char *s = path_init(nd, flags); int err = link_path_walk(s, nd); if (!err) err = complete_walk(nd); if (!err) { *parent = nd->path; nd->path.mnt = NULL; nd->path.dentry = NULL; } terminate_walk(nd); return err; } /* Note: this does not consume "name" */ static int __filename_parentat(int dfd, struct filename *name, unsigned int flags, struct path *parent, struct qstr *last, int *type, const struct path *root) { int retval; struct nameidata nd; if (IS_ERR(name)) return PTR_ERR(name); set_nameidata(&nd, dfd, name, root); retval = path_parentat(&nd, flags | LOOKUP_RCU, parent); if (unlikely(retval == -ECHILD)) retval = path_parentat(&nd, flags, parent); if (unlikely(retval == -ESTALE)) retval = path_parentat(&nd, flags | LOOKUP_REVAL, parent); if (likely(!retval)) { *last = nd.last; *type = nd.last_type; audit_inode(name, parent->dentry, AUDIT_INODE_PARENT); } restore_nameidata(); return retval; } static int filename_parentat(int dfd, struct filename *name, unsigned int flags, struct path *parent, struct qstr *last, int *type) { return __filename_parentat(dfd, name, flags, parent, last, type, NULL); } /* does lookup, returns the object with parent locked */ static struct dentry *__kern_path_locked(int dfd, struct filename *name, struct path *path) { struct dentry *d; struct qstr last; int type, error; error = filename_parentat(dfd, name, 0, path, &last, &type); if (error) return ERR_PTR(error); if (unlikely(type != LAST_NORM)) { path_put(path); return ERR_PTR(-EINVAL); } inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT); d = lookup_one_qstr_excl(&last, path->dentry, 0); if (IS_ERR(d)) { inode_unlock(path->dentry->d_inode); path_put(path); } return d; } struct dentry *kern_path_locked(const char *name, struct path *path) { struct filename *filename = getname_kernel(name); struct dentry *res = __kern_path_locked(AT_FDCWD, filename, path); putname(filename); return res; } struct dentry *user_path_locked_at(int dfd, const char __user *name, struct path *path) { struct filename *filename = getname(name); struct dentry *res = __kern_path_locked(dfd, filename, path); putname(filename); return res; } EXPORT_SYMBOL(user_path_locked_at); int kern_path(const char *name, unsigned int flags, struct path *path) { struct filename *filename = getname_kernel(name); int ret = filename_lookup(AT_FDCWD, filename, flags, path, NULL); putname(filename); return ret; } EXPORT_SYMBOL(kern_path); /** * vfs_path_parent_lookup - lookup a parent path relative to a dentry-vfsmount pair * @filename: filename structure * @flags: lookup flags * @parent: pointer to struct path to fill * @last: last component * @type: type of the last component * @root: pointer to struct path of the base directory */ int vfs_path_parent_lookup(struct filename *filename, unsigned int flags, struct path *parent, struct qstr *last, int *type, const struct path *root) { return __filename_parentat(AT_FDCWD, filename, flags, parent, last, type, root); } EXPORT_SYMBOL(vfs_path_parent_lookup); /** * vfs_path_lookup - lookup a file path relative to a dentry-vfsmount pair * @dentry: pointer to dentry of the base directory * @mnt: pointer to vfs mount of the base directory * @name: pointer to file name * @flags: lookup flags * @path: pointer to struct path to fill */ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, const char *name, unsigned int flags, struct path *path) { struct filename *filename; struct path root = {.mnt = mnt, .dentry = dentry}; int ret; filename = getname_kernel(name); /* the first argument of filename_lookup() is ignored with root */ ret = filename_lookup(AT_FDCWD, filename, flags, path, &root); putname(filename); return ret; } EXPORT_SYMBOL(vfs_path_lookup); static int lookup_one_common(struct mnt_idmap *idmap, const char *name, struct dentry *base, int len, struct qstr *this) { this->name = name; this->len = len; this->hash = full_name_hash(base, name, len); if (!len) return -EACCES; if (is_dot_dotdot(name, len)) return -EACCES; while (len--) { unsigned int c = *(const unsigned char *)name++; if (c == '/' || c == '\0') return -EACCES; } /* * See if the low-level filesystem might want * to use its own hash.. */ if (base->d_flags & DCACHE_OP_HASH) { int err = base->d_op->d_hash(base, this); if (err < 0) return err; } return inode_permission(idmap, base->d_inode, MAY_EXEC); } /** * try_lookup_one_len - filesystem helper to lookup single pathname component * @name: pathname component to lookup * @base: base directory to lookup from * @len: maximum length @len should be interpreted to * * Look up a dentry by name in the dcache, returning NULL if it does not * currently exist. The function does not try to create a dentry. * * Note that this routine is purely a helper for filesystem usage and should * not be called by generic code. * * The caller must hold base->i_mutex. */ struct dentry *try_lookup_one_len(const char *name, struct dentry *base, int len) { struct qstr this; int err; WARN_ON_ONCE(!inode_is_locked(base->d_inode)); err = lookup_one_common(&nop_mnt_idmap, name, base, len, &this); if (err) return ERR_PTR(err); return lookup_dcache(&this, base, 0); } EXPORT_SYMBOL(try_lookup_one_len); /** * lookup_one_len - filesystem helper to lookup single pathname component * @name: pathname component to lookup * @base: base directory to lookup from * @len: maximum length @len should be interpreted to * * Note that this routine is purely a helper for filesystem usage and should * not be called by generic code. * * The caller must hold base->i_mutex. */ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) { struct dentry *dentry; struct qstr this; int err; WARN_ON_ONCE(!inode_is_locked(base->d_inode)); err = lookup_one_common(&nop_mnt_idmap, name, base, len, &this); if (err) return ERR_PTR(err); dentry = lookup_dcache(&this, base, 0); return dentry ? dentry : __lookup_slow(&this, base, 0); } EXPORT_SYMBOL(lookup_one_len); /** * lookup_one - filesystem helper to lookup single pathname component * @idmap: idmap of the mount the lookup is performed from * @name: pathname component to lookup * @base: base directory to lookup from * @len: maximum length @len should be interpreted to * * Note that this routine is purely a helper for filesystem usage and should * not be called by generic code. * * The caller must hold base->i_mutex. */ struct dentry *lookup_one(struct mnt_idmap *idmap, const char *name, struct dentry *base, int len) { struct dentry *dentry; struct qstr this; int err; WARN_ON_ONCE(!inode_is_locked(base->d_inode)); err = lookup_one_common(idmap, name, base, len, &this); if (err) return ERR_PTR(err); dentry = lookup_dcache(&this, base, 0); return dentry ? dentry : __lookup_slow(&this, base, 0); } EXPORT_SYMBOL(lookup_one); /** * lookup_one_unlocked - filesystem helper to lookup single pathname component * @idmap: idmap of the mount the lookup is performed from * @name: pathname component to lookup * @base: base directory to lookup from * @len: maximum length @len should be interpreted to * * Note that this routine is purely a helper for filesystem usage and should * not be called by generic code. * * Unlike lookup_one_len, it should be called without the parent * i_mutex held, and will take the i_mutex itself if necessary. */ struct dentry *lookup_one_unlocked(struct mnt_idmap *idmap, const char *name, struct dentry *base, int len) { struct qstr this; int err; struct dentry *ret; err = lookup_one_common(idmap, name, base, len, &this); if (err) return ERR_PTR(err); ret = lookup_dcache(&this, base, 0); if (!ret) ret = lookup_slow(&this, base, 0); return ret; } EXPORT_SYMBOL(lookup_one_unlocked); /** * lookup_one_positive_unlocked - filesystem helper to lookup single * pathname component * @idmap: idmap of the mount the lookup is performed from * @name: pathname component to lookup * @base: base directory to lookup from * @len: maximum length @len should be interpreted to * * This helper will yield ERR_PTR(-ENOENT) on negatives. The helper returns * known positive or ERR_PTR(). This is what most of the users want. * * Note that pinned negative with unlocked parent _can_ become positive at any * time, so callers of lookup_one_unlocked() need to be very careful; pinned * positives have >d_inode stable, so this one avoids such problems. * * Note that this routine is purely a helper for filesystem usage and should * not be called by generic code. * * The helper should be called without i_mutex held. */ struct dentry *lookup_one_positive_unlocked(struct mnt_idmap *idmap, const char *name, struct dentry *base, int len) { struct dentry *ret = lookup_one_unlocked(idmap, name, base, len); if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) { dput(ret); ret = ERR_PTR(-ENOENT); } return ret; } EXPORT_SYMBOL(lookup_one_positive_unlocked); /** * lookup_one_len_unlocked - filesystem helper to lookup single pathname component * @name: pathname component to lookup * @base: base directory to lookup from * @len: maximum length @len should be interpreted to * * Note that this routine is purely a helper for filesystem usage and should * not be called by generic code. * * Unlike lookup_one_len, it should be called without the parent * i_mutex held, and will take the i_mutex itself if necessary. */ struct dentry *lookup_one_len_unlocked(const char *name, struct dentry *base, int len) { return lookup_one_unlocked(&nop_mnt_idmap, name, base, len); } EXPORT_SYMBOL(lookup_one_len_unlocked); /* * Like lookup_one_len_unlocked(), except that it yields ERR_PTR(-ENOENT) * on negatives. Returns known positive or ERR_PTR(); that's what * most of the users want. Note that pinned negative with unlocked parent * _can_ become positive at any time, so callers of lookup_one_len_unlocked() * need to be very careful; pinned positives have ->d_inode stable, so * this one avoids such problems. */ struct dentry *lookup_positive_unlocked(const char *name, struct dentry *base, int len) { return lookup_one_positive_unlocked(&nop_mnt_idmap, name, base, len); } EXPORT_SYMBOL(lookup_positive_unlocked); #ifdef CONFIG_UNIX98_PTYS int path_pts(struct path *path) { /* Find something mounted on "pts" in the same directory as * the input path. */ struct dentry *parent = dget_parent(path->dentry); struct dentry *child; struct qstr this = QSTR_INIT("pts", 3); if (unlikely(!path_connected(path->mnt, parent))) { dput(parent); return -ENOENT; } dput(path->dentry); path->dentry = parent; child = d_hash_and_lookup(parent, &this); if (IS_ERR_OR_NULL(child)) return -ENOENT; path->dentry = child; dput(parent); follow_down(path, 0); return 0; } #endif int user_path_at(int dfd, const char __user *name, unsigned flags, struct path *path) { struct filename *filename = getname_flags(name, flags); int ret = filename_lookup(dfd, filename, flags, path, NULL); putname(filename); return ret; } EXPORT_SYMBOL(user_path_at); int __check_sticky(struct mnt_idmap *idmap, struct inode *dir, struct inode *inode) { kuid_t fsuid = current_fsuid(); if (vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode), fsuid)) return 0; if (vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, dir), fsuid)) return 0; return !capable_wrt_inode_uidgid(idmap, inode, CAP_FOWNER); } EXPORT_SYMBOL(__check_sticky); /* * Check whether we can remove a link victim from directory dir, check * whether the type of victim is right. * 1. We can't do it if dir is read-only (done in permission()) * 2. We should have write and exec permissions on dir * 3. We can't remove anything from append-only dir * 4. We can't do anything with immutable dir (done in permission()) * 5. If the sticky bit on dir is set we should either * a. be owner of dir, or * b. be owner of victim, or * c. have CAP_FOWNER capability * 6. If the victim is append-only or immutable we can't do antyhing with * links pointing to it. * 7. If the victim has an unknown uid or gid we can't change the inode. * 8. If we were asked to remove a directory and victim isn't one - ENOTDIR. * 9. If we were asked to remove a non-directory and victim isn't one - EISDIR. * 10. We can't remove a root or mountpoint. * 11. We don't allow removal of NFS sillyrenamed files; it's handled by * nfs_async_unlink(). */ static int may_delete(struct mnt_idmap *idmap, struct inode *dir, struct dentry *victim, bool isdir) { struct inode *inode = d_backing_inode(victim); int error; if (d_is_negative(victim)) return -ENOENT; BUG_ON(!inode); BUG_ON(victim->d_parent->d_inode != dir); /* Inode writeback is not safe when the uid or gid are invalid. */ if (!vfsuid_valid(i_uid_into_vfsuid(idmap, inode)) || !vfsgid_valid(i_gid_into_vfsgid(idmap, inode))) return -EOVERFLOW; audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE); error = inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC); if (error) return error; if (IS_APPEND(dir)) return -EPERM; if (check_sticky(idmap, dir, inode) || IS_APPEND(inode) || IS_IMMUTABLE(inode) || IS_SWAPFILE(inode) || HAS_UNMAPPED_ID(idmap, inode)) return -EPERM; if (isdir) { if (!d_is_dir(victim)) return -ENOTDIR; if (IS_ROOT(victim)) return -EBUSY; } else if (d_is_dir(victim)) return -EISDIR; if (IS_DEADDIR(dir)) return -ENOENT; if (victim->d_flags & DCACHE_NFSFS_RENAMED) return -EBUSY; return 0; } /* Check whether we can create an object with dentry child in directory * dir. * 1. We can't do it if child already exists (open has special treatment for * this case, but since we are inlined it's OK) * 2. We can't do it if dir is read-only (done in permission()) * 3. We can't do it if the fs can't represent the fsuid or fsgid. * 4. We should have write and exec permissions on dir * 5. We can't do it if dir is immutable (done in permission()) */ static inline int may_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *child) { audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE); if (child->d_inode) return -EEXIST; if (IS_DEADDIR(dir)) return -ENOENT; if (!fsuidgid_has_mapping(dir->i_sb, idmap)) return -EOVERFLOW; return inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC); } // p1 != p2, both are on the same filesystem, ->s_vfs_rename_mutex is held static struct dentry *lock_two_directories(struct dentry *p1, struct dentry *p2) { struct dentry *p = p1, *q = p2, *r; while ((r = p->d_parent) != p2 && r != p) p = r; if (r == p2) { // p is a child of p2 and an ancestor of p1 or p1 itself inode_lock_nested(p2->d_inode, I_MUTEX_PARENT); inode_lock_nested(p1->d_inode, I_MUTEX_PARENT2); return p; } // p is the root of connected component that contains p1 // p2 does not occur on the path from p to p1 while ((r = q->d_parent) != p1 && r != p && r != q) q = r; if (r == p1) { // q is a child of p1 and an ancestor of p2 or p2 itself inode_lock_nested(p1->d_inode, I_MUTEX_PARENT); inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2); return q; } else if (likely(r == p)) { // both p2 and p1 are descendents of p inode_lock_nested(p1->d_inode, I_MUTEX_PARENT); inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2); return NULL; } else { // no common ancestor at the time we'd been called mutex_unlock(&p1->d_sb->s_vfs_rename_mutex); return ERR_PTR(-EXDEV); } } /* * p1 and p2 should be directories on the same fs. */ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) { if (p1 == p2) { inode_lock_nested(p1->d_inode, I_MUTEX_PARENT); return NULL; } mutex_lock(&p1->d_sb->s_vfs_rename_mutex); return lock_two_directories(p1, p2); } EXPORT_SYMBOL(lock_rename); /* * c1 and p2 should be on the same fs. */ struct dentry *lock_rename_child(struct dentry *c1, struct dentry *p2) { if (READ_ONCE(c1->d_parent) == p2) { /* * hopefully won't need to touch ->s_vfs_rename_mutex at all. */ inode_lock_nested(p2->d_inode, I_MUTEX_PARENT); /* * now that p2 is locked, nobody can move in or out of it, * so the test below is safe. */ if (likely(c1->d_parent == p2)) return NULL; /* * c1 got moved out of p2 while we'd been taking locks; * unlock and fall back to slow case. */ inode_unlock(p2->d_inode); } mutex_lock(&c1->d_sb->s_vfs_rename_mutex); /* * nobody can move out of any directories on this fs. */ if (likely(c1->d_parent != p2)) return lock_two_directories(c1->d_parent, p2); /* * c1 got moved into p2 while we were taking locks; * we need p2 locked and ->s_vfs_rename_mutex unlocked, * for consistency with lock_rename(). */ inode_lock_nested(p2->d_inode, I_MUTEX_PARENT); mutex_unlock(&c1->d_sb->s_vfs_rename_mutex); return NULL; } EXPORT_SYMBOL(lock_rename_child); void unlock_rename(struct dentry *p1, struct dentry *p2) { inode_unlock(p1->d_inode); if (p1 != p2) { inode_unlock(p2->d_inode); mutex_unlock(&p1->d_sb->s_vfs_rename_mutex); } } EXPORT_SYMBOL(unlock_rename); /** * vfs_prepare_mode - prepare the mode to be used for a new inode * @idmap: idmap of the mount the inode was found from * @dir: parent directory of the new inode * @mode: mode of the new inode * @mask_perms: allowed permission by the vfs * @type: type of file to be created * * This helper consolidates and enforces vfs restrictions on the @mode of a new * object to be created. * * Umask stripping depends on whether the filesystem supports POSIX ACLs (see * the kernel documentation for mode_strip_umask()). Moving umask stripping * after setgid stripping allows the same ordering for both non-POSIX ACL and * POSIX ACL supporting filesystems. * * Note that it's currently valid for @type to be 0 if a directory is created. * Filesystems raise that flag individually and we need to check whether each * filesystem can deal with receiving S_IFDIR from the vfs before we enforce a * non-zero type. * * Returns: mode to be passed to the filesystem */ static inline umode_t vfs_prepare_mode(struct mnt_idmap *idmap, const struct inode *dir, umode_t mode, umode_t mask_perms, umode_t type) { mode = mode_strip_sgid(idmap, dir, mode); mode = mode_strip_umask(dir, mode); /* * Apply the vfs mandated allowed permission mask and set the type of * file to be created before we call into the filesystem. */ mode &= (mask_perms & ~S_IFMT); mode |= (type & S_IFMT); return mode; } /** * vfs_create - create new file * @idmap: idmap of the mount the inode was found from * @dir: inode of the parent directory * @dentry: dentry of the child file * @mode: mode of the child file * @want_excl: whether the file must not yet exist * * Create a new file. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. */ int vfs_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool want_excl) { int error; error = may_create(idmap, dir, dentry); if (error) return error; if (!dir->i_op->create) return -EACCES; /* shouldn't it be ENOSYS? */ mode = vfs_prepare_mode(idmap, dir, mode, S_IALLUGO, S_IFREG); error = security_inode_create(dir, dentry, mode); if (error) return error; error = dir->i_op->create(idmap, dir, dentry, mode, want_excl); if (!error) fsnotify_create(dir, dentry); return error; } EXPORT_SYMBOL(vfs_create); int vfs_mkobj(struct dentry *dentry, umode_t mode, int (*f)(struct dentry *, umode_t, void *), void *arg) { struct inode *dir = dentry->d_parent->d_inode; int error = may_create(&nop_mnt_idmap, dir, dentry); if (error) return error; mode &= S_IALLUGO; mode |= S_IFREG; error = security_inode_create(dir, dentry, mode); if (error) return error; error = f(dentry, mode, arg); if (!error) fsnotify_create(dir, dentry); return error; } EXPORT_SYMBOL(vfs_mkobj); bool may_open_dev(const struct path *path) { return !(path->mnt->mnt_flags & MNT_NODEV) && !(path->mnt->mnt_sb->s_iflags & SB_I_NODEV); } static int may_open(struct mnt_idmap *idmap, const struct path *path, int acc_mode, int flag) { struct dentry *dentry = path->dentry; struct inode *inode = dentry->d_inode; int error; if (!inode) return -ENOENT; switch (inode->i_mode & S_IFMT) { case S_IFLNK: return -ELOOP; case S_IFDIR: if (acc_mode & MAY_WRITE) return -EISDIR; if (acc_mode & MAY_EXEC) return -EACCES; break; case S_IFBLK: case S_IFCHR: if (!may_open_dev(path)) return -EACCES; fallthrough; case S_IFIFO: case S_IFSOCK: if (acc_mode & MAY_EXEC) return -EACCES; flag &= ~O_TRUNC; break; case S_IFREG: if ((acc_mode & MAY_EXEC) && path_noexec(path)) return -EACCES; break; } error = inode_permission(idmap, inode, MAY_OPEN | acc_mode); if (error) return error; /* * An append-only file must be opened in append mode for writing. */ if (IS_APPEND(inode)) { if ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND)) return -EPERM; if (flag & O_TRUNC) return -EPERM; } /* O_NOATIME can only be set by the owner or superuser */ if (flag & O_NOATIME && !inode_owner_or_capable(idmap, inode)) return -EPERM; return 0; } static int handle_truncate(struct mnt_idmap *idmap, struct file *filp) { const struct path *path = &filp->f_path; struct inode *inode = path->dentry->d_inode; int error = get_write_access(inode); if (error) return error; error = security_file_truncate(filp); if (!error) { error = do_truncate(idmap, path->dentry, 0, ATTR_MTIME|ATTR_CTIME|ATTR_OPEN, filp); } put_write_access(inode); return error; } static inline int open_to_namei_flags(int flag) { if ((flag & O_ACCMODE) == 3) flag--; return flag; } static int may_o_create(struct mnt_idmap *idmap, const struct path *dir, struct dentry *dentry, umode_t mode) { int error = security_path_mknod(dir, dentry, mode, 0); if (error) return error; if (!fsuidgid_has_mapping(dir->dentry->d_sb, idmap)) return -EOVERFLOW; error = inode_permission(idmap, dir->dentry->d_inode, MAY_WRITE | MAY_EXEC); if (error) return error; return security_inode_create(dir->dentry->d_inode, dentry, mode); } /* * Attempt to atomically look up, create and open a file from a negative * dentry. * * Returns 0 if successful. The file will have been created and attached to * @file by the filesystem calling finish_open(). * * If the file was looked up only or didn't need creating, FMODE_OPENED won't * be set. The caller will need to perform the open themselves. @path will * have been updated to point to the new dentry. This may be negative. * * Returns an error code otherwise. */ static struct dentry *atomic_open(struct nameidata *nd, struct dentry *dentry, struct file *file, int open_flag, umode_t mode) { struct dentry *const DENTRY_NOT_SET = (void *) -1UL; struct inode *dir = nd->path.dentry->d_inode; int error; if (nd->flags & LOOKUP_DIRECTORY) open_flag |= O_DIRECTORY; file->f_path.dentry = DENTRY_NOT_SET; file->f_path.mnt = nd->path.mnt; error = dir->i_op->atomic_open(dir, dentry, file, open_to_namei_flags(open_flag), mode); d_lookup_done(dentry); if (!error) { if (file->f_mode & FMODE_OPENED) { if (unlikely(dentry != file->f_path.dentry)) { dput(dentry); dentry = dget(file->f_path.dentry); } } else if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) { error = -EIO; } else { if (file->f_path.dentry) { dput(dentry); dentry = file->f_path.dentry; } if (unlikely(d_is_negative(dentry))) error = -ENOENT; } } if (error) { dput(dentry); dentry = ERR_PTR(error); } return dentry; } /* * Look up and maybe create and open the last component. * * Must be called with parent locked (exclusive in O_CREAT case). * * Returns 0 on success, that is, if * the file was successfully atomically created (if necessary) and opened, or * the file was not completely opened at this time, though lookups and * creations were performed. * These case are distinguished by presence of FMODE_OPENED on file->f_mode. * In the latter case dentry returned in @path might be negative if O_CREAT * hadn't been specified. * * An error code is returned on failure. */ static struct dentry *lookup_open(struct nameidata *nd, struct file *file, const struct open_flags *op, bool got_write) { struct mnt_idmap *idmap; struct dentry *dir = nd->path.dentry; struct inode *dir_inode = dir->d_inode; int open_flag = op->open_flag; struct dentry *dentry; int error, create_error = 0; umode_t mode = op->mode; DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); if (unlikely(IS_DEADDIR(dir_inode))) return ERR_PTR(-ENOENT); file->f_mode &= ~FMODE_CREATED; dentry = d_lookup(dir, &nd->last); for (;;) { if (!dentry) { dentry = d_alloc_parallel(dir, &nd->last, &wq); if (IS_ERR(dentry)) return dentry; } if (d_in_lookup(dentry)) break; error = d_revalidate(dentry, nd->flags); if (likely(error > 0)) break; if (error) goto out_dput; d_invalidate(dentry); dput(dentry); dentry = NULL; } if (dentry->d_inode) { /* Cached positive dentry: will open in f_op->open */ return dentry; } /* * Checking write permission is tricky, bacuse we don't know if we are * going to actually need it: O_CREAT opens should work as long as the * file exists. But checking existence breaks atomicity. The trick is * to check access and if not granted clear O_CREAT from the flags. * * Another problem is returing the "right" error value (e.g. for an * O_EXCL open we want to return EEXIST not EROFS). */ if (unlikely(!got_write)) open_flag &= ~O_TRUNC; idmap = mnt_idmap(nd->path.mnt); if (open_flag & O_CREAT) { if (open_flag & O_EXCL) open_flag &= ~O_TRUNC; mode = vfs_prepare_mode(idmap, dir->d_inode, mode, mode, mode); if (likely(got_write)) create_error = may_o_create(idmap, &nd->path, dentry, mode); else create_error = -EROFS; } if (create_error) open_flag &= ~O_CREAT; if (dir_inode->i_op->atomic_open) { dentry = atomic_open(nd, dentry, file, open_flag, mode); if (unlikely(create_error) && dentry == ERR_PTR(-ENOENT)) dentry = ERR_PTR(create_error); return dentry; } if (d_in_lookup(dentry)) { struct dentry *res = dir_inode->i_op->lookup(dir_inode, dentry, nd->flags); d_lookup_done(dentry); if (unlikely(res)) { if (IS_ERR(res)) { error = PTR_ERR(res); goto out_dput; } dput(dentry); dentry = res; } } /* Negative dentry, just create the file */ if (!dentry->d_inode && (open_flag & O_CREAT)) { file->f_mode |= FMODE_CREATED; audit_inode_child(dir_inode, dentry, AUDIT_TYPE_CHILD_CREATE); if (!dir_inode->i_op->create) { error = -EACCES; goto out_dput; } error = dir_inode->i_op->create(idmap, dir_inode, dentry, mode, open_flag & O_EXCL); if (error) goto out_dput; } if (unlikely(create_error) && !dentry->d_inode) { error = create_error; goto out_dput; } return dentry; out_dput: dput(dentry); return ERR_PTR(error); } static const char *open_last_lookups(struct nameidata *nd, struct file *file, const struct open_flags *op) { struct dentry *dir = nd->path.dentry; int open_flag = op->open_flag; bool got_write = false; struct dentry *dentry; const char *res; nd->flags |= op->intent; if (nd->last_type != LAST_NORM) { if (nd->depth) put_link(nd); return handle_dots(nd, nd->last_type); } if (!(open_flag & O_CREAT)) { if (nd->last.name[nd->last.len]) nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; /* we _can_ be in RCU mode here */ dentry = lookup_fast(nd); if (IS_ERR(dentry)) return ERR_CAST(dentry); if (likely(dentry)) goto finish_lookup; if (WARN_ON_ONCE(nd->flags & LOOKUP_RCU)) return ERR_PTR(-ECHILD); } else { /* create side of things */ if (nd->flags & LOOKUP_RCU) { if (!try_to_unlazy(nd)) return ERR_PTR(-ECHILD); } audit_inode(nd->name, dir, AUDIT_INODE_PARENT); /* trailing slashes? */ if (unlikely(nd->last.name[nd->last.len])) return ERR_PTR(-EISDIR); } if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) { got_write = !mnt_want_write(nd->path.mnt); /* * do _not_ fail yet - we might not need that or fail with * a different error; let lookup_open() decide; we'll be * dropping this one anyway. */ } if (open_flag & O_CREAT) inode_lock(dir->d_inode); else inode_lock_shared(dir->d_inode); dentry = lookup_open(nd, file, op, got_write); if (!IS_ERR(dentry)) { if (file->f_mode & FMODE_CREATED) fsnotify_create(dir->d_inode, dentry); if (file->f_mode & FMODE_OPENED) fsnotify_open(file); } if (open_flag & O_CREAT) inode_unlock(dir->d_inode); else inode_unlock_shared(dir->d_inode); if (got_write) mnt_drop_write(nd->path.mnt); if (IS_ERR(dentry)) return ERR_CAST(dentry); if (file->f_mode & (FMODE_OPENED | FMODE_CREATED)) { dput(nd->path.dentry); nd->path.dentry = dentry; return NULL; } finish_lookup: if (nd->depth) put_link(nd); res = step_into(nd, WALK_TRAILING, dentry); if (unlikely(res)) nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL); return res; } /* * Handle the last step of open() */ static int do_open(struct nameidata *nd, struct file *file, const struct open_flags *op) { struct mnt_idmap *idmap; int open_flag = op->open_flag; bool do_truncate; int acc_mode; int error; if (!(file->f_mode & (FMODE_OPENED | FMODE_CREATED))) { error = complete_walk(nd); if (error) return error; } if (!(file->f_mode & FMODE_CREATED)) audit_inode(nd->name, nd->path.dentry, 0); idmap = mnt_idmap(nd->path.mnt); if (open_flag & O_CREAT) { if ((open_flag & O_EXCL) && !(file->f_mode & FMODE_CREATED)) return -EEXIST; if (d_is_dir(nd->path.dentry)) return -EISDIR; error = may_create_in_sticky(idmap, nd, d_backing_inode(nd->path.dentry)); if (unlikely(error)) return error; } if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry)) return -ENOTDIR; do_truncate = false; acc_mode = op->acc_mode; if (file->f_mode & FMODE_CREATED) { /* Don't check for write permission, don't truncate */ open_flag &= ~O_TRUNC; acc_mode = 0; } else if (d_is_reg(nd->path.dentry) && open_flag & O_TRUNC) { error = mnt_want_write(nd->path.mnt); if (error) return error; do_truncate = true; } error = may_open(idmap, &nd->path, acc_mode, open_flag); if (!error && !(file->f_mode & FMODE_OPENED)) error = vfs_open(&nd->path, file); if (!error) error = security_file_post_open(file, op->acc_mode); if (!error && do_truncate) error = handle_truncate(idmap, file); if (unlikely(error > 0)) { WARN_ON(1); error = -EINVAL; } if (do_truncate) mnt_drop_write(nd->path.mnt); return error; } /** * vfs_tmpfile - create tmpfile * @idmap: idmap of the mount the inode was found from * @parentpath: pointer to the path of the base directory * @file: file descriptor of the new tmpfile * @mode: mode of the new tmpfile * * Create a temporary file. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. */ int vfs_tmpfile(struct mnt_idmap *idmap, const struct path *parentpath, struct file *file, umode_t mode) { struct dentry *child; struct inode *dir = d_inode(parentpath->dentry); struct inode *inode; int error; int open_flag = file->f_flags; /* we want directory to be writable */ error = inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC); if (error) return error; if (!dir->i_op->tmpfile) return -EOPNOTSUPP; child = d_alloc(parentpath->dentry, &slash_name); if (unlikely(!child)) return -ENOMEM; file->f_path.mnt = parentpath->mnt; file->f_path.dentry = child; mode = vfs_prepare_mode(idmap, dir, mode, mode, mode); error = dir->i_op->tmpfile(idmap, dir, file, mode); dput(child); if (file->f_mode & FMODE_OPENED) fsnotify_open(file); if (error) return error; /* Don't check for other permissions, the inode was just created */ error = may_open(idmap, &file->f_path, 0, file->f_flags); if (error) return error; inode = file_inode(file); if (!(open_flag & O_EXCL)) { spin_lock(&inode->i_lock); inode->i_state |= I_LINKABLE; spin_unlock(&inode->i_lock); } security_inode_post_create_tmpfile(idmap, inode); return 0; } /** * kernel_tmpfile_open - open a tmpfile for kernel internal use * @idmap: idmap of the mount the inode was found from * @parentpath: path of the base directory * @mode: mode of the new tmpfile * @open_flag: flags * @cred: credentials for open * * Create and open a temporary file. The file is not accounted in nr_files, * hence this is only for kernel internal use, and must not be installed into * file tables or such. */ struct file *kernel_tmpfile_open(struct mnt_idmap *idmap, const struct path *parentpath, umode_t mode, int open_flag, const struct cred *cred) { struct file *file; int error; file = alloc_empty_file_noaccount(open_flag, cred); if (IS_ERR(file)) return file; error = vfs_tmpfile(idmap, parentpath, file, mode); if (error) { fput(file); file = ERR_PTR(error); } return file; } EXPORT_SYMBOL(kernel_tmpfile_open); static int do_tmpfile(struct nameidata *nd, unsigned flags, const struct open_flags *op, struct file *file) { struct path path; int error = path_lookupat(nd, flags | LOOKUP_DIRECTORY, &path); if (unlikely(error)) return error; error = mnt_want_write(path.mnt); if (unlikely(error)) goto out; error = vfs_tmpfile(mnt_idmap(path.mnt), &path, file, op->mode); if (error) goto out2; audit_inode(nd->name, file->f_path.dentry, 0); out2: mnt_drop_write(path.mnt); out: path_put(&path); return error; } static int do_o_path(struct nameidata *nd, unsigned flags, struct file *file) { struct path path; int error = path_lookupat(nd, flags, &path); if (!error) { audit_inode(nd->name, path.dentry, 0); error = vfs_open(&path, file); path_put(&path); } return error; } static struct file *path_openat(struct nameidata *nd, const struct open_flags *op, unsigned flags) { struct file *file; int error; file = alloc_empty_file(op->open_flag, current_cred()); if (IS_ERR(file)) return file; if (unlikely(file->f_flags & __O_TMPFILE)) { error = do_tmpfile(nd, flags, op, file); } else if (unlikely(file->f_flags & O_PATH)) { error = do_o_path(nd, flags, file); } else { const char *s = path_init(nd, flags); while (!(error = link_path_walk(s, nd)) && (s = open_last_lookups(nd, file, op)) != NULL) ; if (!error) error = do_open(nd, file, op); terminate_walk(nd); } if (likely(!error)) { if (likely(file->f_mode & FMODE_OPENED)) return file; WARN_ON(1); error = -EINVAL; } fput(file); if (error == -EOPENSTALE) { if (flags & LOOKUP_RCU) error = -ECHILD; else error = -ESTALE; } return ERR_PTR(error); } struct file *do_filp_open(int dfd, struct filename *pathname, const struct open_flags *op) { struct nameidata nd; int flags = op->lookup_flags; struct file *filp; set_nameidata(&nd, dfd, pathname, NULL); filp = path_openat(&nd, op, flags | LOOKUP_RCU); if (unlikely(filp == ERR_PTR(-ECHILD))) filp = path_openat(&nd, op, flags); if (unlikely(filp == ERR_PTR(-ESTALE))) filp = path_openat(&nd, op, flags | LOOKUP_REVAL); restore_nameidata(); return filp; } struct file *do_file_open_root(const struct path *root, const char *name, const struct open_flags *op) { struct nameidata nd; struct file *file; struct filename *filename; int flags = op->lookup_flags; if (d_is_symlink(root->dentry) && op->intent & LOOKUP_OPEN) return ERR_PTR(-ELOOP); filename = getname_kernel(name); if (IS_ERR(filename)) return ERR_CAST(filename); set_nameidata(&nd, -1, filename, root); file = path_openat(&nd, op, flags | LOOKUP_RCU); if (unlikely(file == ERR_PTR(-ECHILD))) file = path_openat(&nd, op, flags); if (unlikely(file == ERR_PTR(-ESTALE))) file = path_openat(&nd, op, flags | LOOKUP_REVAL); restore_nameidata(); putname(filename); return file; } static struct dentry *filename_create(int dfd, struct filename *name, struct path *path, unsigned int lookup_flags) { struct dentry *dentry = ERR_PTR(-EEXIST); struct qstr last; bool want_dir = lookup_flags & LOOKUP_DIRECTORY; unsigned int reval_flag = lookup_flags & LOOKUP_REVAL; unsigned int create_flags = LOOKUP_CREATE | LOOKUP_EXCL; int type; int err2; int error; error = filename_parentat(dfd, name, reval_flag, path, &last, &type); if (error) return ERR_PTR(error); /* * Yucky last component or no last component at all? * (foo/., foo/.., /////) */ if (unlikely(type != LAST_NORM)) goto out; /* don't fail immediately if it's r/o, at least try to report other errors */ err2 = mnt_want_write(path->mnt); /* * Do the final lookup. Suppress 'create' if there is a trailing * '/', and a directory wasn't requested. */ if (last.name[last.len] && !want_dir) create_flags = 0; inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT); dentry = lookup_one_qstr_excl(&last, path->dentry, reval_flag | create_flags); if (IS_ERR(dentry)) goto unlock; error = -EEXIST; if (d_is_positive(dentry)) goto fail; /* * Special case - lookup gave negative, but... we had foo/bar/ * From the vfs_mknod() POV we just have a negative dentry - * all is fine. Let's be bastards - you had / on the end, you've * been asking for (non-existent) directory. -ENOENT for you. */ if (unlikely(!create_flags)) { error = -ENOENT; goto fail; } if (unlikely(err2)) { error = err2; goto fail; } return dentry; fail: dput(dentry); dentry = ERR_PTR(error); unlock: inode_unlock(path->dentry->d_inode); if (!err2) mnt_drop_write(path->mnt); out: path_put(path); return dentry; } struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path, unsigned int lookup_flags) { struct filename *filename = getname_kernel(pathname); struct dentry *res = filename_create(dfd, filename, path, lookup_flags); putname(filename); return res; } EXPORT_SYMBOL(kern_path_create); void done_path_create(struct path *path, struct dentry *dentry) { dput(dentry); inode_unlock(path->dentry->d_inode); mnt_drop_write(path->mnt); path_put(path); } EXPORT_SYMBOL(done_path_create); inline struct dentry *user_path_create(int dfd, const char __user *pathname, struct path *path, unsigned int lookup_flags) { struct filename *filename = getname(pathname); struct dentry *res = filename_create(dfd, filename, path, lookup_flags); putname(filename); return res; } EXPORT_SYMBOL(user_path_create); /** * vfs_mknod - create device node or file * @idmap: idmap of the mount the inode was found from * @dir: inode of the parent directory * @dentry: dentry of the child device node * @mode: mode of the child device node * @dev: device number of device to create * * Create a device node or file. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. */ int vfs_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) { bool is_whiteout = S_ISCHR(mode) && dev == WHITEOUT_DEV; int error = may_create(idmap, dir, dentry); if (error) return error; if ((S_ISCHR(mode) || S_ISBLK(mode)) && !is_whiteout && !capable(CAP_MKNOD)) return -EPERM; if (!dir->i_op->mknod) return -EPERM; mode = vfs_prepare_mode(idmap, dir, mode, mode, mode); error = devcgroup_inode_mknod(mode, dev); if (error) return error; error = security_inode_mknod(dir, dentry, mode, dev); if (error) return error; error = dir->i_op->mknod(idmap, dir, dentry, mode, dev); if (!error) fsnotify_create(dir, dentry); return error; } EXPORT_SYMBOL(vfs_mknod); static int may_mknod(umode_t mode) { switch (mode & S_IFMT) { case S_IFREG: case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: case 0: /* zero mode translates to S_IFREG */ return 0; case S_IFDIR: return -EPERM; default: return -EINVAL; } } static int do_mknodat(int dfd, struct filename *name, umode_t mode, unsigned int dev) { struct mnt_idmap *idmap; struct dentry *dentry; struct path path; int error; unsigned int lookup_flags = 0; error = may_mknod(mode); if (error) goto out1; retry: dentry = filename_create(dfd, name, &path, lookup_flags); error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out1; error = security_path_mknod(&path, dentry, mode_strip_umask(path.dentry->d_inode, mode), dev); if (error) goto out2; idmap = mnt_idmap(path.mnt); switch (mode & S_IFMT) { case 0: case S_IFREG: error = vfs_create(idmap, path.dentry->d_inode, dentry, mode, true); if (!error) security_path_post_mknod(idmap, dentry); break; case S_IFCHR: case S_IFBLK: error = vfs_mknod(idmap, path.dentry->d_inode, dentry, mode, new_decode_dev(dev)); break; case S_IFIFO: case S_IFSOCK: error = vfs_mknod(idmap, path.dentry->d_inode, dentry, mode, 0); break; } out2: done_path_create(&path, dentry); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } out1: putname(name); return error; } SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, unsigned int, dev) { return do_mknodat(dfd, getname(filename), mode, dev); } SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev) { return do_mknodat(AT_FDCWD, getname(filename), mode, dev); } /** * vfs_mkdir - create directory * @idmap: idmap of the mount the inode was found from * @dir: inode of the parent directory * @dentry: dentry of the child directory * @mode: mode of the child directory * * Create a directory. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. */ int vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { int error; unsigned max_links = dir->i_sb->s_max_links; error = may_create(idmap, dir, dentry); if (error) return error; if (!dir->i_op->mkdir) return -EPERM; mode = vfs_prepare_mode(idmap, dir, mode, S_IRWXUGO | S_ISVTX, 0); error = security_inode_mkdir(dir, dentry, mode); if (error) return error; if (max_links && dir->i_nlink >= max_links) return -EMLINK; error = dir->i_op->mkdir(idmap, dir, dentry, mode); if (!error) fsnotify_mkdir(dir, dentry); return error; } EXPORT_SYMBOL(vfs_mkdir); int do_mkdirat(int dfd, struct filename *name, umode_t mode) { struct dentry *dentry; struct path path; int error; unsigned int lookup_flags = LOOKUP_DIRECTORY; retry: dentry = filename_create(dfd, name, &path, lookup_flags); error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out_putname; error = security_path_mkdir(&path, dentry, mode_strip_umask(path.dentry->d_inode, mode)); if (!error) { error = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode, dentry, mode); } done_path_create(&path, dentry); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } out_putname: putname(name); return error; } SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode) { return do_mkdirat(dfd, getname(pathname), mode); } SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode) { return do_mkdirat(AT_FDCWD, getname(pathname), mode); } /** * vfs_rmdir - remove directory * @idmap: idmap of the mount the inode was found from * @dir: inode of the parent directory * @dentry: dentry of the child directory * * Remove a directory. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. */ int vfs_rmdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry) { int error = may_delete(idmap, dir, dentry, 1); if (error) return error; if (!dir->i_op->rmdir) return -EPERM; dget(dentry); inode_lock(dentry->d_inode); error = -EBUSY; if (is_local_mountpoint(dentry) || (dentry->d_inode->i_flags & S_KERNEL_FILE)) goto out; error = security_inode_rmdir(dir, dentry); if (error) goto out; error = dir->i_op->rmdir(dir, dentry); if (error) goto out; shrink_dcache_parent(dentry); dentry->d_inode->i_flags |= S_DEAD; dont_mount(dentry); detach_mounts(dentry); out: inode_unlock(dentry->d_inode); dput(dentry); if (!error) d_delete_notify(dir, dentry); return error; } EXPORT_SYMBOL(vfs_rmdir); int do_rmdir(int dfd, struct filename *name) { int error; struct dentry *dentry; struct path path; struct qstr last; int type; unsigned int lookup_flags = 0; retry: error = filename_parentat(dfd, name, lookup_flags, &path, &last, &type); if (error) goto exit1; switch (type) { case LAST_DOTDOT: error = -ENOTEMPTY; goto exit2; case LAST_DOT: error = -EINVAL; goto exit2; case LAST_ROOT: error = -EBUSY; goto exit2; } error = mnt_want_write(path.mnt); if (error) goto exit2; inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT); dentry = lookup_one_qstr_excl(&last, path.dentry, lookup_flags); error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto exit3; if (!dentry->d_inode) { error = -ENOENT; goto exit4; } error = security_path_rmdir(&path, dentry); if (error) goto exit4; error = vfs_rmdir(mnt_idmap(path.mnt), path.dentry->d_inode, dentry); exit4: dput(dentry); exit3: inode_unlock(path.dentry->d_inode); mnt_drop_write(path.mnt); exit2: path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } exit1: putname(name); return error; } SYSCALL_DEFINE1(rmdir, const char __user *, pathname) { return do_rmdir(AT_FDCWD, getname(pathname)); } /** * vfs_unlink - unlink a filesystem object * @idmap: idmap of the mount the inode was found from * @dir: parent directory * @dentry: victim * @delegated_inode: returns victim inode, if the inode is delegated. * * The caller must hold dir->i_mutex. * * If vfs_unlink discovers a delegation, it will return -EWOULDBLOCK and * return a reference to the inode in delegated_inode. The caller * should then break the delegation on that inode and retry. Because * breaking a delegation may take a long time, the caller should drop * dir->i_mutex before doing so. * * Alternatively, a caller may pass NULL for delegated_inode. This may * be appropriate for callers that expect the underlying filesystem not * to be NFS exported. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. */ int vfs_unlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, struct inode **delegated_inode) { struct inode *target = dentry->d_inode; int error = may_delete(idmap, dir, dentry, 0); if (error) return error; if (!dir->i_op->unlink) return -EPERM; inode_lock(target); if (IS_SWAPFILE(target)) error = -EPERM; else if (is_local_mountpoint(dentry)) error = -EBUSY; else { error = security_inode_unlink(dir, dentry); if (!error) { error = try_break_deleg(target, delegated_inode); if (error) goto out; error = dir->i_op->unlink(dir, dentry); if (!error) { dont_mount(dentry); detach_mounts(dentry); } } } out: inode_unlock(target); /* We don't d_delete() NFS sillyrenamed files--they still exist. */ if (!error && dentry->d_flags & DCACHE_NFSFS_RENAMED) { fsnotify_unlink(dir, dentry); } else if (!error) { fsnotify_link_count(target); d_delete_notify(dir, dentry); } return error; } EXPORT_SYMBOL(vfs_unlink); /* * Make sure that the actual truncation of the file will occur outside its * directory's i_mutex. Truncate can take a long time if there is a lot of * writeout happening, and we don't want to prevent access to the directory * while waiting on the I/O. */ int do_unlinkat(int dfd, struct filename *name) { int error; struct dentry *dentry; struct path path; struct qstr last; int type; struct inode *inode = NULL; struct inode *delegated_inode = NULL; unsigned int lookup_flags = 0; retry: error = filename_parentat(dfd, name, lookup_flags, &path, &last, &type); if (error) goto exit1; error = -EISDIR; if (type != LAST_NORM) goto exit2; error = mnt_want_write(path.mnt); if (error) goto exit2; retry_deleg: inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT); dentry = lookup_one_qstr_excl(&last, path.dentry, lookup_flags); error = PTR_ERR(dentry); if (!IS_ERR(dentry)) { /* Why not before? Because we want correct error value */ if (last.name[last.len] || d_is_negative(dentry)) goto slashes; inode = dentry->d_inode; ihold(inode); error = security_path_unlink(&path, dentry); if (error) goto exit3; error = vfs_unlink(mnt_idmap(path.mnt), path.dentry->d_inode, dentry, &delegated_inode); exit3: dput(dentry); } inode_unlock(path.dentry->d_inode); if (inode) iput(inode); /* truncate the inode here */ inode = NULL; if (delegated_inode) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; } mnt_drop_write(path.mnt); exit2: path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; inode = NULL; goto retry; } exit1: putname(name); return error; slashes: if (d_is_negative(dentry)) error = -ENOENT; else if (d_is_dir(dentry)) error = -EISDIR; else error = -ENOTDIR; goto exit3; } SYSCALL_DEFINE3(unlinkat, int, dfd, const char __user *, pathname, int, flag) { if ((flag & ~AT_REMOVEDIR) != 0) return -EINVAL; if (flag & AT_REMOVEDIR) return do_rmdir(dfd, getname(pathname)); return do_unlinkat(dfd, getname(pathname)); } SYSCALL_DEFINE1(unlink, const char __user *, pathname) { return do_unlinkat(AT_FDCWD, getname(pathname)); } /** * vfs_symlink - create symlink * @idmap: idmap of the mount the inode was found from * @dir: inode of the parent directory * @dentry: dentry of the child symlink file * @oldname: name of the file to link to * * Create a symlink. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. */ int vfs_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *oldname) { int error; error = may_create(idmap, dir, dentry); if (error) return error; if (!dir->i_op->symlink) return -EPERM; error = security_inode_symlink(dir, dentry, oldname); if (error) return error; error = dir->i_op->symlink(idmap, dir, dentry, oldname); if (!error) fsnotify_create(dir, dentry); return error; } EXPORT_SYMBOL(vfs_symlink); int do_symlinkat(struct filename *from, int newdfd, struct filename *to) { int error; struct dentry *dentry; struct path path; unsigned int lookup_flags = 0; if (IS_ERR(from)) { error = PTR_ERR(from); goto out_putnames; } retry: dentry = filename_create(newdfd, to, &path, lookup_flags); error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out_putnames; error = security_path_symlink(&path, dentry, from->name); if (!error) error = vfs_symlink(mnt_idmap(path.mnt), path.dentry->d_inode, dentry, from->name); done_path_create(&path, dentry); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } out_putnames: putname(to); putname(from); return error; } SYSCALL_DEFINE3(symlinkat, const char __user *, oldname, int, newdfd, const char __user *, newname) { return do_symlinkat(getname(oldname), newdfd, getname(newname)); } SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newname) { return do_symlinkat(getname(oldname), AT_FDCWD, getname(newname)); } /** * vfs_link - create a new link * @old_dentry: object to be linked * @idmap: idmap of the mount * @dir: new parent * @new_dentry: where to create the new link * @delegated_inode: returns inode needing a delegation break * * The caller must hold dir->i_mutex * * If vfs_link discovers a delegation on the to-be-linked file in need * of breaking, it will return -EWOULDBLOCK and return a reference to the * inode in delegated_inode. The caller should then break the delegation * and retry. Because breaking a delegation may take a long time, the * caller should drop the i_mutex before doing so. * * Alternatively, a caller may pass NULL for delegated_inode. This may * be appropriate for callers that expect the underlying filesystem not * to be NFS exported. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. */ int vfs_link(struct dentry *old_dentry, struct mnt_idmap *idmap, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode) { struct inode *inode = old_dentry->d_inode; unsigned max_links = dir->i_sb->s_max_links; int error; if (!inode) return -ENOENT; error = may_create(idmap, dir, new_dentry); if (error) return error; if (dir->i_sb != inode->i_sb) return -EXDEV; /* * A link to an append-only or immutable file cannot be created. */ if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) return -EPERM; /* * Updating the link count will likely cause i_uid and i_gid to * be writen back improperly if their true value is unknown to * the vfs. */ if (HAS_UNMAPPED_ID(idmap, inode)) return -EPERM; if (!dir->i_op->link) return -EPERM; if (S_ISDIR(inode->i_mode)) return -EPERM; error = security_inode_link(old_dentry, dir, new_dentry); if (error) return error; inode_lock(inode); /* Make sure we don't allow creating hardlink to an unlinked file */ if (inode->i_nlink == 0 && !(inode->i_state & I_LINKABLE)) error = -ENOENT; else if (max_links && inode->i_nlink >= max_links) error = -EMLINK; else { error = try_break_deleg(inode, delegated_inode); if (!error) error = dir->i_op->link(old_dentry, dir, new_dentry); } if (!error && (inode->i_state & I_LINKABLE)) { spin_lock(&inode->i_lock); inode->i_state &= ~I_LINKABLE; spin_unlock(&inode->i_lock); } inode_unlock(inode); if (!error) fsnotify_link(dir, inode, new_dentry); return error; } EXPORT_SYMBOL(vfs_link); /* * Hardlinks are often used in delicate situations. We avoid * security-related surprises by not following symlinks on the * newname. --KAB * * We don't follow them on the oldname either to be compatible * with linux 2.0, and to avoid hard-linking to directories * and other special files. --ADM */ int do_linkat(int olddfd, struct filename *old, int newdfd, struct filename *new, int flags) { struct mnt_idmap *idmap; struct dentry *new_dentry; struct path old_path, new_path; struct inode *delegated_inode = NULL; int how = 0; int error; if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0) { error = -EINVAL; goto out_putnames; } /* * To use null names we require CAP_DAC_READ_SEARCH or * that the open-time creds of the dfd matches current. * This ensures that not everyone will be able to create * a hardlink using the passed file descriptor. */ if (flags & AT_EMPTY_PATH) how |= LOOKUP_LINKAT_EMPTY; if (flags & AT_SYMLINK_FOLLOW) how |= LOOKUP_FOLLOW; retry: error = filename_lookup(olddfd, old, how, &old_path, NULL); if (error) goto out_putnames; new_dentry = filename_create(newdfd, new, &new_path, (how & LOOKUP_REVAL)); error = PTR_ERR(new_dentry); if (IS_ERR(new_dentry)) goto out_putpath; error = -EXDEV; if (old_path.mnt != new_path.mnt) goto out_dput; idmap = mnt_idmap(new_path.mnt); error = may_linkat(idmap, &old_path); if (unlikely(error)) goto out_dput; error = security_path_link(old_path.dentry, &new_path, new_dentry); if (error) goto out_dput; error = vfs_link(old_path.dentry, idmap, new_path.dentry->d_inode, new_dentry, &delegated_inode); out_dput: done_path_create(&new_path, new_dentry); if (delegated_inode) { error = break_deleg_wait(&delegated_inode); if (!error) { path_put(&old_path); goto retry; } } if (retry_estale(error, how)) { path_put(&old_path); how |= LOOKUP_REVAL; goto retry; } out_putpath: path_put(&old_path); out_putnames: putname(old); putname(new); return error; } SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, int, flags) { return do_linkat(olddfd, getname_uflags(oldname, flags), newdfd, getname(newname), flags); } SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname) { return do_linkat(AT_FDCWD, getname(oldname), AT_FDCWD, getname(newname), 0); } /** * vfs_rename - rename a filesystem object * @rd: pointer to &struct renamedata info * * The caller must hold multiple mutexes--see lock_rename()). * * If vfs_rename discovers a delegation in need of breaking at either * the source or destination, it will return -EWOULDBLOCK and return a * reference to the inode in delegated_inode. The caller should then * break the delegation and retry. Because breaking a delegation may * take a long time, the caller should drop all locks before doing * so. * * Alternatively, a caller may pass NULL for delegated_inode. This may * be appropriate for callers that expect the underlying filesystem not * to be NFS exported. * * The worst of all namespace operations - renaming directory. "Perverted" * doesn't even start to describe it. Somebody in UCB had a heck of a trip... * Problems: * * a) we can get into loop creation. * b) race potential - two innocent renames can create a loop together. * That's where 4.4BSD screws up. Current fix: serialization on * sb->s_vfs_rename_mutex. We might be more accurate, but that's another * story. * c) we may have to lock up to _four_ objects - parents and victim (if it exists), * and source (if it's a non-directory or a subdirectory that moves to * different parent). * And that - after we got ->i_mutex on parents (until then we don't know * whether the target exists). Solution: try to be smart with locking * order for inodes. We rely on the fact that tree topology may change * only under ->s_vfs_rename_mutex _and_ that parent of the object we * move will be locked. Thus we can rank directories by the tree * (ancestors first) and rank all non-directories after them. * That works since everybody except rename does "lock parent, lookup, * lock child" and rename is under ->s_vfs_rename_mutex. * HOWEVER, it relies on the assumption that any object with ->lookup() * has no more than 1 dentry. If "hybrid" objects will ever appear, * we'd better make sure that there's no link(2) for them. * d) conversion from fhandle to dentry may come in the wrong moment - when * we are removing the target. Solution: we will have to grab ->i_mutex * in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on * ->i_mutex on parents, which works but leads to some truly excessive * locking]. */ int vfs_rename(struct renamedata *rd) { int error; struct inode *old_dir = rd->old_dir, *new_dir = rd->new_dir; struct dentry *old_dentry = rd->old_dentry; struct dentry *new_dentry = rd->new_dentry; struct inode **delegated_inode = rd->delegated_inode; unsigned int flags = rd->flags; bool is_dir = d_is_dir(old_dentry); struct inode *source = old_dentry->d_inode; struct inode *target = new_dentry->d_inode; bool new_is_dir = false; unsigned max_links = new_dir->i_sb->s_max_links; struct name_snapshot old_name; bool lock_old_subdir, lock_new_subdir; if (source == target) return 0; error = may_delete(rd->old_mnt_idmap, old_dir, old_dentry, is_dir); if (error) return error; if (!target) { error = may_create(rd->new_mnt_idmap, new_dir, new_dentry); } else { new_is_dir = d_is_dir(new_dentry); if (!(flags & RENAME_EXCHANGE)) error = may_delete(rd->new_mnt_idmap, new_dir, new_dentry, is_dir); else error = may_delete(rd->new_mnt_idmap, new_dir, new_dentry, new_is_dir); } if (error) return error; if (!old_dir->i_op->rename) return -EPERM; /* * If we are going to change the parent - check write permissions, * we'll need to flip '..'. */ if (new_dir != old_dir) { if (is_dir) { error = inode_permission(rd->old_mnt_idmap, source, MAY_WRITE); if (error) return error; } if ((flags & RENAME_EXCHANGE) && new_is_dir) { error = inode_permission(rd->new_mnt_idmap, target, MAY_WRITE); if (error) return error; } } error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry, flags); if (error) return error; take_dentry_name_snapshot(&old_name, old_dentry); dget(new_dentry); /* * Lock children. * The source subdirectory needs to be locked on cross-directory * rename or cross-directory exchange since its parent changes. * The target subdirectory needs to be locked on cross-directory * exchange due to parent change and on any rename due to becoming * a victim. * Non-directories need locking in all cases (for NFS reasons); * they get locked after any subdirectories (in inode address order). * * NOTE: WE ONLY LOCK UNRELATED DIRECTORIES IN CROSS-DIRECTORY CASE. * NEVER, EVER DO THAT WITHOUT ->s_vfs_rename_mutex. */ lock_old_subdir = new_dir != old_dir; lock_new_subdir = new_dir != old_dir || !(flags & RENAME_EXCHANGE); if (is_dir) { if (lock_old_subdir) inode_lock_nested(source, I_MUTEX_CHILD); if (target && (!new_is_dir || lock_new_subdir)) inode_lock(target); } else if (new_is_dir) { if (lock_new_subdir) inode_lock_nested(target, I_MUTEX_CHILD); inode_lock(source); } else { lock_two_nondirectories(source, target); } error = -EPERM; if (IS_SWAPFILE(source) || (target && IS_SWAPFILE(target))) goto out; error = -EBUSY; if (is_local_mountpoint(old_dentry) || is_local_mountpoint(new_dentry)) goto out; if (max_links && new_dir != old_dir) { error = -EMLINK; if (is_dir && !new_is_dir && new_dir->i_nlink >= max_links) goto out; if ((flags & RENAME_EXCHANGE) && !is_dir && new_is_dir && old_dir->i_nlink >= max_links) goto out; } if (!is_dir) { error = try_break_deleg(source, delegated_inode); if (error) goto out; } if (target && !new_is_dir) { error = try_break_deleg(target, delegated_inode); if (error) goto out; } error = old_dir->i_op->rename(rd->new_mnt_idmap, old_dir, old_dentry, new_dir, new_dentry, flags); if (error) goto out; if (!(flags & RENAME_EXCHANGE) && target) { if (is_dir) { shrink_dcache_parent(new_dentry); target->i_flags |= S_DEAD; } dont_mount(new_dentry); detach_mounts(new_dentry); } if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) { if (!(flags & RENAME_EXCHANGE)) d_move(old_dentry, new_dentry); else d_exchange(old_dentry, new_dentry); } out: if (!is_dir || lock_old_subdir) inode_unlock(source); if (target && (!new_is_dir || lock_new_subdir)) inode_unlock(target); dput(new_dentry); if (!error) { fsnotify_move(old_dir, new_dir, &old_name.name, is_dir, !(flags & RENAME_EXCHANGE) ? target : NULL, old_dentry); if (flags & RENAME_EXCHANGE) { fsnotify_move(new_dir, old_dir, &old_dentry->d_name, new_is_dir, NULL, new_dentry); } } release_dentry_name_snapshot(&old_name); return error; } EXPORT_SYMBOL(vfs_rename); int do_renameat2(int olddfd, struct filename *from, int newdfd, struct filename *to, unsigned int flags) { struct renamedata rd; struct dentry *old_dentry, *new_dentry; struct dentry *trap; struct path old_path, new_path; struct qstr old_last, new_last; int old_type, new_type; struct inode *delegated_inode = NULL; unsigned int lookup_flags = 0, target_flags = LOOKUP_RENAME_TARGET; bool should_retry = false; int error = -EINVAL; if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) goto put_names; if ((flags & (RENAME_NOREPLACE | RENAME_WHITEOUT)) && (flags & RENAME_EXCHANGE)) goto put_names; if (flags & RENAME_EXCHANGE) target_flags = 0; retry: error = filename_parentat(olddfd, from, lookup_flags, &old_path, &old_last, &old_type); if (error) goto put_names; error = filename_parentat(newdfd, to, lookup_flags, &new_path, &new_last, &new_type); if (error) goto exit1; error = -EXDEV; if (old_path.mnt != new_path.mnt) goto exit2; error = -EBUSY; if (old_type != LAST_NORM) goto exit2; if (flags & RENAME_NOREPLACE) error = -EEXIST; if (new_type != LAST_NORM) goto exit2; error = mnt_want_write(old_path.mnt); if (error) goto exit2; retry_deleg: trap = lock_rename(new_path.dentry, old_path.dentry); if (IS_ERR(trap)) { error = PTR_ERR(trap); goto exit_lock_rename; } old_dentry = lookup_one_qstr_excl(&old_last, old_path.dentry, lookup_flags); error = PTR_ERR(old_dentry); if (IS_ERR(old_dentry)) goto exit3; /* source must exist */ error = -ENOENT; if (d_is_negative(old_dentry)) goto exit4; new_dentry = lookup_one_qstr_excl(&new_last, new_path.dentry, lookup_flags | target_flags); error = PTR_ERR(new_dentry); if (IS_ERR(new_dentry)) goto exit4; error = -EEXIST; if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry)) goto exit5; if (flags & RENAME_EXCHANGE) { error = -ENOENT; if (d_is_negative(new_dentry)) goto exit5; if (!d_is_dir(new_dentry)) { error = -ENOTDIR; if (new_last.name[new_last.len]) goto exit5; } } /* unless the source is a directory trailing slashes give -ENOTDIR */ if (!d_is_dir(old_dentry)) { error = -ENOTDIR; if (old_last.name[old_last.len]) goto exit5; if (!(flags & RENAME_EXCHANGE) && new_last.name[new_last.len]) goto exit5; } /* source should not be ancestor of target */ error = -EINVAL; if (old_dentry == trap) goto exit5; /* target should not be an ancestor of source */ if (!(flags & RENAME_EXCHANGE)) error = -ENOTEMPTY; if (new_dentry == trap) goto exit5; error = security_path_rename(&old_path, old_dentry, &new_path, new_dentry, flags); if (error) goto exit5; rd.old_dir = old_path.dentry->d_inode; rd.old_dentry = old_dentry; rd.old_mnt_idmap = mnt_idmap(old_path.mnt); rd.new_dir = new_path.dentry->d_inode; rd.new_dentry = new_dentry; rd.new_mnt_idmap = mnt_idmap(new_path.mnt); rd.delegated_inode = &delegated_inode; rd.flags = flags; error = vfs_rename(&rd); exit5: dput(new_dentry); exit4: dput(old_dentry); exit3: unlock_rename(new_path.dentry, old_path.dentry); exit_lock_rename: if (delegated_inode) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; } mnt_drop_write(old_path.mnt); exit2: if (retry_estale(error, lookup_flags)) should_retry = true; path_put(&new_path); exit1: path_put(&old_path); if (should_retry) { should_retry = false; lookup_flags |= LOOKUP_REVAL; goto retry; } put_names: putname(from); putname(to); return error; } SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, unsigned int, flags) { return do_renameat2(olddfd, getname(oldname), newdfd, getname(newname), flags); } SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname) { return do_renameat2(olddfd, getname(oldname), newdfd, getname(newname), 0); } SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newname) { return do_renameat2(AT_FDCWD, getname(oldname), AT_FDCWD, getname(newname), 0); } int readlink_copy(char __user *buffer, int buflen, const char *link) { int len = PTR_ERR(link); if (IS_ERR(link)) goto out; len = strlen(link); if (len > (unsigned) buflen) len = buflen; if (copy_to_user(buffer, link, len)) len = -EFAULT; out: return len; } /** * vfs_readlink - copy symlink body into userspace buffer * @dentry: dentry on which to get symbolic link * @buffer: user memory pointer * @buflen: size of buffer * * Does not touch atime. That's up to the caller if necessary * * Does not call security hook. */ int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen) { struct inode *inode = d_inode(dentry); DEFINE_DELAYED_CALL(done); const char *link; int res; if (unlikely(!(inode->i_opflags & IOP_DEFAULT_READLINK))) { if (unlikely(inode->i_op->readlink)) return inode->i_op->readlink(dentry, buffer, buflen); if (!d_is_symlink(dentry)) return -EINVAL; spin_lock(&inode->i_lock); inode->i_opflags |= IOP_DEFAULT_READLINK; spin_unlock(&inode->i_lock); } link = READ_ONCE(inode->i_link); if (!link) { link = inode->i_op->get_link(dentry, inode, &done); if (IS_ERR(link)) return PTR_ERR(link); } res = readlink_copy(buffer, buflen, link); do_delayed_call(&done); return res; } EXPORT_SYMBOL(vfs_readlink); /** * vfs_get_link - get symlink body * @dentry: dentry on which to get symbolic link * @done: caller needs to free returned data with this * * Calls security hook and i_op->get_link() on the supplied inode. * * It does not touch atime. That's up to the caller if necessary. * * Does not work on "special" symlinks like /proc/$$/fd/N */ const char *vfs_get_link(struct dentry *dentry, struct delayed_call *done) { const char *res = ERR_PTR(-EINVAL); struct inode *inode = d_inode(dentry); if (d_is_symlink(dentry)) { res = ERR_PTR(security_inode_readlink(dentry)); if (!res) res = inode->i_op->get_link(dentry, inode, done); } return res; } EXPORT_SYMBOL(vfs_get_link); /* get the link contents into pagecache */ const char *page_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *callback) { char *kaddr; struct page *page; struct address_space *mapping = inode->i_mapping; if (!dentry) { page = find_get_page(mapping, 0); if (!page) return ERR_PTR(-ECHILD); if (!PageUptodate(page)) { put_page(page); return ERR_PTR(-ECHILD); } } else { page = read_mapping_page(mapping, 0, NULL); if (IS_ERR(page)) return (char*)page; } set_delayed_call(callback, page_put_link, page); BUG_ON(mapping_gfp_mask(mapping) & __GFP_HIGHMEM); kaddr = page_address(page); nd_terminate_link(kaddr, inode->i_size, PAGE_SIZE - 1); return kaddr; } EXPORT_SYMBOL(page_get_link); void page_put_link(void *arg) { put_page(arg); } EXPORT_SYMBOL(page_put_link); int page_readlink(struct dentry *dentry, char __user *buffer, int buflen) { DEFINE_DELAYED_CALL(done); int res = readlink_copy(buffer, buflen, page_get_link(dentry, d_inode(dentry), &done)); do_delayed_call(&done); return res; } EXPORT_SYMBOL(page_readlink); int page_symlink(struct inode *inode, const char *symname, int len) { struct address_space *mapping = inode->i_mapping; const struct address_space_operations *aops = mapping->a_ops; bool nofs = !mapping_gfp_constraint(mapping, __GFP_FS); struct page *page; void *fsdata = NULL; int err; unsigned int flags; retry: if (nofs) flags = memalloc_nofs_save(); err = aops->write_begin(NULL, mapping, 0, len-1, &page, &fsdata); if (nofs) memalloc_nofs_restore(flags); if (err) goto fail; memcpy(page_address(page), symname, len-1); err = aops->write_end(NULL, mapping, 0, len-1, len-1, page, fsdata); if (err < 0) goto fail; if (err < len-1) goto retry; mark_inode_dirty(inode); return 0; fail: return err; } EXPORT_SYMBOL(page_symlink); const struct inode_operations page_symlink_inode_operations = { .get_link = page_get_link, }; EXPORT_SYMBOL(page_symlink_inode_operations); |
| 363 362 349 349 222 220 12 4 8 8 8 22 17 21 20 10 8 8 20 8 12 19 20 14 11 11 21 21 5 5 21 3 17 3 21 157 52 52 22 22 68 8 8 68 302 1 30 35 32 35 35 32 30 32 30 32 8 8 35 35 32 35 35 1 388 349 347 92 334 347 347 35 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 | // SPDX-License-Identifier: GPL-2.0-only /* * TCP CUBIC: Binary Increase Congestion control for TCP v2.3 * Home page: * http://netsrv.csc.ncsu.edu/twiki/bin/view/Main/BIC * This is from the implementation of CUBIC TCP in * Sangtae Ha, Injong Rhee and Lisong Xu, * "CUBIC: A New TCP-Friendly High-Speed TCP Variant" * in ACM SIGOPS Operating System Review, July 2008. * Available from: * http://netsrv.csc.ncsu.edu/export/cubic_a_new_tcp_2008.pdf * * CUBIC integrates a new slow start algorithm, called HyStart. * The details of HyStart are presented in * Sangtae Ha and Injong Rhee, * "Taming the Elephants: New TCP Slow Start", NCSU TechReport 2008. * Available from: * http://netsrv.csc.ncsu.edu/export/hystart_techreport_2008.pdf * * All testing results are available from: * http://netsrv.csc.ncsu.edu/wiki/index.php/TCP_Testing * * Unless CUBIC is enabled and congestion window is large * this behaves the same as the original Reno. */ #include <linux/mm.h> #include <linux/btf.h> #include <linux/btf_ids.h> #include <linux/module.h> #include <linux/math64.h> #include <net/tcp.h> #define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation * max_cwnd = snd_cwnd * beta */ #define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */ /* Two methods of hybrid slow start */ #define HYSTART_ACK_TRAIN 0x1 #define HYSTART_DELAY 0x2 /* Number of delay samples for detecting the increase of delay */ #define HYSTART_MIN_SAMPLES 8 #define HYSTART_DELAY_MIN (4000U) /* 4 ms */ #define HYSTART_DELAY_MAX (16000U) /* 16 ms */ #define HYSTART_DELAY_THRESH(x) clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX) static int fast_convergence __read_mostly = 1; static int beta __read_mostly = 717; /* = 717/1024 (BICTCP_BETA_SCALE) */ static int initial_ssthresh __read_mostly; static int bic_scale __read_mostly = 41; static int tcp_friendliness __read_mostly = 1; static int hystart __read_mostly = 1; static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY; static int hystart_low_window __read_mostly = 16; static int hystart_ack_delta_us __read_mostly = 2000; static u32 cube_rtt_scale __read_mostly; static u32 beta_scale __read_mostly; static u64 cube_factor __read_mostly; /* Note parameters that are used for precomputing scale factors are read-only */ module_param(fast_convergence, int, 0644); MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence"); module_param(beta, int, 0644); MODULE_PARM_DESC(beta, "beta for multiplicative increase"); module_param(initial_ssthresh, int, 0644); MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold"); module_param(bic_scale, int, 0444); MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_scale/1024)"); module_param(tcp_friendliness, int, 0644); MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness"); module_param(hystart, int, 0644); MODULE_PARM_DESC(hystart, "turn on/off hybrid slow start algorithm"); module_param(hystart_detect, int, 0644); MODULE_PARM_DESC(hystart_detect, "hybrid slow start detection mechanisms" " 1: packet-train 2: delay 3: both packet-train and delay"); module_param(hystart_low_window, int, 0644); MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start"); module_param(hystart_ack_delta_us, int, 0644); MODULE_PARM_DESC(hystart_ack_delta_us, "spacing between ack's indicating train (usecs)"); /* BIC TCP Parameters */ struct bictcp { u32 cnt; /* increase cwnd by 1 after ACKs */ u32 last_max_cwnd; /* last maximum snd_cwnd */ u32 last_cwnd; /* the last snd_cwnd */ u32 last_time; /* time when updated last_cwnd */ u32 bic_origin_point;/* origin point of bic function */ u32 bic_K; /* time to origin point from the beginning of the current epoch */ u32 delay_min; /* min delay (usec) */ u32 epoch_start; /* beginning of an epoch */ u32 ack_cnt; /* number of acks */ u32 tcp_cwnd; /* estimated tcp cwnd */ u16 unused; u8 sample_cnt; /* number of samples to decide curr_rtt */ u8 found; /* the exit point is found? */ u32 round_start; /* beginning of each round */ u32 end_seq; /* end_seq of the round */ u32 last_ack; /* last time when the ACK spacing is close */ u32 curr_rtt; /* the minimum rtt of current round */ }; static inline void bictcp_reset(struct bictcp *ca) { memset(ca, 0, offsetof(struct bictcp, unused)); ca->found = 0; } static inline u32 bictcp_clock_us(const struct sock *sk) { return tcp_sk(sk)->tcp_mstamp; } static inline void bictcp_hystart_reset(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); ca->round_start = ca->last_ack = bictcp_clock_us(sk); ca->end_seq = tp->snd_nxt; ca->curr_rtt = ~0U; ca->sample_cnt = 0; } __bpf_kfunc static void cubictcp_init(struct sock *sk) { struct bictcp *ca = inet_csk_ca(sk); bictcp_reset(ca); if (hystart) bictcp_hystart_reset(sk); if (!hystart && initial_ssthresh) tcp_sk(sk)->snd_ssthresh = initial_ssthresh; } __bpf_kfunc static void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) { if (event == CA_EVENT_TX_START) { struct bictcp *ca = inet_csk_ca(sk); u32 now = tcp_jiffies32; s32 delta; delta = now - tcp_sk(sk)->lsndtime; /* We were application limited (idle) for a while. * Shift epoch_start to keep cwnd growth to cubic curve. */ if (ca->epoch_start && delta > 0) { ca->epoch_start += delta; if (after(ca->epoch_start, now)) ca->epoch_start = now; } return; } } /* calculate the cubic root of x using a table lookup followed by one * Newton-Raphson iteration. * Avg err ~= 0.195% */ static u32 cubic_root(u64 a) { u32 x, b, shift; /* * cbrt(x) MSB values for x MSB values in [0..63]. * Precomputed then refined by hand - Willy Tarreau * * For x in [0..63], * v = cbrt(x << 18) - 1 * cbrt(x) = (v[x] + 10) >> 6 */ static const u8 v[] = { /* 0x00 */ 0, 54, 54, 54, 118, 118, 118, 118, /* 0x08 */ 123, 129, 134, 138, 143, 147, 151, 156, /* 0x10 */ 157, 161, 164, 168, 170, 173, 176, 179, /* 0x18 */ 181, 185, 187, 190, 192, 194, 197, 199, /* 0x20 */ 200, 202, 204, 206, 209, 211, 213, 215, /* 0x28 */ 217, 219, 221, 222, 224, 225, 227, 229, /* 0x30 */ 231, 232, 234, 236, 237, 239, 240, 242, /* 0x38 */ 244, 245, 246, 248, 250, 251, 252, 254, }; b = fls64(a); if (b < 7) { /* a in [0..63] */ return ((u32)v[(u32)a] + 35) >> 6; } b = ((b * 84) >> 8) - 1; shift = (a >> (b * 3)); x = ((u32)(((u32)v[shift] + 10) << b)) >> 6; /* * Newton-Raphson iteration * 2 * x = ( 2 * x + a / x ) / 3 * k+1 k k */ x = (2 * x + (u32)div64_u64(a, (u64)x * (u64)(x - 1))); x = ((x * 341) >> 10); return x; } /* * Compute congestion window to use. */ static inline void bictcp_update(struct bictcp *ca, u32 cwnd, u32 acked) { u32 delta, bic_target, max_cnt; u64 offs, t; ca->ack_cnt += acked; /* count the number of ACKed packets */ if (ca->last_cwnd == cwnd && (s32)(tcp_jiffies32 - ca->last_time) <= HZ / 32) return; /* The CUBIC function can update ca->cnt at most once per jiffy. * On all cwnd reduction events, ca->epoch_start is set to 0, * which will force a recalculation of ca->cnt. */ if (ca->epoch_start && tcp_jiffies32 == ca->last_time) goto tcp_friendliness; ca->last_cwnd = cwnd; ca->last_time = tcp_jiffies32; if (ca->epoch_start == 0) { ca->epoch_start = tcp_jiffies32; /* record beginning */ ca->ack_cnt = acked; /* start counting */ ca->tcp_cwnd = cwnd; /* syn with cubic */ if (ca->last_max_cwnd <= cwnd) { ca->bic_K = 0; ca->bic_origin_point = cwnd; } else { /* Compute new K based on * (wmax-cwnd) * (srtt>>3 / HZ) / c * 2^(3*bictcp_HZ) */ ca->bic_K = cubic_root(cube_factor * (ca->last_max_cwnd - cwnd)); ca->bic_origin_point = ca->last_max_cwnd; } } /* cubic function - calc*/ /* calculate c * time^3 / rtt, * while considering overflow in calculation of time^3 * (so time^3 is done by using 64 bit) * and without the support of division of 64bit numbers * (so all divisions are done by using 32 bit) * also NOTE the unit of those veriables * time = (t - K) / 2^bictcp_HZ * c = bic_scale >> 10 * rtt = (srtt >> 3) / HZ * !!! The following code does not have overflow problems, * if the cwnd < 1 million packets !!! */ t = (s32)(tcp_jiffies32 - ca->epoch_start); t += usecs_to_jiffies(ca->delay_min); /* change the unit from HZ to bictcp_HZ */ t <<= BICTCP_HZ; do_div(t, HZ); if (t < ca->bic_K) /* t - K */ offs = ca->bic_K - t; else offs = t - ca->bic_K; /* c/rtt * (t-K)^3 */ delta = (cube_rtt_scale * offs * offs * offs) >> (10+3*BICTCP_HZ); if (t < ca->bic_K) /* below origin*/ bic_target = ca->bic_origin_point - delta; else /* above origin*/ bic_target = ca->bic_origin_point + delta; /* cubic function - calc bictcp_cnt*/ if (bic_target > cwnd) { ca->cnt = cwnd / (bic_target - cwnd); } else { ca->cnt = 100 * cwnd; /* very small increment*/ } /* * The initial growth of cubic function may be too conservative * when the available bandwidth is still unknown. */ if (ca->last_max_cwnd == 0 && ca->cnt > 20) ca->cnt = 20; /* increase cwnd 5% per RTT */ tcp_friendliness: /* TCP Friendly */ if (tcp_friendliness) { u32 scale = beta_scale; delta = (cwnd * scale) >> 3; while (ca->ack_cnt > delta) { /* update tcp cwnd */ ca->ack_cnt -= delta; ca->tcp_cwnd++; } if (ca->tcp_cwnd > cwnd) { /* if bic is slower than tcp */ delta = ca->tcp_cwnd - cwnd; max_cnt = cwnd / delta; if (ca->cnt > max_cnt) ca->cnt = max_cnt; } } /* The maximum rate of cwnd increase CUBIC allows is 1 packet per * 2 packets ACKed, meaning cwnd grows at 1.5x per RTT. */ ca->cnt = max(ca->cnt, 2U); } __bpf_kfunc static void cubictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); if (!tcp_is_cwnd_limited(sk)) return; if (tcp_in_slow_start(tp)) { acked = tcp_slow_start(tp, acked); if (!acked) return; } bictcp_update(ca, tcp_snd_cwnd(tp), acked); tcp_cong_avoid_ai(tp, ca->cnt, acked); } __bpf_kfunc static u32 cubictcp_recalc_ssthresh(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); ca->epoch_start = 0; /* end of epoch */ /* Wmax and fast convergence */ if (tcp_snd_cwnd(tp) < ca->last_max_cwnd && fast_convergence) ca->last_max_cwnd = (tcp_snd_cwnd(tp) * (BICTCP_BETA_SCALE + beta)) / (2 * BICTCP_BETA_SCALE); else ca->last_max_cwnd = tcp_snd_cwnd(tp); return max((tcp_snd_cwnd(tp) * beta) / BICTCP_BETA_SCALE, 2U); } __bpf_kfunc static void cubictcp_state(struct sock *sk, u8 new_state) { if (new_state == TCP_CA_Loss) { bictcp_reset(inet_csk_ca(sk)); bictcp_hystart_reset(sk); } } /* Account for TSO/GRO delays. * Otherwise short RTT flows could get too small ssthresh, since during * slow start we begin with small TSO packets and ca->delay_min would * not account for long aggregation delay when TSO packets get bigger. * Ideally even with a very small RTT we would like to have at least one * TSO packet being sent and received by GRO, and another one in qdisc layer. * We apply another 100% factor because @rate is doubled at this point. * We cap the cushion to 1ms. */ static u32 hystart_ack_delay(const struct sock *sk) { unsigned long rate; rate = READ_ONCE(sk->sk_pacing_rate); if (!rate) return 0; return min_t(u64, USEC_PER_MSEC, div64_ul((u64)sk->sk_gso_max_size * 4 * USEC_PER_SEC, rate)); } static void hystart_update(struct sock *sk, u32 delay) { struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); u32 threshold; if (after(tp->snd_una, ca->end_seq)) bictcp_hystart_reset(sk); if (hystart_detect & HYSTART_ACK_TRAIN) { u32 now = bictcp_clock_us(sk); /* first detection parameter - ack-train detection */ if ((s32)(now - ca->last_ack) <= hystart_ack_delta_us) { ca->last_ack = now; threshold = ca->delay_min + hystart_ack_delay(sk); /* Hystart ack train triggers if we get ack past * ca->delay_min/2. * Pacing might have delayed packets up to RTT/2 * during slow start. */ if (sk->sk_pacing_status == SK_PACING_NONE) threshold >>= 1; if ((s32)(now - ca->round_start) > threshold) { ca->found = 1; pr_debug("hystart_ack_train (%u > %u) delay_min %u (+ ack_delay %u) cwnd %u\n", now - ca->round_start, threshold, ca->delay_min, hystart_ack_delay(sk), tcp_snd_cwnd(tp)); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHYSTARTTRAINDETECT); NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPHYSTARTTRAINCWND, tcp_snd_cwnd(tp)); tp->snd_ssthresh = tcp_snd_cwnd(tp); } } } if (hystart_detect & HYSTART_DELAY) { /* obtain the minimum delay of more than sampling packets */ if (ca->curr_rtt > delay) ca->curr_rtt = delay; if (ca->sample_cnt < HYSTART_MIN_SAMPLES) { ca->sample_cnt++; } else { if (ca->curr_rtt > ca->delay_min + HYSTART_DELAY_THRESH(ca->delay_min >> 3)) { ca->found = 1; NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHYSTARTDELAYDETECT); NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPHYSTARTDELAYCWND, tcp_snd_cwnd(tp)); tp->snd_ssthresh = tcp_snd_cwnd(tp); } } } } __bpf_kfunc static void cubictcp_acked(struct sock *sk, const struct ack_sample *sample) { const struct tcp_sock *tp = tcp_sk(sk); struct bictcp *ca = inet_csk_ca(sk); u32 delay; /* Some calls are for duplicates without timetamps */ if (sample->rtt_us < 0) return; /* Discard delay samples right after fast recovery */ if (ca->epoch_start && (s32)(tcp_jiffies32 - ca->epoch_start) < HZ) return; delay = sample->rtt_us; if (delay == 0) delay = 1; /* first time call or link delay decreases */ if (ca->delay_min == 0 || ca->delay_min > delay) ca->delay_min = delay; /* hystart triggers when cwnd is larger than some threshold */ if (!ca->found && tcp_in_slow_start(tp) && hystart && tcp_snd_cwnd(tp) >= hystart_low_window) hystart_update(sk, delay); } static struct tcp_congestion_ops cubictcp __read_mostly = { .init = cubictcp_init, .ssthresh = cubictcp_recalc_ssthresh, .cong_avoid = cubictcp_cong_avoid, .set_state = cubictcp_state, .undo_cwnd = tcp_reno_undo_cwnd, .cwnd_event = cubictcp_cwnd_event, .pkts_acked = cubictcp_acked, .owner = THIS_MODULE, .name = "cubic", }; BTF_KFUNCS_START(tcp_cubic_check_kfunc_ids) BTF_ID_FLAGS(func, cubictcp_init) BTF_ID_FLAGS(func, cubictcp_recalc_ssthresh) BTF_ID_FLAGS(func, cubictcp_cong_avoid) BTF_ID_FLAGS(func, cubictcp_state) BTF_ID_FLAGS(func, cubictcp_cwnd_event) BTF_ID_FLAGS(func, cubictcp_acked) BTF_KFUNCS_END(tcp_cubic_check_kfunc_ids) static const struct btf_kfunc_id_set tcp_cubic_kfunc_set = { .owner = THIS_MODULE, .set = &tcp_cubic_check_kfunc_ids, }; static int __init cubictcp_register(void) { int ret; BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); /* Precompute a bunch of the scaling factors that are used per-packet * based on SRTT of 100ms */ beta_scale = 8*(BICTCP_BETA_SCALE+beta) / 3 / (BICTCP_BETA_SCALE - beta); cube_rtt_scale = (bic_scale * 10); /* 1024*c/rtt */ /* calculate the "K" for (wmax-cwnd) = c/rtt * K^3 * so K = cubic_root( (wmax-cwnd)*rtt/c ) * the unit of K is bictcp_HZ=2^10, not HZ * * c = bic_scale >> 10 * rtt = 100ms * * the following code has been designed and tested for * cwnd < 1 million packets * RTT < 100 seconds * HZ < 1,000,00 (corresponding to 10 nano-second) */ /* 1/c * 2^2*bictcp_HZ * srtt */ cube_factor = 1ull << (10+3*BICTCP_HZ); /* 2^40 */ /* divide by bic_scale and by constant Srtt (100ms) */ do_div(cube_factor, bic_scale * 10); ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &tcp_cubic_kfunc_set); if (ret < 0) return ret; return tcp_register_congestion_control(&cubictcp); } static void __exit cubictcp_unregister(void) { tcp_unregister_congestion_control(&cubictcp); } module_init(cubictcp_register); module_exit(cubictcp_unregister); MODULE_AUTHOR("Sangtae Ha, Stephen Hemminger"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("CUBIC TCP"); MODULE_VERSION("2.3"); |
| 9 9 9 9 9 9 9 10 10 10 9 10 10 8 8 10 10 10 10 68 68 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 | /* * This file implement the Wireless Extensions proc API. * * Authors : Jean Tourrilhes - HPL - <jt@hpl.hp.com> * Copyright (c) 1997-2007 Jean Tourrilhes, All Rights Reserved. * * (As all part of the Linux kernel, this file is GPL) */ /* * The /proc/net/wireless file is a human readable user-space interface * exporting various wireless specific statistics from the wireless devices. * This is the most popular part of the Wireless Extensions ;-) * * This interface is a pure clone of /proc/net/dev (in net/core/dev.c). * The content of the file is basically the content of "struct iw_statistics". */ #include <linux/module.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/wireless.h> #include <linux/netdevice.h> #include <linux/rtnetlink.h> #include <net/iw_handler.h> #include <net/wext.h> static void wireless_seq_printf_stats(struct seq_file *seq, struct net_device *dev) { /* Get stats from the driver */ struct iw_statistics *stats = get_wireless_stats(dev); static struct iw_statistics nullstats = {}; /* show device if it's wireless regardless of current stats */ if (!stats) { #ifdef CONFIG_WIRELESS_EXT if (dev->wireless_handlers) stats = &nullstats; #endif #ifdef CONFIG_CFG80211 if (dev->ieee80211_ptr) stats = &nullstats; #endif } if (stats) { seq_printf(seq, "%6s: %04x %3d%c %3d%c %3d%c %6d %6d %6d " "%6d %6d %6d\n", dev->name, stats->status, stats->qual.qual, stats->qual.updated & IW_QUAL_QUAL_UPDATED ? '.' : ' ', ((__s32) stats->qual.level) - ((stats->qual.updated & IW_QUAL_DBM) ? 0x100 : 0), stats->qual.updated & IW_QUAL_LEVEL_UPDATED ? '.' : ' ', ((__s32) stats->qual.noise) - ((stats->qual.updated & IW_QUAL_DBM) ? 0x100 : 0), stats->qual.updated & IW_QUAL_NOISE_UPDATED ? '.' : ' ', stats->discard.nwid, stats->discard.code, stats->discard.fragment, stats->discard.retries, stats->discard.misc, stats->miss.beacon); if (stats != &nullstats) stats->qual.updated &= ~IW_QUAL_ALL_UPDATED; } } /* ---------------------------------------------------------------- */ /* * Print info for /proc/net/wireless (print all entries) */ static int wireless_dev_seq_show(struct seq_file *seq, void *v) { might_sleep(); if (v == SEQ_START_TOKEN) seq_printf(seq, "Inter-| sta-| Quality | Discarded " "packets | Missed | WE\n" " face | tus | link level noise | nwid " "crypt frag retry misc | beacon | %d\n", WIRELESS_EXT); else wireless_seq_printf_stats(seq, v); return 0; } static void *wireless_dev_seq_start(struct seq_file *seq, loff_t *pos) { struct net *net = seq_file_net(seq); loff_t off; struct net_device *dev; rtnl_lock(); if (!*pos) return SEQ_START_TOKEN; off = 1; for_each_netdev(net, dev) if (off++ == *pos) return dev; return NULL; } static void *wireless_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct net *net = seq_file_net(seq); ++*pos; return v == SEQ_START_TOKEN ? first_net_device(net) : next_net_device(v); } static void wireless_dev_seq_stop(struct seq_file *seq, void *v) { rtnl_unlock(); } static const struct seq_operations wireless_seq_ops = { .start = wireless_dev_seq_start, .next = wireless_dev_seq_next, .stop = wireless_dev_seq_stop, .show = wireless_dev_seq_show, }; int __net_init wext_proc_init(struct net *net) { /* Create /proc/net/wireless entry */ if (!proc_create_net("wireless", 0444, net->proc_net, &wireless_seq_ops, sizeof(struct seq_net_private))) return -ENOMEM; return 0; } void __net_exit wext_proc_exit(struct net *net) { remove_proc_entry("wireless", net->proc_net); } |
| 512 4 3 3 1 1 4 3 2 2 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 | // SPDX-License-Identifier: GPL-2.0 #include "io_uring.h" #include "napi.h" #ifdef CONFIG_NET_RX_BUSY_POLL /* Timeout for cleanout of stale entries. */ #define NAPI_TIMEOUT (60 * SEC_CONVERSION) struct io_napi_entry { unsigned int napi_id; struct list_head list; unsigned long timeout; struct hlist_node node; struct rcu_head rcu; }; static struct io_napi_entry *io_napi_hash_find(struct hlist_head *hash_list, unsigned int napi_id) { struct io_napi_entry *e; hlist_for_each_entry_rcu(e, hash_list, node) { if (e->napi_id != napi_id) continue; return e; } return NULL; } static inline ktime_t net_to_ktime(unsigned long t) { /* napi approximating usecs, reverse busy_loop_current_time */ return ns_to_ktime(t << 10); } void __io_napi_add(struct io_ring_ctx *ctx, struct socket *sock) { struct hlist_head *hash_list; unsigned int napi_id; struct sock *sk; struct io_napi_entry *e; sk = sock->sk; if (!sk) return; napi_id = READ_ONCE(sk->sk_napi_id); /* Non-NAPI IDs can be rejected. */ if (napi_id < MIN_NAPI_ID) return; hash_list = &ctx->napi_ht[hash_min(napi_id, HASH_BITS(ctx->napi_ht))]; rcu_read_lock(); e = io_napi_hash_find(hash_list, napi_id); if (e) { e->timeout = jiffies + NAPI_TIMEOUT; rcu_read_unlock(); return; } rcu_read_unlock(); e = kmalloc(sizeof(*e), GFP_NOWAIT); if (!e) return; e->napi_id = napi_id; e->timeout = jiffies + NAPI_TIMEOUT; spin_lock(&ctx->napi_lock); if (unlikely(io_napi_hash_find(hash_list, napi_id))) { spin_unlock(&ctx->napi_lock); kfree(e); return; } hlist_add_tail_rcu(&e->node, hash_list); list_add_tail(&e->list, &ctx->napi_list); spin_unlock(&ctx->napi_lock); } static void __io_napi_remove_stale(struct io_ring_ctx *ctx) { struct io_napi_entry *e; unsigned int i; spin_lock(&ctx->napi_lock); hash_for_each(ctx->napi_ht, i, e, node) { if (time_after(jiffies, e->timeout)) { list_del(&e->list); hash_del_rcu(&e->node); kfree_rcu(e, rcu); } } spin_unlock(&ctx->napi_lock); } static inline void io_napi_remove_stale(struct io_ring_ctx *ctx, bool is_stale) { if (is_stale) __io_napi_remove_stale(ctx); } static inline bool io_napi_busy_loop_timeout(ktime_t start_time, ktime_t bp) { if (bp) { ktime_t end_time = ktime_add(start_time, bp); ktime_t now = net_to_ktime(busy_loop_current_time()); return ktime_after(now, end_time); } return true; } static bool io_napi_busy_loop_should_end(void *data, unsigned long start_time) { struct io_wait_queue *iowq = data; if (signal_pending(current)) return true; if (io_should_wake(iowq) || io_has_work(iowq->ctx)) return true; if (io_napi_busy_loop_timeout(net_to_ktime(start_time), iowq->napi_busy_poll_dt)) return true; return false; } static bool __io_napi_do_busy_loop(struct io_ring_ctx *ctx, void *loop_end_arg) { struct io_napi_entry *e; bool (*loop_end)(void *, unsigned long) = NULL; bool is_stale = false; if (loop_end_arg) loop_end = io_napi_busy_loop_should_end; list_for_each_entry_rcu(e, &ctx->napi_list, list) { napi_busy_loop_rcu(e->napi_id, loop_end, loop_end_arg, ctx->napi_prefer_busy_poll, BUSY_POLL_BUDGET); if (time_after(jiffies, e->timeout)) is_stale = true; } return is_stale; } static void io_napi_blocking_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq) { unsigned long start_time = busy_loop_current_time(); void *loop_end_arg = NULL; bool is_stale = false; /* Singular lists use a different napi loop end check function and are * only executed once. */ if (list_is_singular(&ctx->napi_list)) loop_end_arg = iowq; rcu_read_lock(); do { is_stale = __io_napi_do_busy_loop(ctx, loop_end_arg); } while (!io_napi_busy_loop_should_end(iowq, start_time) && !loop_end_arg); rcu_read_unlock(); io_napi_remove_stale(ctx, is_stale); } /* * io_napi_init() - Init napi settings * @ctx: pointer to io-uring context structure * * Init napi settings in the io-uring context. */ void io_napi_init(struct io_ring_ctx *ctx) { u64 sys_dt = READ_ONCE(sysctl_net_busy_poll) * NSEC_PER_USEC; INIT_LIST_HEAD(&ctx->napi_list); spin_lock_init(&ctx->napi_lock); ctx->napi_prefer_busy_poll = false; ctx->napi_busy_poll_dt = ns_to_ktime(sys_dt); } /* * io_napi_free() - Deallocate napi * @ctx: pointer to io-uring context structure * * Free the napi list and the hash table in the io-uring context. */ void io_napi_free(struct io_ring_ctx *ctx) { struct io_napi_entry *e; unsigned int i; spin_lock(&ctx->napi_lock); hash_for_each(ctx->napi_ht, i, e, node) { hash_del_rcu(&e->node); kfree_rcu(e, rcu); } spin_unlock(&ctx->napi_lock); } /* * io_napi_register() - Register napi with io-uring * @ctx: pointer to io-uring context structure * @arg: pointer to io_uring_napi structure * * Register napi in the io-uring context. */ int io_register_napi(struct io_ring_ctx *ctx, void __user *arg) { const struct io_uring_napi curr = { .busy_poll_to = ktime_to_us(ctx->napi_busy_poll_dt), .prefer_busy_poll = ctx->napi_prefer_busy_poll }; struct io_uring_napi napi; if (ctx->flags & IORING_SETUP_IOPOLL) return -EINVAL; if (copy_from_user(&napi, arg, sizeof(napi))) return -EFAULT; if (napi.pad[0] || napi.pad[1] || napi.pad[2] || napi.resv) return -EINVAL; if (copy_to_user(arg, &curr, sizeof(curr))) return -EFAULT; WRITE_ONCE(ctx->napi_busy_poll_dt, napi.busy_poll_to * NSEC_PER_USEC); WRITE_ONCE(ctx->napi_prefer_busy_poll, !!napi.prefer_busy_poll); WRITE_ONCE(ctx->napi_enabled, true); return 0; } /* * io_napi_unregister() - Unregister napi with io-uring * @ctx: pointer to io-uring context structure * @arg: pointer to io_uring_napi structure * * Unregister napi. If arg has been specified copy the busy poll timeout and * prefer busy poll setting to the passed in structure. */ int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg) { const struct io_uring_napi curr = { .busy_poll_to = ktime_to_us(ctx->napi_busy_poll_dt), .prefer_busy_poll = ctx->napi_prefer_busy_poll }; if (arg && copy_to_user(arg, &curr, sizeof(curr))) return -EFAULT; WRITE_ONCE(ctx->napi_busy_poll_dt, 0); WRITE_ONCE(ctx->napi_prefer_busy_poll, false); WRITE_ONCE(ctx->napi_enabled, false); return 0; } /* * __io_napi_adjust_timeout() - adjust busy loop timeout * @ctx: pointer to io-uring context structure * @iowq: pointer to io wait queue * @ts: pointer to timespec or NULL * * Adjust the busy loop timeout according to timespec and busy poll timeout. * If the specified NAPI timeout is bigger than the wait timeout, then adjust * the NAPI timeout accordingly. */ void __io_napi_adjust_timeout(struct io_ring_ctx *ctx, struct io_wait_queue *iowq, ktime_t to_wait) { ktime_t poll_dt = READ_ONCE(ctx->napi_busy_poll_dt); if (to_wait) poll_dt = min(poll_dt, to_wait); iowq->napi_busy_poll_dt = poll_dt; } /* * __io_napi_busy_loop() - execute busy poll loop * @ctx: pointer to io-uring context structure * @iowq: pointer to io wait queue * * Execute the busy poll loop and merge the spliced off list. */ void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq) { iowq->napi_prefer_busy_poll = READ_ONCE(ctx->napi_prefer_busy_poll); if (!(ctx->flags & IORING_SETUP_SQPOLL)) io_napi_blocking_busy_loop(ctx, iowq); } /* * io_napi_sqpoll_busy_poll() - busy poll loop for sqpoll * @ctx: pointer to io-uring context structure * * Splice of the napi list and execute the napi busy poll loop. */ int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx) { bool is_stale = false; if (!READ_ONCE(ctx->napi_busy_poll_dt)) return 0; if (list_empty_careful(&ctx->napi_list)) return 0; rcu_read_lock(); is_stale = __io_napi_do_busy_loop(ctx, NULL); rcu_read_unlock(); io_napi_remove_stale(ctx, is_stale); return 1; } #endif |
| 11 11 15 15 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 | // SPDX-License-Identifier: GPL-2.0 /* * Key setup facility for FS encryption support. * * Copyright (C) 2015, Google, Inc. * * Originally written by Michael Halcrow, Ildar Muslukhov, and Uday Savagaonkar. * Heavily modified since then. */ #include <crypto/skcipher.h> #include <linux/random.h> #include "fscrypt_private.h" struct fscrypt_mode fscrypt_modes[] = { [FSCRYPT_MODE_AES_256_XTS] = { .friendly_name = "AES-256-XTS", .cipher_str = "xts(aes)", .keysize = 64, .security_strength = 32, .ivsize = 16, .blk_crypto_mode = BLK_ENCRYPTION_MODE_AES_256_XTS, }, [FSCRYPT_MODE_AES_256_CTS] = { .friendly_name = "AES-256-CBC-CTS", .cipher_str = "cts(cbc(aes))", .keysize = 32, .security_strength = 32, .ivsize = 16, }, [FSCRYPT_MODE_AES_128_CBC] = { .friendly_name = "AES-128-CBC-ESSIV", .cipher_str = "essiv(cbc(aes),sha256)", .keysize = 16, .security_strength = 16, .ivsize = 16, .blk_crypto_mode = BLK_ENCRYPTION_MODE_AES_128_CBC_ESSIV, }, [FSCRYPT_MODE_AES_128_CTS] = { .friendly_name = "AES-128-CBC-CTS", .cipher_str = "cts(cbc(aes))", .keysize = 16, .security_strength = 16, .ivsize = 16, }, [FSCRYPT_MODE_SM4_XTS] = { .friendly_name = "SM4-XTS", .cipher_str = "xts(sm4)", .keysize = 32, .security_strength = 16, .ivsize = 16, .blk_crypto_mode = BLK_ENCRYPTION_MODE_SM4_XTS, }, [FSCRYPT_MODE_SM4_CTS] = { .friendly_name = "SM4-CBC-CTS", .cipher_str = "cts(cbc(sm4))", .keysize = 16, .security_strength = 16, .ivsize = 16, }, [FSCRYPT_MODE_ADIANTUM] = { .friendly_name = "Adiantum", .cipher_str = "adiantum(xchacha12,aes)", .keysize = 32, .security_strength = 32, .ivsize = 32, .blk_crypto_mode = BLK_ENCRYPTION_MODE_ADIANTUM, }, [FSCRYPT_MODE_AES_256_HCTR2] = { .friendly_name = "AES-256-HCTR2", .cipher_str = "hctr2(aes)", .keysize = 32, .security_strength = 32, .ivsize = 32, }, }; static DEFINE_MUTEX(fscrypt_mode_key_setup_mutex); static struct fscrypt_mode * select_encryption_mode(const union fscrypt_policy *policy, const struct inode *inode) { BUILD_BUG_ON(ARRAY_SIZE(fscrypt_modes) != FSCRYPT_MODE_MAX + 1); if (S_ISREG(inode->i_mode)) return &fscrypt_modes[fscrypt_policy_contents_mode(policy)]; if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) return &fscrypt_modes[fscrypt_policy_fnames_mode(policy)]; WARN_ONCE(1, "fscrypt: filesystem tried to load encryption info for inode %lu, which is not encryptable (file type %d)\n", inode->i_ino, (inode->i_mode & S_IFMT)); return ERR_PTR(-EINVAL); } /* Create a symmetric cipher object for the given encryption mode and key */ static struct crypto_skcipher * fscrypt_allocate_skcipher(struct fscrypt_mode *mode, const u8 *raw_key, const struct inode *inode) { struct crypto_skcipher *tfm; int err; tfm = crypto_alloc_skcipher(mode->cipher_str, 0, 0); if (IS_ERR(tfm)) { if (PTR_ERR(tfm) == -ENOENT) { fscrypt_warn(inode, "Missing crypto API support for %s (API name: \"%s\")", mode->friendly_name, mode->cipher_str); return ERR_PTR(-ENOPKG); } fscrypt_err(inode, "Error allocating '%s' transform: %ld", mode->cipher_str, PTR_ERR(tfm)); return tfm; } if (!xchg(&mode->logged_cryptoapi_impl, 1)) { /* * fscrypt performance can vary greatly depending on which * crypto algorithm implementation is used. Help people debug * performance problems by logging the ->cra_driver_name the * first time a mode is used. */ pr_info("fscrypt: %s using implementation \"%s\"\n", mode->friendly_name, crypto_skcipher_driver_name(tfm)); } if (WARN_ON_ONCE(crypto_skcipher_ivsize(tfm) != mode->ivsize)) { err = -EINVAL; goto err_free_tfm; } crypto_skcipher_set_flags(tfm, CRYPTO_TFM_REQ_FORBID_WEAK_KEYS); err = crypto_skcipher_setkey(tfm, raw_key, mode->keysize); if (err) goto err_free_tfm; return tfm; err_free_tfm: crypto_free_skcipher(tfm); return ERR_PTR(err); } /* * Prepare the crypto transform object or blk-crypto key in @prep_key, given the * raw key, encryption mode (@ci->ci_mode), flag indicating which encryption * implementation (fs-layer or blk-crypto) will be used (@ci->ci_inlinecrypt), * and IV generation method (@ci->ci_policy.flags). */ int fscrypt_prepare_key(struct fscrypt_prepared_key *prep_key, const u8 *raw_key, const struct fscrypt_inode_info *ci) { struct crypto_skcipher *tfm; if (fscrypt_using_inline_encryption(ci)) return fscrypt_prepare_inline_crypt_key(prep_key, raw_key, ci); tfm = fscrypt_allocate_skcipher(ci->ci_mode, raw_key, ci->ci_inode); if (IS_ERR(tfm)) return PTR_ERR(tfm); /* * Pairs with the smp_load_acquire() in fscrypt_is_key_prepared(). * I.e., here we publish ->tfm with a RELEASE barrier so that * concurrent tasks can ACQUIRE it. Note that this concurrency is only * possible for per-mode keys, not for per-file keys. */ smp_store_release(&prep_key->tfm, tfm); return 0; } /* Destroy a crypto transform object and/or blk-crypto key. */ void fscrypt_destroy_prepared_key(struct super_block *sb, struct fscrypt_prepared_key *prep_key) { crypto_free_skcipher(prep_key->tfm); fscrypt_destroy_inline_crypt_key(sb, prep_key); memzero_explicit(prep_key, sizeof(*prep_key)); } /* Given a per-file encryption key, set up the file's crypto transform object */ int fscrypt_set_per_file_enc_key(struct fscrypt_inode_info *ci, const u8 *raw_key) { ci->ci_owns_key = true; return fscrypt_prepare_key(&ci->ci_enc_key, raw_key, ci); } static int setup_per_mode_enc_key(struct fscrypt_inode_info *ci, struct fscrypt_master_key *mk, struct fscrypt_prepared_key *keys, u8 hkdf_context, bool include_fs_uuid) { const struct inode *inode = ci->ci_inode; const struct super_block *sb = inode->i_sb; struct fscrypt_mode *mode = ci->ci_mode; const u8 mode_num = mode - fscrypt_modes; struct fscrypt_prepared_key *prep_key; u8 mode_key[FSCRYPT_MAX_KEY_SIZE]; u8 hkdf_info[sizeof(mode_num) + sizeof(sb->s_uuid)]; unsigned int hkdf_infolen = 0; int err; if (WARN_ON_ONCE(mode_num > FSCRYPT_MODE_MAX)) return -EINVAL; prep_key = &keys[mode_num]; if (fscrypt_is_key_prepared(prep_key, ci)) { ci->ci_enc_key = *prep_key; return 0; } mutex_lock(&fscrypt_mode_key_setup_mutex); if (fscrypt_is_key_prepared(prep_key, ci)) goto done_unlock; BUILD_BUG_ON(sizeof(mode_num) != 1); BUILD_BUG_ON(sizeof(sb->s_uuid) != 16); BUILD_BUG_ON(sizeof(hkdf_info) != 17); hkdf_info[hkdf_infolen++] = mode_num; if (include_fs_uuid) { memcpy(&hkdf_info[hkdf_infolen], &sb->s_uuid, sizeof(sb->s_uuid)); hkdf_infolen += sizeof(sb->s_uuid); } err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf, hkdf_context, hkdf_info, hkdf_infolen, mode_key, mode->keysize); if (err) goto out_unlock; err = fscrypt_prepare_key(prep_key, mode_key, ci); memzero_explicit(mode_key, mode->keysize); if (err) goto out_unlock; done_unlock: ci->ci_enc_key = *prep_key; err = 0; out_unlock: mutex_unlock(&fscrypt_mode_key_setup_mutex); return err; } /* * Derive a SipHash key from the given fscrypt master key and the given * application-specific information string. * * Note that the KDF produces a byte array, but the SipHash APIs expect the key * as a pair of 64-bit words. Therefore, on big endian CPUs we have to do an * endianness swap in order to get the same results as on little endian CPUs. */ static int fscrypt_derive_siphash_key(const struct fscrypt_master_key *mk, u8 context, const u8 *info, unsigned int infolen, siphash_key_t *key) { int err; err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf, context, info, infolen, (u8 *)key, sizeof(*key)); if (err) return err; BUILD_BUG_ON(sizeof(*key) != 16); BUILD_BUG_ON(ARRAY_SIZE(key->key) != 2); le64_to_cpus(&key->key[0]); le64_to_cpus(&key->key[1]); return 0; } int fscrypt_derive_dirhash_key(struct fscrypt_inode_info *ci, const struct fscrypt_master_key *mk) { int err; err = fscrypt_derive_siphash_key(mk, HKDF_CONTEXT_DIRHASH_KEY, ci->ci_nonce, FSCRYPT_FILE_NONCE_SIZE, &ci->ci_dirhash_key); if (err) return err; ci->ci_dirhash_key_initialized = true; return 0; } void fscrypt_hash_inode_number(struct fscrypt_inode_info *ci, const struct fscrypt_master_key *mk) { WARN_ON_ONCE(ci->ci_inode->i_ino == 0); WARN_ON_ONCE(!mk->mk_ino_hash_key_initialized); ci->ci_hashed_ino = (u32)siphash_1u64(ci->ci_inode->i_ino, &mk->mk_ino_hash_key); } static int fscrypt_setup_iv_ino_lblk_32_key(struct fscrypt_inode_info *ci, struct fscrypt_master_key *mk) { int err; err = setup_per_mode_enc_key(ci, mk, mk->mk_iv_ino_lblk_32_keys, HKDF_CONTEXT_IV_INO_LBLK_32_KEY, true); if (err) return err; /* pairs with smp_store_release() below */ if (!smp_load_acquire(&mk->mk_ino_hash_key_initialized)) { mutex_lock(&fscrypt_mode_key_setup_mutex); if (mk->mk_ino_hash_key_initialized) goto unlock; err = fscrypt_derive_siphash_key(mk, HKDF_CONTEXT_INODE_HASH_KEY, NULL, 0, &mk->mk_ino_hash_key); if (err) goto unlock; /* pairs with smp_load_acquire() above */ smp_store_release(&mk->mk_ino_hash_key_initialized, true); unlock: mutex_unlock(&fscrypt_mode_key_setup_mutex); if (err) return err; } /* * New inodes may not have an inode number assigned yet. * Hashing their inode number is delayed until later. */ if (ci->ci_inode->i_ino) fscrypt_hash_inode_number(ci, mk); return 0; } static int fscrypt_setup_v2_file_key(struct fscrypt_inode_info *ci, struct fscrypt_master_key *mk, bool need_dirhash_key) { int err; if (ci->ci_policy.v2.flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) { /* * DIRECT_KEY: instead of deriving per-file encryption keys, the * per-file nonce will be included in all the IVs. But unlike * v1 policies, for v2 policies in this case we don't encrypt * with the master key directly but rather derive a per-mode * encryption key. This ensures that the master key is * consistently used only for HKDF, avoiding key reuse issues. */ err = setup_per_mode_enc_key(ci, mk, mk->mk_direct_keys, HKDF_CONTEXT_DIRECT_KEY, false); } else if (ci->ci_policy.v2.flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64) { /* * IV_INO_LBLK_64: encryption keys are derived from (master_key, * mode_num, filesystem_uuid), and inode number is included in * the IVs. This format is optimized for use with inline * encryption hardware compliant with the UFS standard. */ err = setup_per_mode_enc_key(ci, mk, mk->mk_iv_ino_lblk_64_keys, HKDF_CONTEXT_IV_INO_LBLK_64_KEY, true); } else if (ci->ci_policy.v2.flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32) { err = fscrypt_setup_iv_ino_lblk_32_key(ci, mk); } else { u8 derived_key[FSCRYPT_MAX_KEY_SIZE]; err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf, HKDF_CONTEXT_PER_FILE_ENC_KEY, ci->ci_nonce, FSCRYPT_FILE_NONCE_SIZE, derived_key, ci->ci_mode->keysize); if (err) return err; err = fscrypt_set_per_file_enc_key(ci, derived_key); memzero_explicit(derived_key, ci->ci_mode->keysize); } if (err) return err; /* Derive a secret dirhash key for directories that need it. */ if (need_dirhash_key) { err = fscrypt_derive_dirhash_key(ci, mk); if (err) return err; } return 0; } /* * Check whether the size of the given master key (@mk) is appropriate for the * encryption settings which a particular file will use (@ci). * * If the file uses a v1 encryption policy, then the master key must be at least * as long as the derived key, as this is a requirement of the v1 KDF. * * Otherwise, the KDF can accept any size key, so we enforce a slightly looser * requirement: we require that the size of the master key be at least the * maximum security strength of any algorithm whose key will be derived from it * (but in practice we only need to consider @ci->ci_mode, since any other * possible subkeys such as DIRHASH and INODE_HASH will never increase the * required key size over @ci->ci_mode). This allows AES-256-XTS keys to be * derived from a 256-bit master key, which is cryptographically sufficient, * rather than requiring a 512-bit master key which is unnecessarily long. (We * still allow 512-bit master keys if the user chooses to use them, though.) */ static bool fscrypt_valid_master_key_size(const struct fscrypt_master_key *mk, const struct fscrypt_inode_info *ci) { unsigned int min_keysize; if (ci->ci_policy.version == FSCRYPT_POLICY_V1) min_keysize = ci->ci_mode->keysize; else min_keysize = ci->ci_mode->security_strength; if (mk->mk_secret.size < min_keysize) { fscrypt_warn(NULL, "key with %s %*phN is too short (got %u bytes, need %u+ bytes)", master_key_spec_type(&mk->mk_spec), master_key_spec_len(&mk->mk_spec), (u8 *)&mk->mk_spec.u, mk->mk_secret.size, min_keysize); return false; } return true; } /* * Find the master key, then set up the inode's actual encryption key. * * If the master key is found in the filesystem-level keyring, then it is * returned in *mk_ret with its semaphore read-locked. This is needed to ensure * that only one task links the fscrypt_inode_info into ->mk_decrypted_inodes * (as multiple tasks may race to create an fscrypt_inode_info for the same * inode), and to synchronize the master key being removed with a new inode * starting to use it. */ static int setup_file_encryption_key(struct fscrypt_inode_info *ci, bool need_dirhash_key, struct fscrypt_master_key **mk_ret) { struct super_block *sb = ci->ci_inode->i_sb; struct fscrypt_key_specifier mk_spec; struct fscrypt_master_key *mk; int err; err = fscrypt_select_encryption_impl(ci); if (err) return err; err = fscrypt_policy_to_key_spec(&ci->ci_policy, &mk_spec); if (err) return err; mk = fscrypt_find_master_key(sb, &mk_spec); if (unlikely(!mk)) { const union fscrypt_policy *dummy_policy = fscrypt_get_dummy_policy(sb); /* * Add the test_dummy_encryption key on-demand. In principle, * it should be added at mount time. Do it here instead so that * the individual filesystems don't need to worry about adding * this key at mount time and cleaning up on mount failure. */ if (dummy_policy && fscrypt_policies_equal(dummy_policy, &ci->ci_policy)) { err = fscrypt_add_test_dummy_key(sb, &mk_spec); if (err) return err; mk = fscrypt_find_master_key(sb, &mk_spec); } } if (unlikely(!mk)) { if (ci->ci_policy.version != FSCRYPT_POLICY_V1) return -ENOKEY; /* * As a legacy fallback for v1 policies, search for the key in * the current task's subscribed keyrings too. Don't move this * to before the search of ->s_master_keys, since users * shouldn't be able to override filesystem-level keys. */ return fscrypt_setup_v1_file_key_via_subscribed_keyrings(ci); } down_read(&mk->mk_sem); if (!mk->mk_present) { /* FS_IOC_REMOVE_ENCRYPTION_KEY has been executed on this key */ err = -ENOKEY; goto out_release_key; } if (!fscrypt_valid_master_key_size(mk, ci)) { err = -ENOKEY; goto out_release_key; } switch (ci->ci_policy.version) { case FSCRYPT_POLICY_V1: err = fscrypt_setup_v1_file_key(ci, mk->mk_secret.raw); break; case FSCRYPT_POLICY_V2: err = fscrypt_setup_v2_file_key(ci, mk, need_dirhash_key); break; default: WARN_ON_ONCE(1); err = -EINVAL; break; } if (err) goto out_release_key; *mk_ret = mk; return 0; out_release_key: up_read(&mk->mk_sem); fscrypt_put_master_key(mk); return err; } static void put_crypt_info(struct fscrypt_inode_info *ci) { struct fscrypt_master_key *mk; if (!ci) return; if (ci->ci_direct_key) fscrypt_put_direct_key(ci->ci_direct_key); else if (ci->ci_owns_key) fscrypt_destroy_prepared_key(ci->ci_inode->i_sb, &ci->ci_enc_key); mk = ci->ci_master_key; if (mk) { /* * Remove this inode from the list of inodes that were unlocked * with the master key. In addition, if we're removing the last * inode from an incompletely removed key, then complete the * full removal of the key. */ spin_lock(&mk->mk_decrypted_inodes_lock); list_del(&ci->ci_master_key_link); spin_unlock(&mk->mk_decrypted_inodes_lock); fscrypt_put_master_key_activeref(ci->ci_inode->i_sb, mk); } memzero_explicit(ci, sizeof(*ci)); kmem_cache_free(fscrypt_inode_info_cachep, ci); } static int fscrypt_setup_encryption_info(struct inode *inode, const union fscrypt_policy *policy, const u8 nonce[FSCRYPT_FILE_NONCE_SIZE], bool need_dirhash_key) { struct fscrypt_inode_info *crypt_info; struct fscrypt_mode *mode; struct fscrypt_master_key *mk = NULL; int res; res = fscrypt_initialize(inode->i_sb); if (res) return res; crypt_info = kmem_cache_zalloc(fscrypt_inode_info_cachep, GFP_KERNEL); if (!crypt_info) return -ENOMEM; crypt_info->ci_inode = inode; crypt_info->ci_policy = *policy; memcpy(crypt_info->ci_nonce, nonce, FSCRYPT_FILE_NONCE_SIZE); mode = select_encryption_mode(&crypt_info->ci_policy, inode); if (IS_ERR(mode)) { res = PTR_ERR(mode); goto out; } WARN_ON_ONCE(mode->ivsize > FSCRYPT_MAX_IV_SIZE); crypt_info->ci_mode = mode; crypt_info->ci_data_unit_bits = fscrypt_policy_du_bits(&crypt_info->ci_policy, inode); crypt_info->ci_data_units_per_block_bits = inode->i_blkbits - crypt_info->ci_data_unit_bits; res = setup_file_encryption_key(crypt_info, need_dirhash_key, &mk); if (res) goto out; /* * For existing inodes, multiple tasks may race to set ->i_crypt_info. * So use cmpxchg_release(). This pairs with the smp_load_acquire() in * fscrypt_get_inode_info(). I.e., here we publish ->i_crypt_info with * a RELEASE barrier so that other tasks can ACQUIRE it. */ if (cmpxchg_release(&inode->i_crypt_info, NULL, crypt_info) == NULL) { /* * We won the race and set ->i_crypt_info to our crypt_info. * Now link it into the master key's inode list. */ if (mk) { crypt_info->ci_master_key = mk; refcount_inc(&mk->mk_active_refs); spin_lock(&mk->mk_decrypted_inodes_lock); list_add(&crypt_info->ci_master_key_link, &mk->mk_decrypted_inodes); spin_unlock(&mk->mk_decrypted_inodes_lock); } crypt_info = NULL; } res = 0; out: if (mk) { up_read(&mk->mk_sem); fscrypt_put_master_key(mk); } put_crypt_info(crypt_info); return res; } /** * fscrypt_get_encryption_info() - set up an inode's encryption key * @inode: the inode to set up the key for. Must be encrypted. * @allow_unsupported: if %true, treat an unsupported encryption policy (or * unrecognized encryption context) the same way as the key * being unavailable, instead of returning an error. Use * %false unless the operation being performed is needed in * order for files (or directories) to be deleted. * * Set up ->i_crypt_info, if it hasn't already been done. * * Note: unless ->i_crypt_info is already set, this isn't %GFP_NOFS-safe. So * generally this shouldn't be called from within a filesystem transaction. * * Return: 0 if ->i_crypt_info was set or was already set, *or* if the * encryption key is unavailable. (Use fscrypt_has_encryption_key() to * distinguish these cases.) Also can return another -errno code. */ int fscrypt_get_encryption_info(struct inode *inode, bool allow_unsupported) { int res; union fscrypt_context ctx; union fscrypt_policy policy; if (fscrypt_has_encryption_key(inode)) return 0; res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx)); if (res < 0) { if (res == -ERANGE && allow_unsupported) return 0; fscrypt_warn(inode, "Error %d getting encryption context", res); return res; } res = fscrypt_policy_from_context(&policy, &ctx, res); if (res) { if (allow_unsupported) return 0; fscrypt_warn(inode, "Unrecognized or corrupt encryption context"); return res; } if (!fscrypt_supported_policy(&policy, inode)) { if (allow_unsupported) return 0; return -EINVAL; } res = fscrypt_setup_encryption_info(inode, &policy, fscrypt_context_nonce(&ctx), IS_CASEFOLDED(inode) && S_ISDIR(inode->i_mode)); if (res == -ENOPKG && allow_unsupported) /* Algorithm unavailable? */ res = 0; if (res == -ENOKEY) res = 0; return res; } /** * fscrypt_prepare_new_inode() - prepare to create a new inode in a directory * @dir: a possibly-encrypted directory * @inode: the new inode. ->i_mode and ->i_blkbits must be set already. * ->i_ino doesn't need to be set yet. * @encrypt_ret: (output) set to %true if the new inode will be encrypted * * If the directory is encrypted, set up its ->i_crypt_info in preparation for * encrypting the name of the new file. Also, if the new inode will be * encrypted, set up its ->i_crypt_info and set *encrypt_ret=true. * * This isn't %GFP_NOFS-safe, and therefore it should be called before starting * any filesystem transaction to create the inode. For this reason, ->i_ino * isn't required to be set yet, as the filesystem may not have set it yet. * * This doesn't persist the new inode's encryption context. That still needs to * be done later by calling fscrypt_set_context(). * * Return: 0 on success, -ENOKEY if the encryption key is missing, or another * -errno code */ int fscrypt_prepare_new_inode(struct inode *dir, struct inode *inode, bool *encrypt_ret) { const union fscrypt_policy *policy; u8 nonce[FSCRYPT_FILE_NONCE_SIZE]; policy = fscrypt_policy_to_inherit(dir); if (policy == NULL) return 0; if (IS_ERR(policy)) return PTR_ERR(policy); if (WARN_ON_ONCE(inode->i_blkbits == 0)) return -EINVAL; if (WARN_ON_ONCE(inode->i_mode == 0)) return -EINVAL; /* * Only regular files, directories, and symlinks are encrypted. * Special files like device nodes and named pipes aren't. */ if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) && !S_ISLNK(inode->i_mode)) return 0; *encrypt_ret = true; get_random_bytes(nonce, FSCRYPT_FILE_NONCE_SIZE); return fscrypt_setup_encryption_info(inode, policy, nonce, IS_CASEFOLDED(dir) && S_ISDIR(inode->i_mode)); } EXPORT_SYMBOL_GPL(fscrypt_prepare_new_inode); /** * fscrypt_put_encryption_info() - free most of an inode's fscrypt data * @inode: an inode being evicted * * Free the inode's fscrypt_inode_info. Filesystems must call this when the * inode is being evicted. An RCU grace period need not have elapsed yet. */ void fscrypt_put_encryption_info(struct inode *inode) { put_crypt_info(inode->i_crypt_info); inode->i_crypt_info = NULL; } EXPORT_SYMBOL(fscrypt_put_encryption_info); /** * fscrypt_free_inode() - free an inode's fscrypt data requiring RCU delay * @inode: an inode being freed * * Free the inode's cached decrypted symlink target, if any. Filesystems must * call this after an RCU grace period, just before they free the inode. */ void fscrypt_free_inode(struct inode *inode) { if (IS_ENCRYPTED(inode) && S_ISLNK(inode->i_mode)) { kfree(inode->i_link); inode->i_link = NULL; } } EXPORT_SYMBOL(fscrypt_free_inode); /** * fscrypt_drop_inode() - check whether the inode's master key has been removed * @inode: an inode being considered for eviction * * Filesystems supporting fscrypt must call this from their ->drop_inode() * method so that encrypted inodes are evicted as soon as they're no longer in * use and their master key has been removed. * * Return: 1 if fscrypt wants the inode to be evicted now, otherwise 0 */ int fscrypt_drop_inode(struct inode *inode) { const struct fscrypt_inode_info *ci = fscrypt_get_inode_info(inode); /* * If ci is NULL, then the inode doesn't have an encryption key set up * so it's irrelevant. If ci_master_key is NULL, then the master key * was provided via the legacy mechanism of the process-subscribed * keyrings, so we don't know whether it's been removed or not. */ if (!ci || !ci->ci_master_key) return 0; /* * With proper, non-racy use of FS_IOC_REMOVE_ENCRYPTION_KEY, all inodes * protected by the key were cleaned by sync_filesystem(). But if * userspace is still using the files, inodes can be dirtied between * then and now. We mustn't lose any writes, so skip dirty inodes here. */ if (inode->i_state & I_DIRTY_ALL) return 0; /* * We can't take ->mk_sem here, since this runs in atomic context. * Therefore, ->mk_present can change concurrently, and our result may * immediately become outdated. But there's no correctness problem with * unnecessarily evicting. Nor is there a correctness problem with not * evicting while iput() is racing with the key being removed, since * then the thread removing the key will either evict the inode itself * or will correctly detect that it wasn't evicted due to the race. */ return !READ_ONCE(ci->ci_master_key->mk_present); } EXPORT_SYMBOL_GPL(fscrypt_drop_inode); |
| 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 | // SPDX-License-Identifier: GPL-2.0+ /* * Driver for Alauda-based card readers * * Current development and maintenance by: * (c) 2005 Daniel Drake <dsd@gentoo.org> * * The 'Alauda' is a chip manufacturered by RATOC for OEM use. * * Alauda implements a vendor-specific command set to access two media reader * ports (XD, SmartMedia). This driver converts SCSI commands to the commands * which are accepted by these devices. * * The driver was developed through reverse-engineering, with the help of the * sddr09 driver which has many similarities, and with some help from the * (very old) vendor-supplied GPL sma03 driver. * * For protocol info, see http://alauda.sourceforge.net */ #include <linux/module.h> #include <linux/slab.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> #include <scsi/scsi_device.h> #include "usb.h" #include "transport.h" #include "protocol.h" #include "debug.h" #include "scsiglue.h" #define DRV_NAME "ums-alauda" MODULE_DESCRIPTION("Driver for Alauda-based card readers"); MODULE_AUTHOR("Daniel Drake <dsd@gentoo.org>"); MODULE_LICENSE("GPL"); MODULE_IMPORT_NS(USB_STORAGE); /* * Status bytes */ #define ALAUDA_STATUS_ERROR 0x01 #define ALAUDA_STATUS_READY 0x40 /* * Control opcodes (for request field) */ #define ALAUDA_GET_XD_MEDIA_STATUS 0x08 #define ALAUDA_GET_SM_MEDIA_STATUS 0x98 #define ALAUDA_ACK_XD_MEDIA_CHANGE 0x0a #define ALAUDA_ACK_SM_MEDIA_CHANGE 0x9a #define ALAUDA_GET_XD_MEDIA_SIG 0x86 #define ALAUDA_GET_SM_MEDIA_SIG 0x96 /* * Bulk command identity (byte 0) */ #define ALAUDA_BULK_CMD 0x40 /* * Bulk opcodes (byte 1) */ #define ALAUDA_BULK_GET_REDU_DATA 0x85 #define ALAUDA_BULK_READ_BLOCK 0x94 #define ALAUDA_BULK_ERASE_BLOCK 0xa3 #define ALAUDA_BULK_WRITE_BLOCK 0xb4 #define ALAUDA_BULK_GET_STATUS2 0xb7 #define ALAUDA_BULK_RESET_MEDIA 0xe0 /* * Port to operate on (byte 8) */ #define ALAUDA_PORT_XD 0x00 #define ALAUDA_PORT_SM 0x01 /* * LBA and PBA are unsigned ints. Special values. */ #define UNDEF 0xffff #define SPARE 0xfffe #define UNUSABLE 0xfffd struct alauda_media_info { unsigned long capacity; /* total media size in bytes */ unsigned int pagesize; /* page size in bytes */ unsigned int blocksize; /* number of pages per block */ unsigned int uzonesize; /* number of usable blocks per zone */ unsigned int zonesize; /* number of blocks per zone */ unsigned int blockmask; /* mask to get page from address */ unsigned char pageshift; unsigned char blockshift; unsigned char zoneshift; u16 **lba_to_pba; /* logical to physical block map */ u16 **pba_to_lba; /* physical to logical block map */ }; struct alauda_info { struct alauda_media_info port[2]; int wr_ep; /* endpoint to write data out of */ unsigned char sense_key; unsigned long sense_asc; /* additional sense code */ unsigned long sense_ascq; /* additional sense code qualifier */ bool media_initialized; }; #define short_pack(lsb,msb) ( ((u16)(lsb)) | ( ((u16)(msb))<<8 ) ) #define LSB_of(s) ((s)&0xFF) #define MSB_of(s) ((s)>>8) #define MEDIA_PORT(us) us->srb->device->lun #define MEDIA_INFO(us) ((struct alauda_info *)us->extra)->port[MEDIA_PORT(us)] #define PBA_LO(pba) ((pba & 0xF) << 5) #define PBA_HI(pba) (pba >> 3) #define PBA_ZONE(pba) (pba >> 11) static int init_alauda(struct us_data *us); /* * The table of devices */ #define UNUSUAL_DEV(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax, \ vendorName, productName, useProtocol, useTransport, \ initFunction, flags) \ { USB_DEVICE_VER(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax), \ .driver_info = (flags) } static struct usb_device_id alauda_usb_ids[] = { # include "unusual_alauda.h" { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, alauda_usb_ids); #undef UNUSUAL_DEV /* * The flags table */ #define UNUSUAL_DEV(idVendor, idProduct, bcdDeviceMin, bcdDeviceMax, \ vendor_name, product_name, use_protocol, use_transport, \ init_function, Flags) \ { \ .vendorName = vendor_name, \ .productName = product_name, \ .useProtocol = use_protocol, \ .useTransport = use_transport, \ .initFunction = init_function, \ } static struct us_unusual_dev alauda_unusual_dev_list[] = { # include "unusual_alauda.h" { } /* Terminating entry */ }; #undef UNUSUAL_DEV /* * Media handling */ struct alauda_card_info { unsigned char id; /* id byte */ unsigned char chipshift; /* 1<<cs bytes total capacity */ unsigned char pageshift; /* 1<<ps bytes in a page */ unsigned char blockshift; /* 1<<bs pages per block */ unsigned char zoneshift; /* 1<<zs blocks per zone */ }; static struct alauda_card_info alauda_card_ids[] = { /* NAND flash */ { 0x6e, 20, 8, 4, 8}, /* 1 MB */ { 0xe8, 20, 8, 4, 8}, /* 1 MB */ { 0xec, 20, 8, 4, 8}, /* 1 MB */ { 0x64, 21, 8, 4, 9}, /* 2 MB */ { 0xea, 21, 8, 4, 9}, /* 2 MB */ { 0x6b, 22, 9, 4, 9}, /* 4 MB */ { 0xe3, 22, 9, 4, 9}, /* 4 MB */ { 0xe5, 22, 9, 4, 9}, /* 4 MB */ { 0xe6, 23, 9, 4, 10}, /* 8 MB */ { 0x73, 24, 9, 5, 10}, /* 16 MB */ { 0x75, 25, 9, 5, 10}, /* 32 MB */ { 0x76, 26, 9, 5, 10}, /* 64 MB */ { 0x79, 27, 9, 5, 10}, /* 128 MB */ { 0x71, 28, 9, 5, 10}, /* 256 MB */ /* MASK ROM */ { 0x5d, 21, 9, 4, 8}, /* 2 MB */ { 0xd5, 22, 9, 4, 9}, /* 4 MB */ { 0xd6, 23, 9, 4, 10}, /* 8 MB */ { 0x57, 24, 9, 4, 11}, /* 16 MB */ { 0x58, 25, 9, 4, 12}, /* 32 MB */ { 0,} }; static struct alauda_card_info *alauda_card_find_id(unsigned char id) { int i; for (i = 0; alauda_card_ids[i].id != 0; i++) if (alauda_card_ids[i].id == id) return &(alauda_card_ids[i]); return NULL; } /* * ECC computation. */ static unsigned char parity[256]; static unsigned char ecc2[256]; static void nand_init_ecc(void) { int i, j, a; parity[0] = 0; for (i = 1; i < 256; i++) parity[i] = (parity[i&(i-1)] ^ 1); for (i = 0; i < 256; i++) { a = 0; for (j = 0; j < 8; j++) { if (i & (1<<j)) { if ((j & 1) == 0) a ^= 0x04; if ((j & 2) == 0) a ^= 0x10; if ((j & 4) == 0) a ^= 0x40; } } ecc2[i] = ~(a ^ (a<<1) ^ (parity[i] ? 0xa8 : 0)); } } /* compute 3-byte ecc on 256 bytes */ static void nand_compute_ecc(unsigned char *data, unsigned char *ecc) { int i, j, a; unsigned char par = 0, bit, bits[8] = {0}; /* collect 16 checksum bits */ for (i = 0; i < 256; i++) { par ^= data[i]; bit = parity[data[i]]; for (j = 0; j < 8; j++) if ((i & (1<<j)) == 0) bits[j] ^= bit; } /* put 4+4+4 = 12 bits in the ecc */ a = (bits[3] << 6) + (bits[2] << 4) + (bits[1] << 2) + bits[0]; ecc[0] = ~(a ^ (a<<1) ^ (parity[par] ? 0xaa : 0)); a = (bits[7] << 6) + (bits[6] << 4) + (bits[5] << 2) + bits[4]; ecc[1] = ~(a ^ (a<<1) ^ (parity[par] ? 0xaa : 0)); ecc[2] = ecc2[par]; } static int nand_compare_ecc(unsigned char *data, unsigned char *ecc) { return (data[0] == ecc[0] && data[1] == ecc[1] && data[2] == ecc[2]); } static void nand_store_ecc(unsigned char *data, unsigned char *ecc) { memcpy(data, ecc, 3); } /* * Alauda driver */ /* * Forget our PBA <---> LBA mappings for a particular port */ static void alauda_free_maps (struct alauda_media_info *media_info) { unsigned int shift = media_info->zoneshift + media_info->blockshift + media_info->pageshift; unsigned int num_zones = media_info->capacity >> shift; unsigned int i; if (media_info->lba_to_pba != NULL) for (i = 0; i < num_zones; i++) { kfree(media_info->lba_to_pba[i]); media_info->lba_to_pba[i] = NULL; } if (media_info->pba_to_lba != NULL) for (i = 0; i < num_zones; i++) { kfree(media_info->pba_to_lba[i]); media_info->pba_to_lba[i] = NULL; } } /* * Returns 2 bytes of status data * The first byte describes media status, and second byte describes door status */ static int alauda_get_media_status(struct us_data *us, unsigned char *data) { int rc; unsigned char command; if (MEDIA_PORT(us) == ALAUDA_PORT_XD) command = ALAUDA_GET_XD_MEDIA_STATUS; else command = ALAUDA_GET_SM_MEDIA_STATUS; rc = usb_stor_ctrl_transfer(us, us->recv_ctrl_pipe, command, 0xc0, 0, 1, data, 2); if (rc == USB_STOR_XFER_GOOD) usb_stor_dbg(us, "Media status %02X %02X\n", data[0], data[1]); return rc; } /* * Clears the "media was changed" bit so that we know when it changes again * in the future. */ static int alauda_ack_media(struct us_data *us) { unsigned char command; if (MEDIA_PORT(us) == ALAUDA_PORT_XD) command = ALAUDA_ACK_XD_MEDIA_CHANGE; else command = ALAUDA_ACK_SM_MEDIA_CHANGE; return usb_stor_ctrl_transfer(us, us->send_ctrl_pipe, command, 0x40, 0, 1, NULL, 0); } /* * Retrieves a 4-byte media signature, which indicates manufacturer, capacity, * and some other details. */ static int alauda_get_media_signature(struct us_data *us, unsigned char *data) { unsigned char command; if (MEDIA_PORT(us) == ALAUDA_PORT_XD) command = ALAUDA_GET_XD_MEDIA_SIG; else command = ALAUDA_GET_SM_MEDIA_SIG; return usb_stor_ctrl_transfer(us, us->recv_ctrl_pipe, command, 0xc0, 0, 0, data, 4); } /* * Resets the media status (but not the whole device?) */ static int alauda_reset_media(struct us_data *us) { unsigned char *command = us->iobuf; memset(command, 0, 9); command[0] = ALAUDA_BULK_CMD; command[1] = ALAUDA_BULK_RESET_MEDIA; command[8] = MEDIA_PORT(us); return usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe, command, 9, NULL); } /* * Examines the media and deduces capacity, etc. */ static int alauda_init_media(struct us_data *us) { unsigned char *data = us->iobuf; int ready = 0; struct alauda_card_info *media_info; unsigned int num_zones; while (ready == 0) { msleep(20); if (alauda_get_media_status(us, data) != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; if (data[0] & 0x10) ready = 1; } usb_stor_dbg(us, "We are ready for action!\n"); if (alauda_ack_media(us) != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; msleep(10); if (alauda_get_media_status(us, data) != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; if (data[0] != 0x14) { usb_stor_dbg(us, "Media not ready after ack\n"); return USB_STOR_TRANSPORT_ERROR; } if (alauda_get_media_signature(us, data) != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; usb_stor_dbg(us, "Media signature: %4ph\n", data); media_info = alauda_card_find_id(data[1]); if (media_info == NULL) { pr_warn("alauda_init_media: Unrecognised media signature: %4ph\n", data); return USB_STOR_TRANSPORT_ERROR; } MEDIA_INFO(us).capacity = 1 << media_info->chipshift; usb_stor_dbg(us, "Found media with capacity: %ldMB\n", MEDIA_INFO(us).capacity >> 20); MEDIA_INFO(us).pageshift = media_info->pageshift; MEDIA_INFO(us).blockshift = media_info->blockshift; MEDIA_INFO(us).zoneshift = media_info->zoneshift; MEDIA_INFO(us).pagesize = 1 << media_info->pageshift; MEDIA_INFO(us).blocksize = 1 << media_info->blockshift; MEDIA_INFO(us).zonesize = 1 << media_info->zoneshift; MEDIA_INFO(us).uzonesize = ((1 << media_info->zoneshift) / 128) * 125; MEDIA_INFO(us).blockmask = MEDIA_INFO(us).blocksize - 1; num_zones = MEDIA_INFO(us).capacity >> (MEDIA_INFO(us).zoneshift + MEDIA_INFO(us).blockshift + MEDIA_INFO(us).pageshift); MEDIA_INFO(us).pba_to_lba = kcalloc(num_zones, sizeof(u16*), GFP_NOIO); MEDIA_INFO(us).lba_to_pba = kcalloc(num_zones, sizeof(u16*), GFP_NOIO); if (MEDIA_INFO(us).pba_to_lba == NULL || MEDIA_INFO(us).lba_to_pba == NULL) return USB_STOR_TRANSPORT_ERROR; if (alauda_reset_media(us) != USB_STOR_XFER_GOOD) return USB_STOR_TRANSPORT_ERROR; return USB_STOR_TRANSPORT_GOOD; } /* * Examines the media status and does the right thing when the media has gone, * appeared, or changed. */ static int alauda_check_media(struct us_data *us) { struct alauda_info *info = (struct alauda_info *) us->extra; unsigned char *status = us->iobuf; int rc; rc = alauda_get_media_status(us, status); if (rc != USB_STOR_XFER_GOOD) { status[0] = 0xF0; /* Pretend there's no media */ status[1] = 0; } /* Check for no media or door open */ if ((status[0] & 0x80) || ((status[0] & 0x1F) == 0x10) || ((status[1] & 0x01) == 0)) { usb_stor_dbg(us, "No media, or door open\n"); alauda_free_maps(&MEDIA_INFO(us)); info->sense_key = 0x02; info->sense_asc = 0x3A; info->sense_ascq = 0x00; return USB_STOR_TRANSPORT_FAILED; } /* Check for media change */ if (status[0] & 0x08 || !info->media_initialized) { usb_stor_dbg(us, "Media change detected\n"); alauda_free_maps(&MEDIA_INFO(us)); rc = alauda_init_media(us); if (rc == USB_STOR_TRANSPORT_GOOD) info->media_initialized = true; info->sense_key = UNIT_ATTENTION; info->sense_asc = 0x28; info->sense_ascq = 0x00; return USB_STOR_TRANSPORT_FAILED; } return USB_STOR_TRANSPORT_GOOD; } /* * Checks the status from the 2nd status register * Returns 3 bytes of status data, only the first is known */ static int alauda_check_status2(struct us_data *us) { int rc; unsigned char command[] = { ALAUDA_BULK_CMD, ALAUDA_BULK_GET_STATUS2, 0, 0, 0, 0, 3, 0, MEDIA_PORT(us) }; unsigned char data[3]; rc = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe, command, 9, NULL); if (rc != USB_STOR_XFER_GOOD) return rc; rc = usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe, data, 3, NULL); if (rc != USB_STOR_XFER_GOOD) return rc; usb_stor_dbg(us, "%3ph\n", data); if (data[0] & ALAUDA_STATUS_ERROR) return USB_STOR_XFER_ERROR; return USB_STOR_XFER_GOOD; } /* * Gets the redundancy data for the first page of a PBA * Returns 16 bytes. */ static int alauda_get_redu_data(struct us_data *us, u16 pba, unsigned char *data) { int rc; unsigned char command[] = { ALAUDA_BULK_CMD, ALAUDA_BULK_GET_REDU_DATA, PBA_HI(pba), PBA_ZONE(pba), 0, PBA_LO(pba), 0, 0, MEDIA_PORT(us) }; rc = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe, command, 9, NULL); if (rc != USB_STOR_XFER_GOOD) return rc; return usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe, data, 16, NULL); } /* * Finds the first unused PBA in a zone * Returns the absolute PBA of an unused PBA, or 0 if none found. */ static u16 alauda_find_unused_pba(struct alauda_media_info *info, unsigned int zone) { u16 *pba_to_lba = info->pba_to_lba[zone]; unsigned int i; for (i = 0; i < info->zonesize; i++) if (pba_to_lba[i] == UNDEF) return (zone << info->zoneshift) + i; return 0; } /* * Reads the redundancy data for all PBA's in a zone * Produces lba <--> pba mappings */ static int alauda_read_map(struct us_data *us, unsigned int zone) { unsigned char *data = us->iobuf; int result; int i, j; unsigned int zonesize = MEDIA_INFO(us).zonesize; unsigned int uzonesize = MEDIA_INFO(us).uzonesize; unsigned int lba_offset, lba_real, blocknum; unsigned int zone_base_lba = zone * uzonesize; unsigned int zone_base_pba = zone * zonesize; u16 *lba_to_pba = kcalloc(zonesize, sizeof(u16), GFP_NOIO); u16 *pba_to_lba = kcalloc(zonesize, sizeof(u16), GFP_NOIO); if (lba_to_pba == NULL || pba_to_lba == NULL) { result = USB_STOR_TRANSPORT_ERROR; goto error; } usb_stor_dbg(us, "Mapping blocks for zone %d\n", zone); /* 1024 PBA's per zone */ for (i = 0; i < zonesize; i++) lba_to_pba[i] = pba_to_lba[i] = UNDEF; for (i = 0; i < zonesize; i++) { blocknum = zone_base_pba + i; result = alauda_get_redu_data(us, blocknum, data); if (result != USB_STOR_XFER_GOOD) { result = USB_STOR_TRANSPORT_ERROR; goto error; } /* special PBAs have control field 0^16 */ for (j = 0; j < 16; j++) if (data[j] != 0) goto nonz; pba_to_lba[i] = UNUSABLE; usb_stor_dbg(us, "PBA %d has no logical mapping\n", blocknum); continue; nonz: /* unwritten PBAs have control field FF^16 */ for (j = 0; j < 16; j++) if (data[j] != 0xff) goto nonff; continue; nonff: /* normal PBAs start with six FFs */ if (j < 6) { usb_stor_dbg(us, "PBA %d has no logical mapping: reserved area = %02X%02X%02X%02X data status %02X block status %02X\n", blocknum, data[0], data[1], data[2], data[3], data[4], data[5]); pba_to_lba[i] = UNUSABLE; continue; } if ((data[6] >> 4) != 0x01) { usb_stor_dbg(us, "PBA %d has invalid address field %02X%02X/%02X%02X\n", blocknum, data[6], data[7], data[11], data[12]); pba_to_lba[i] = UNUSABLE; continue; } /* check even parity */ if (parity[data[6] ^ data[7]]) { printk(KERN_WARNING "alauda_read_map: Bad parity in LBA for block %d" " (%02X %02X)\n", i, data[6], data[7]); pba_to_lba[i] = UNUSABLE; continue; } lba_offset = short_pack(data[7], data[6]); lba_offset = (lba_offset & 0x07FF) >> 1; lba_real = lba_offset + zone_base_lba; /* * Every 1024 physical blocks ("zone"), the LBA numbers * go back to zero, but are within a higher block of LBA's. * Also, there is a maximum of 1000 LBA's per zone. * In other words, in PBA 1024-2047 you will find LBA 0-999 * which are really LBA 1000-1999. This allows for 24 bad * or special physical blocks per zone. */ if (lba_offset >= uzonesize) { printk(KERN_WARNING "alauda_read_map: Bad low LBA %d for block %d\n", lba_real, blocknum); continue; } if (lba_to_pba[lba_offset] != UNDEF) { printk(KERN_WARNING "alauda_read_map: " "LBA %d seen for PBA %d and %d\n", lba_real, lba_to_pba[lba_offset], blocknum); continue; } pba_to_lba[i] = lba_real; lba_to_pba[lba_offset] = blocknum; continue; } MEDIA_INFO(us).lba_to_pba[zone] = lba_to_pba; MEDIA_INFO(us).pba_to_lba[zone] = pba_to_lba; result = 0; goto out; error: kfree(lba_to_pba); kfree(pba_to_lba); out: return result; } /* * Checks to see whether we have already mapped a certain zone * If we haven't, the map is generated */ static void alauda_ensure_map_for_zone(struct us_data *us, unsigned int zone) { if (MEDIA_INFO(us).lba_to_pba[zone] == NULL || MEDIA_INFO(us).pba_to_lba[zone] == NULL) alauda_read_map(us, zone); } /* * Erases an entire block */ static int alauda_erase_block(struct us_data *us, u16 pba) { int rc; unsigned char command[] = { ALAUDA_BULK_CMD, ALAUDA_BULK_ERASE_BLOCK, PBA_HI(pba), PBA_ZONE(pba), 0, PBA_LO(pba), 0x02, 0, MEDIA_PORT(us) }; unsigned char buf[2]; usb_stor_dbg(us, "Erasing PBA %d\n", pba); rc = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe, command, 9, NULL); if (rc != USB_STOR_XFER_GOOD) return rc; rc = usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe, buf, 2, NULL); if (rc != USB_STOR_XFER_GOOD) return rc; usb_stor_dbg(us, "Erase result: %02X %02X\n", buf[0], buf[1]); return rc; } /* * Reads data from a certain offset page inside a PBA, including interleaved * redundancy data. Returns (pagesize+64)*pages bytes in data. */ static int alauda_read_block_raw(struct us_data *us, u16 pba, unsigned int page, unsigned int pages, unsigned char *data) { int rc; unsigned char command[] = { ALAUDA_BULK_CMD, ALAUDA_BULK_READ_BLOCK, PBA_HI(pba), PBA_ZONE(pba), 0, PBA_LO(pba) + page, pages, 0, MEDIA_PORT(us) }; usb_stor_dbg(us, "pba %d page %d count %d\n", pba, page, pages); rc = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe, command, 9, NULL); if (rc != USB_STOR_XFER_GOOD) return rc; return usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe, data, (MEDIA_INFO(us).pagesize + 64) * pages, NULL); } /* * Reads data from a certain offset page inside a PBA, excluding redundancy * data. Returns pagesize*pages bytes in data. Note that data must be big enough * to hold (pagesize+64)*pages bytes of data, but you can ignore those 'extra' * trailing bytes outside this function. */ static int alauda_read_block(struct us_data *us, u16 pba, unsigned int page, unsigned int pages, unsigned char *data) { int i, rc; unsigned int pagesize = MEDIA_INFO(us).pagesize; rc = alauda_read_block_raw(us, pba, page, pages, data); if (rc != USB_STOR_XFER_GOOD) return rc; /* Cut out the redundancy data */ for (i = 0; i < pages; i++) { int dest_offset = i * pagesize; int src_offset = i * (pagesize + 64); memmove(data + dest_offset, data + src_offset, pagesize); } return rc; } /* * Writes an entire block of data and checks status after write. * Redundancy data must be already included in data. Data should be * (pagesize+64)*blocksize bytes in length. */ static int alauda_write_block(struct us_data *us, u16 pba, unsigned char *data) { int rc; struct alauda_info *info = (struct alauda_info *) us->extra; unsigned char command[] = { ALAUDA_BULK_CMD, ALAUDA_BULK_WRITE_BLOCK, PBA_HI(pba), PBA_ZONE(pba), 0, PBA_LO(pba), 32, 0, MEDIA_PORT(us) }; usb_stor_dbg(us, "pba %d\n", pba); rc = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe, command, 9, NULL); if (rc != USB_STOR_XFER_GOOD) return rc; rc = usb_stor_bulk_transfer_buf(us, info->wr_ep, data, (MEDIA_INFO(us).pagesize + 64) * MEDIA_INFO(us).blocksize, NULL); if (rc != USB_STOR_XFER_GOOD) return rc; return alauda_check_status2(us); } /* * Write some data to a specific LBA. */ static int alauda_write_lba(struct us_data *us, u16 lba, unsigned int page, unsigned int pages, unsigned char *ptr, unsigned char *blockbuffer) { u16 pba, lbap, new_pba; unsigned char *bptr, *cptr, *xptr; unsigned char ecc[3]; int i, result; unsigned int uzonesize = MEDIA_INFO(us).uzonesize; unsigned int zonesize = MEDIA_INFO(us).zonesize; unsigned int pagesize = MEDIA_INFO(us).pagesize; unsigned int blocksize = MEDIA_INFO(us).blocksize; unsigned int lba_offset = lba % uzonesize; unsigned int new_pba_offset; unsigned int zone = lba / uzonesize; alauda_ensure_map_for_zone(us, zone); pba = MEDIA_INFO(us).lba_to_pba[zone][lba_offset]; if (pba == 1) { /* * Maybe it is impossible to write to PBA 1. * Fake success, but don't do anything. */ printk(KERN_WARNING "alauda_write_lba: avoid writing to pba 1\n"); return USB_STOR_TRANSPORT_GOOD; } new_pba = alauda_find_unused_pba(&MEDIA_INFO(us), zone); if (!new_pba) { printk(KERN_WARNING "alauda_write_lba: Out of unused blocks\n"); return USB_STOR_TRANSPORT_ERROR; } /* read old contents */ if (pba != UNDEF) { result = alauda_read_block_raw(us, pba, 0, blocksize, blockbuffer); if (result != USB_STOR_XFER_GOOD) return result; } else { memset(blockbuffer, 0, blocksize * (pagesize + 64)); } lbap = (lba_offset << 1) | 0x1000; if (parity[MSB_of(lbap) ^ LSB_of(lbap)]) lbap ^= 1; /* check old contents and fill lba */ for (i = 0; i < blocksize; i++) { bptr = blockbuffer + (i * (pagesize + 64)); cptr = bptr + pagesize; nand_compute_ecc(bptr, ecc); if (!nand_compare_ecc(cptr+13, ecc)) { usb_stor_dbg(us, "Warning: bad ecc in page %d- of pba %d\n", i, pba); nand_store_ecc(cptr+13, ecc); } nand_compute_ecc(bptr + (pagesize / 2), ecc); if (!nand_compare_ecc(cptr+8, ecc)) { usb_stor_dbg(us, "Warning: bad ecc in page %d+ of pba %d\n", i, pba); nand_store_ecc(cptr+8, ecc); } cptr[6] = cptr[11] = MSB_of(lbap); cptr[7] = cptr[12] = LSB_of(lbap); } /* copy in new stuff and compute ECC */ xptr = ptr; for (i = page; i < page+pages; i++) { bptr = blockbuffer + (i * (pagesize + 64)); cptr = bptr + pagesize; memcpy(bptr, xptr, pagesize); xptr += pagesize; nand_compute_ecc(bptr, ecc); nand_store_ecc(cptr+13, ecc); nand_compute_ecc(bptr + (pagesize / 2), ecc); nand_store_ecc(cptr+8, ecc); } result = alauda_write_block(us, new_pba, blockbuffer); if (result != USB_STOR_XFER_GOOD) return result; new_pba_offset = new_pba - (zone * zonesize); MEDIA_INFO(us).pba_to_lba[zone][new_pba_offset] = lba; MEDIA_INFO(us).lba_to_pba[zone][lba_offset] = new_pba; usb_stor_dbg(us, "Remapped LBA %d to PBA %d\n", lba, new_pba); if (pba != UNDEF) { unsigned int pba_offset = pba - (zone * zonesize); result = alauda_erase_block(us, pba); if (result != USB_STOR_XFER_GOOD) return result; MEDIA_INFO(us).pba_to_lba[zone][pba_offset] = UNDEF; } return USB_STOR_TRANSPORT_GOOD; } /* * Read data from a specific sector address */ static int alauda_read_data(struct us_data *us, unsigned long address, unsigned int sectors) { unsigned char *buffer; u16 lba, max_lba; unsigned int page, len, offset; unsigned int blockshift = MEDIA_INFO(us).blockshift; unsigned int pageshift = MEDIA_INFO(us).pageshift; unsigned int blocksize = MEDIA_INFO(us).blocksize; unsigned int pagesize = MEDIA_INFO(us).pagesize; unsigned int uzonesize = MEDIA_INFO(us).uzonesize; struct scatterlist *sg; int result; /* * Since we only read in one block at a time, we have to create * a bounce buffer and move the data a piece at a time between the * bounce buffer and the actual transfer buffer. * We make this buffer big enough to hold temporary redundancy data, * which we use when reading the data blocks. */ len = min(sectors, blocksize) * (pagesize + 64); buffer = kmalloc(len, GFP_NOIO); if (!buffer) return USB_STOR_TRANSPORT_ERROR; /* Figure out the initial LBA and page */ lba = address >> blockshift; page = (address & MEDIA_INFO(us).blockmask); max_lba = MEDIA_INFO(us).capacity >> (blockshift + pageshift); result = USB_STOR_TRANSPORT_GOOD; offset = 0; sg = NULL; while (sectors > 0) { unsigned int zone = lba / uzonesize; /* integer division */ unsigned int lba_offset = lba - (zone * uzonesize); unsigned int pages; u16 pba; alauda_ensure_map_for_zone(us, zone); /* Not overflowing capacity? */ if (lba >= max_lba) { usb_stor_dbg(us, "Error: Requested lba %u exceeds maximum %u\n", lba, max_lba); result = USB_STOR_TRANSPORT_ERROR; break; } /* Find number of pages we can read in this block */ pages = min(sectors, blocksize - page); len = pages << pageshift; /* Find where this lba lives on disk */ pba = MEDIA_INFO(us).lba_to_pba[zone][lba_offset]; if (pba == UNDEF) { /* this lba was never written */ usb_stor_dbg(us, "Read %d zero pages (LBA %d) page %d\n", pages, lba, page); /* * This is not really an error. It just means * that the block has never been written. * Instead of returning USB_STOR_TRANSPORT_ERROR * it is better to return all zero data. */ memset(buffer, 0, len); } else { usb_stor_dbg(us, "Read %d pages, from PBA %d (LBA %d) page %d\n", pages, pba, lba, page); result = alauda_read_block(us, pba, page, pages, buffer); if (result != USB_STOR_TRANSPORT_GOOD) break; } /* Store the data in the transfer buffer */ usb_stor_access_xfer_buf(buffer, len, us->srb, &sg, &offset, TO_XFER_BUF); page = 0; lba++; sectors -= pages; } kfree(buffer); return result; } /* * Write data to a specific sector address */ static int alauda_write_data(struct us_data *us, unsigned long address, unsigned int sectors) { unsigned char *buffer, *blockbuffer; unsigned int page, len, offset; unsigned int blockshift = MEDIA_INFO(us).blockshift; unsigned int pageshift = MEDIA_INFO(us).pageshift; unsigned int blocksize = MEDIA_INFO(us).blocksize; unsigned int pagesize = MEDIA_INFO(us).pagesize; struct scatterlist *sg; u16 lba, max_lba; int result; /* * Since we don't write the user data directly to the device, * we have to create a bounce buffer and move the data a piece * at a time between the bounce buffer and the actual transfer buffer. */ len = min(sectors, blocksize) * pagesize; buffer = kmalloc(len, GFP_NOIO); if (!buffer) return USB_STOR_TRANSPORT_ERROR; /* * We also need a temporary block buffer, where we read in the old data, * overwrite parts with the new data, and manipulate the redundancy data */ blockbuffer = kmalloc_array(pagesize + 64, blocksize, GFP_NOIO); if (!blockbuffer) { kfree(buffer); return USB_STOR_TRANSPORT_ERROR; } /* Figure out the initial LBA and page */ lba = address >> blockshift; page = (address & MEDIA_INFO(us).blockmask); max_lba = MEDIA_INFO(us).capacity >> (pageshift + blockshift); result = USB_STOR_TRANSPORT_GOOD; offset = 0; sg = NULL; while (sectors > 0) { /* Write as many sectors as possible in this block */ unsigned int pages = min(sectors, blocksize - page); len = pages << pageshift; /* Not overflowing capacity? */ if (lba >= max_lba) { usb_stor_dbg(us, "Requested lba %u exceeds maximum %u\n", lba, max_lba); result = USB_STOR_TRANSPORT_ERROR; break; } /* Get the data from the transfer buffer */ usb_stor_access_xfer_buf(buffer, len, us->srb, &sg, &offset, FROM_XFER_BUF); result = alauda_write_lba(us, lba, page, pages, buffer, blockbuffer); if (result != USB_STOR_TRANSPORT_GOOD) break; page = 0; lba++; sectors -= pages; } kfree(buffer); kfree(blockbuffer); return result; } /* * Our interface with the rest of the world */ static void alauda_info_destructor(void *extra) { struct alauda_info *info = (struct alauda_info *) extra; int port; if (!info) return; for (port = 0; port < 2; port++) { struct alauda_media_info *media_info = &info->port[port]; alauda_free_maps(media_info); kfree(media_info->lba_to_pba); kfree(media_info->pba_to_lba); } } /* * Initialize alauda_info struct and find the data-write endpoint */ static int init_alauda(struct us_data *us) { struct alauda_info *info; struct usb_host_interface *altsetting = us->pusb_intf->cur_altsetting; nand_init_ecc(); us->extra = kzalloc(sizeof(struct alauda_info), GFP_NOIO); if (!us->extra) return -ENOMEM; info = (struct alauda_info *) us->extra; us->extra_destructor = alauda_info_destructor; info->wr_ep = usb_sndbulkpipe(us->pusb_dev, altsetting->endpoint[0].desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK); return 0; } static int alauda_transport(struct scsi_cmnd *srb, struct us_data *us) { int rc; struct alauda_info *info = (struct alauda_info *) us->extra; unsigned char *ptr = us->iobuf; static unsigned char inquiry_response[36] = { 0x00, 0x80, 0x00, 0x01, 0x1F, 0x00, 0x00, 0x00 }; if (srb->cmnd[0] == INQUIRY) { usb_stor_dbg(us, "INQUIRY - Returning bogus response\n"); memcpy(ptr, inquiry_response, sizeof(inquiry_response)); fill_inquiry_response(us, ptr, 36); return USB_STOR_TRANSPORT_GOOD; } if (srb->cmnd[0] == TEST_UNIT_READY) { usb_stor_dbg(us, "TEST_UNIT_READY\n"); return alauda_check_media(us); } if (srb->cmnd[0] == READ_CAPACITY) { unsigned int num_zones; unsigned long capacity; rc = alauda_check_media(us); if (rc != USB_STOR_TRANSPORT_GOOD) return rc; num_zones = MEDIA_INFO(us).capacity >> (MEDIA_INFO(us).zoneshift + MEDIA_INFO(us).blockshift + MEDIA_INFO(us).pageshift); capacity = num_zones * MEDIA_INFO(us).uzonesize * MEDIA_INFO(us).blocksize; /* Report capacity and page size */ ((__be32 *) ptr)[0] = cpu_to_be32(capacity - 1); ((__be32 *) ptr)[1] = cpu_to_be32(512); usb_stor_set_xfer_buf(ptr, 8, srb); return USB_STOR_TRANSPORT_GOOD; } if (srb->cmnd[0] == READ_10) { unsigned int page, pages; rc = alauda_check_media(us); if (rc != USB_STOR_TRANSPORT_GOOD) return rc; page = short_pack(srb->cmnd[3], srb->cmnd[2]); page <<= 16; page |= short_pack(srb->cmnd[5], srb->cmnd[4]); pages = short_pack(srb->cmnd[8], srb->cmnd[7]); usb_stor_dbg(us, "READ_10: page %d pagect %d\n", page, pages); return alauda_read_data(us, page, pages); } if (srb->cmnd[0] == WRITE_10) { unsigned int page, pages; rc = alauda_check_media(us); if (rc != USB_STOR_TRANSPORT_GOOD) return rc; page = short_pack(srb->cmnd[3], srb->cmnd[2]); page <<= 16; page |= short_pack(srb->cmnd[5], srb->cmnd[4]); pages = short_pack(srb->cmnd[8], srb->cmnd[7]); usb_stor_dbg(us, "WRITE_10: page %d pagect %d\n", page, pages); return alauda_write_data(us, page, pages); } if (srb->cmnd[0] == REQUEST_SENSE) { usb_stor_dbg(us, "REQUEST_SENSE\n"); memset(ptr, 0, 18); ptr[0] = 0xF0; ptr[2] = info->sense_key; ptr[7] = 11; ptr[12] = info->sense_asc; ptr[13] = info->sense_ascq; usb_stor_set_xfer_buf(ptr, 18, srb); return USB_STOR_TRANSPORT_GOOD; } if (srb->cmnd[0] == ALLOW_MEDIUM_REMOVAL) { /* * sure. whatever. not like we can stop the user from popping * the media out of the device (no locking doors, etc) */ return USB_STOR_TRANSPORT_GOOD; } usb_stor_dbg(us, "Gah! Unknown command: %d (0x%x)\n", srb->cmnd[0], srb->cmnd[0]); info->sense_key = 0x05; info->sense_asc = 0x20; info->sense_ascq = 0x00; return USB_STOR_TRANSPORT_FAILED; } static struct scsi_host_template alauda_host_template; static int alauda_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct us_data *us; int result; result = usb_stor_probe1(&us, intf, id, (id - alauda_usb_ids) + alauda_unusual_dev_list, &alauda_host_template); if (result) return result; us->transport_name = "Alauda Control/Bulk"; us->transport = alauda_transport; us->transport_reset = usb_stor_Bulk_reset; us->max_lun = 1; result = usb_stor_probe2(us); return result; } static struct usb_driver alauda_driver = { .name = DRV_NAME, .probe = alauda_probe, .disconnect = usb_stor_disconnect, .suspend = usb_stor_suspend, .resume = usb_stor_resume, .reset_resume = usb_stor_reset_resume, .pre_reset = usb_stor_pre_reset, .post_reset = usb_stor_post_reset, .id_table = alauda_usb_ids, .soft_unbind = 1, .no_dynamic_id = 1, }; module_usb_stor_driver(alauda_driver, alauda_host_template, DRV_NAME); |
| 33 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 | /* * Copyright (c) 2016 Intel Corporation * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that * the above copyright notice appear in all copies and that both that copyright * notice and this permission notice appear in supporting documentation, and * that the name of the copyright holders not be used in advertising or * publicity pertaining to distribution of the software without specific, * written prior permission. The copyright holders make no representations * about the suitability of this software for any purpose. It is provided "as * is" without express or implied warranty. * * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THIS SOFTWARE. */ #ifndef __DRM_BRIDGE_H__ #define __DRM_BRIDGE_H__ #include <linux/ctype.h> #include <linux/list.h> #include <linux/mutex.h> #include <drm/drm_atomic.h> #include <drm/drm_encoder.h> #include <drm/drm_mode_object.h> #include <drm/drm_modes.h> struct device_node; struct drm_bridge; struct drm_bridge_timings; struct drm_connector; struct drm_display_info; struct drm_minor; struct drm_panel; struct edid; struct i2c_adapter; /** * enum drm_bridge_attach_flags - Flags for &drm_bridge_funcs.attach */ enum drm_bridge_attach_flags { /** * @DRM_BRIDGE_ATTACH_NO_CONNECTOR: When this flag is set the bridge * shall not create a drm_connector. */ DRM_BRIDGE_ATTACH_NO_CONNECTOR = BIT(0), }; /** * struct drm_bridge_funcs - drm_bridge control functions */ struct drm_bridge_funcs { /** * @attach: * * This callback is invoked whenever our bridge is being attached to a * &drm_encoder. The flags argument tunes the behaviour of the attach * operation (see DRM_BRIDGE_ATTACH_*). * * The @attach callback is optional. * * RETURNS: * * Zero on success, error code on failure. */ int (*attach)(struct drm_bridge *bridge, enum drm_bridge_attach_flags flags); /** * @detach: * * This callback is invoked whenever our bridge is being detached from a * &drm_encoder. * * The @detach callback is optional. */ void (*detach)(struct drm_bridge *bridge); /** * @mode_valid: * * This callback is used to check if a specific mode is valid in this * bridge. This should be implemented if the bridge has some sort of * restriction in the modes it can display. For example, a given bridge * may be responsible to set a clock value. If the clock can not * produce all the values for the available modes then this callback * can be used to restrict the number of modes to only the ones that * can be displayed. * * This hook is used by the probe helpers to filter the mode list in * drm_helper_probe_single_connector_modes(), and it is used by the * atomic helpers to validate modes supplied by userspace in * drm_atomic_helper_check_modeset(). * * The @mode_valid callback is optional. * * NOTE: * * Since this function is both called from the check phase of an atomic * commit, and the mode validation in the probe paths it is not allowed * to look at anything else but the passed-in mode, and validate it * against configuration-invariant hardware constraints. Any further * limits which depend upon the configuration can only be checked in * @mode_fixup. * * RETURNS: * * drm_mode_status Enum */ enum drm_mode_status (*mode_valid)(struct drm_bridge *bridge, const struct drm_display_info *info, const struct drm_display_mode *mode); /** * @mode_fixup: * * This callback is used to validate and adjust a mode. The parameter * mode is the display mode that should be fed to the next element in * the display chain, either the final &drm_connector or the next * &drm_bridge. The parameter adjusted_mode is the input mode the bridge * requires. It can be modified by this callback and does not need to * match mode. See also &drm_crtc_state.adjusted_mode for more details. * * This is the only hook that allows a bridge to reject a modeset. If * this function passes all other callbacks must succeed for this * configuration. * * The mode_fixup callback is optional. &drm_bridge_funcs.mode_fixup() * is not called when &drm_bridge_funcs.atomic_check() is implemented, * so only one of them should be provided. * * NOTE: * * This function is called in the check phase of atomic modesets, which * can be aborted for any reason (including on userspace's request to * just check whether a configuration would be possible). Drivers MUST * NOT touch any persistent state (hardware or software) or data * structures except the passed in @state parameter. * * Also beware that userspace can request its own custom modes, neither * core nor helpers filter modes to the list of probe modes reported by * the GETCONNECTOR IOCTL and stored in &drm_connector.modes. To ensure * that modes are filtered consistently put any bridge constraints and * limits checks into @mode_valid. * * RETURNS: * * True if an acceptable configuration is possible, false if the modeset * operation should be rejected. */ bool (*mode_fixup)(struct drm_bridge *bridge, const struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode); /** * @disable: * * This callback should disable the bridge. It is called right before * the preceding element in the display pipe is disabled. If the * preceding element is a bridge this means it's called before that * bridge's @disable vfunc. If the preceding element is a &drm_encoder * it's called right before the &drm_encoder_helper_funcs.disable, * &drm_encoder_helper_funcs.prepare or &drm_encoder_helper_funcs.dpms * hook. * * The bridge can assume that the display pipe (i.e. clocks and timing * signals) feeding it is still running when this callback is called. * * The @disable callback is optional. * * NOTE: * * This is deprecated, do not use! * New drivers shall use &drm_bridge_funcs.atomic_disable. */ void (*disable)(struct drm_bridge *bridge); /** * @post_disable: * * This callback should disable the bridge. It is called right after the * preceding element in the display pipe is disabled. If the preceding * element is a bridge this means it's called after that bridge's * @post_disable function. If the preceding element is a &drm_encoder * it's called right after the encoder's * &drm_encoder_helper_funcs.disable, &drm_encoder_helper_funcs.prepare * or &drm_encoder_helper_funcs.dpms hook. * * The bridge must assume that the display pipe (i.e. clocks and timing * signals) feeding it is no longer running when this callback is * called. * * The @post_disable callback is optional. * * NOTE: * * This is deprecated, do not use! * New drivers shall use &drm_bridge_funcs.atomic_post_disable. */ void (*post_disable)(struct drm_bridge *bridge); /** * @mode_set: * * This callback should set the given mode on the bridge. It is called * after the @mode_set callback for the preceding element in the display * pipeline has been called already. If the bridge is the first element * then this would be &drm_encoder_helper_funcs.mode_set. The display * pipe (i.e. clocks and timing signals) is off when this function is * called. * * The adjusted_mode parameter is the mode output by the CRTC for the * first bridge in the chain. It can be different from the mode * parameter that contains the desired mode for the connector at the end * of the bridges chain, for instance when the first bridge in the chain * performs scaling. The adjusted mode is mostly useful for the first * bridge in the chain and is likely irrelevant for the other bridges. * * For atomic drivers the adjusted_mode is the mode stored in * &drm_crtc_state.adjusted_mode. * * NOTE: * * This is deprecated, do not use! * New drivers shall set their mode in the * &drm_bridge_funcs.atomic_enable operation. */ void (*mode_set)(struct drm_bridge *bridge, const struct drm_display_mode *mode, const struct drm_display_mode *adjusted_mode); /** * @pre_enable: * * This callback should enable the bridge. It is called right before * the preceding element in the display pipe is enabled. If the * preceding element is a bridge this means it's called before that * bridge's @pre_enable function. If the preceding element is a * &drm_encoder it's called right before the encoder's * &drm_encoder_helper_funcs.enable, &drm_encoder_helper_funcs.commit or * &drm_encoder_helper_funcs.dpms hook. * * The display pipe (i.e. clocks and timing signals) feeding this bridge * will not yet be running when this callback is called. The bridge must * not enable the display link feeding the next bridge in the chain (if * there is one) when this callback is called. * * The @pre_enable callback is optional. * * NOTE: * * This is deprecated, do not use! * New drivers shall use &drm_bridge_funcs.atomic_pre_enable. */ void (*pre_enable)(struct drm_bridge *bridge); /** * @enable: * * This callback should enable the bridge. It is called right after * the preceding element in the display pipe is enabled. If the * preceding element is a bridge this means it's called after that * bridge's @enable function. If the preceding element is a * &drm_encoder it's called right after the encoder's * &drm_encoder_helper_funcs.enable, &drm_encoder_helper_funcs.commit or * &drm_encoder_helper_funcs.dpms hook. * * The bridge can assume that the display pipe (i.e. clocks and timing * signals) feeding it is running when this callback is called. This * callback must enable the display link feeding the next bridge in the * chain if there is one. * * The @enable callback is optional. * * NOTE: * * This is deprecated, do not use! * New drivers shall use &drm_bridge_funcs.atomic_enable. */ void (*enable)(struct drm_bridge *bridge); /** * @atomic_pre_enable: * * This callback should enable the bridge. It is called right before * the preceding element in the display pipe is enabled. If the * preceding element is a bridge this means it's called before that * bridge's @atomic_pre_enable or @pre_enable function. If the preceding * element is a &drm_encoder it's called right before the encoder's * &drm_encoder_helper_funcs.atomic_enable hook. * * The display pipe (i.e. clocks and timing signals) feeding this bridge * will not yet be running when this callback is called. The bridge must * not enable the display link feeding the next bridge in the chain (if * there is one) when this callback is called. * * The @atomic_pre_enable callback is optional. */ void (*atomic_pre_enable)(struct drm_bridge *bridge, struct drm_bridge_state *old_bridge_state); /** * @atomic_enable: * * This callback should enable the bridge. It is called right after * the preceding element in the display pipe is enabled. If the * preceding element is a bridge this means it's called after that * bridge's @atomic_enable or @enable function. If the preceding element * is a &drm_encoder it's called right after the encoder's * &drm_encoder_helper_funcs.atomic_enable hook. * * The bridge can assume that the display pipe (i.e. clocks and timing * signals) feeding it is running when this callback is called. This * callback must enable the display link feeding the next bridge in the * chain if there is one. * * The @atomic_enable callback is optional. */ void (*atomic_enable)(struct drm_bridge *bridge, struct drm_bridge_state *old_bridge_state); /** * @atomic_disable: * * This callback should disable the bridge. It is called right before * the preceding element in the display pipe is disabled. If the * preceding element is a bridge this means it's called before that * bridge's @atomic_disable or @disable vfunc. If the preceding element * is a &drm_encoder it's called right before the * &drm_encoder_helper_funcs.atomic_disable hook. * * The bridge can assume that the display pipe (i.e. clocks and timing * signals) feeding it is still running when this callback is called. * * The @atomic_disable callback is optional. */ void (*atomic_disable)(struct drm_bridge *bridge, struct drm_bridge_state *old_bridge_state); /** * @atomic_post_disable: * * This callback should disable the bridge. It is called right after the * preceding element in the display pipe is disabled. If the preceding * element is a bridge this means it's called after that bridge's * @atomic_post_disable or @post_disable function. If the preceding * element is a &drm_encoder it's called right after the encoder's * &drm_encoder_helper_funcs.atomic_disable hook. * * The bridge must assume that the display pipe (i.e. clocks and timing * signals) feeding it is no longer running when this callback is * called. * * The @atomic_post_disable callback is optional. */ void (*atomic_post_disable)(struct drm_bridge *bridge, struct drm_bridge_state *old_bridge_state); /** * @atomic_duplicate_state: * * Duplicate the current bridge state object (which is guaranteed to be * non-NULL). * * The atomic_duplicate_state hook is mandatory if the bridge * implements any of the atomic hooks, and should be left unassigned * otherwise. For bridges that don't subclass &drm_bridge_state, the * drm_atomic_helper_bridge_duplicate_state() helper function shall be * used to implement this hook. * * RETURNS: * A valid drm_bridge_state object or NULL if the allocation fails. */ struct drm_bridge_state *(*atomic_duplicate_state)(struct drm_bridge *bridge); /** * @atomic_destroy_state: * * Destroy a bridge state object previously allocated by * &drm_bridge_funcs.atomic_duplicate_state(). * * The atomic_destroy_state hook is mandatory if the bridge implements * any of the atomic hooks, and should be left unassigned otherwise. * For bridges that don't subclass &drm_bridge_state, the * drm_atomic_helper_bridge_destroy_state() helper function shall be * used to implement this hook. */ void (*atomic_destroy_state)(struct drm_bridge *bridge, struct drm_bridge_state *state); /** * @atomic_get_output_bus_fmts: * * Return the supported bus formats on the output end of a bridge. * The returned array must be allocated with kmalloc() and will be * freed by the caller. If the allocation fails, NULL should be * returned. num_output_fmts must be set to the returned array size. * Formats listed in the returned array should be listed in decreasing * preference order (the core will try all formats until it finds one * that works). * * This method is only called on the last element of the bridge chain * as part of the bus format negotiation process that happens in * &drm_atomic_bridge_chain_select_bus_fmts(). * This method is optional. When not implemented, the core will * fall back to &drm_connector.display_info.bus_formats[0] if * &drm_connector.display_info.num_bus_formats > 0, * or to MEDIA_BUS_FMT_FIXED otherwise. */ u32 *(*atomic_get_output_bus_fmts)(struct drm_bridge *bridge, struct drm_bridge_state *bridge_state, struct drm_crtc_state *crtc_state, struct drm_connector_state *conn_state, unsigned int *num_output_fmts); /** * @atomic_get_input_bus_fmts: * * Return the supported bus formats on the input end of a bridge for * a specific output bus format. * * The returned array must be allocated with kmalloc() and will be * freed by the caller. If the allocation fails, NULL should be * returned. num_input_fmts must be set to the returned array size. * Formats listed in the returned array should be listed in decreasing * preference order (the core will try all formats until it finds one * that works). When the format is not supported NULL should be * returned and num_input_fmts should be set to 0. * * This method is called on all elements of the bridge chain as part of * the bus format negotiation process that happens in * drm_atomic_bridge_chain_select_bus_fmts(). * This method is optional. When not implemented, the core will bypass * bus format negotiation on this element of the bridge without * failing, and the previous element in the chain will be passed * MEDIA_BUS_FMT_FIXED as its output bus format. * * Bridge drivers that need to support being linked to bridges that are * not supporting bus format negotiation should handle the * output_fmt == MEDIA_BUS_FMT_FIXED case appropriately, by selecting a * sensible default value or extracting this information from somewhere * else (FW property, &drm_display_mode, &drm_display_info, ...) * * Note: Even if input format selection on the first bridge has no * impact on the negotiation process (bus format negotiation stops once * we reach the first element of the chain), drivers are expected to * return accurate input formats as the input format may be used to * configure the CRTC output appropriately. */ u32 *(*atomic_get_input_bus_fmts)(struct drm_bridge *bridge, struct drm_bridge_state *bridge_state, struct drm_crtc_state *crtc_state, struct drm_connector_state *conn_state, u32 output_fmt, unsigned int *num_input_fmts); /** * @atomic_check: * * This method is responsible for checking bridge state correctness. * It can also check the state of the surrounding components in chain * to make sure the whole pipeline can work properly. * * &drm_bridge_funcs.atomic_check() hooks are called in reverse * order (from the last to the first bridge). * * This method is optional. &drm_bridge_funcs.mode_fixup() is not * called when &drm_bridge_funcs.atomic_check() is implemented, so only * one of them should be provided. * * If drivers need to tweak &drm_bridge_state.input_bus_cfg.flags or * &drm_bridge_state.output_bus_cfg.flags it should happen in * this function. By default the &drm_bridge_state.output_bus_cfg.flags * field is set to the next bridge * &drm_bridge_state.input_bus_cfg.flags value or * &drm_connector.display_info.bus_flags if the bridge is the last * element in the chain. * * RETURNS: * zero if the check passed, a negative error code otherwise. */ int (*atomic_check)(struct drm_bridge *bridge, struct drm_bridge_state *bridge_state, struct drm_crtc_state *crtc_state, struct drm_connector_state *conn_state); /** * @atomic_reset: * * Reset the bridge to a predefined state (or retrieve its current * state) and return a &drm_bridge_state object matching this state. * This function is called at attach time. * * The atomic_reset hook is mandatory if the bridge implements any of * the atomic hooks, and should be left unassigned otherwise. For * bridges that don't subclass &drm_bridge_state, the * drm_atomic_helper_bridge_reset() helper function shall be used to * implement this hook. * * Note that the atomic_reset() semantics is not exactly matching the * reset() semantics found on other components (connector, plane, ...). * * 1. The reset operation happens when the bridge is attached, not when * drm_mode_config_reset() is called * 2. It's meant to be used exclusively on bridges that have been * converted to the ATOMIC API * * RETURNS: * A valid drm_bridge_state object in case of success, an ERR_PTR() * giving the reason of the failure otherwise. */ struct drm_bridge_state *(*atomic_reset)(struct drm_bridge *bridge); /** * @detect: * * Check if anything is attached to the bridge output. * * This callback is optional, if not implemented the bridge will be * considered as always having a component attached to its output. * Bridges that implement this callback shall set the * DRM_BRIDGE_OP_DETECT flag in their &drm_bridge->ops. * * RETURNS: * * drm_connector_status indicating the bridge output status. */ enum drm_connector_status (*detect)(struct drm_bridge *bridge); /** * @get_modes: * * Fill all modes currently valid for the sink into the &drm_connector * with drm_mode_probed_add(). * * The @get_modes callback is mostly intended to support non-probeable * displays such as many fixed panels. Bridges that support reading * EDID shall leave @get_modes unimplemented and implement the * &drm_bridge_funcs->edid_read callback instead. * * This callback is optional. Bridges that implement it shall set the * DRM_BRIDGE_OP_MODES flag in their &drm_bridge->ops. * * The connector parameter shall be used for the sole purpose of * filling modes, and shall not be stored internally by bridge drivers * for future usage. * * RETURNS: * * The number of modes added by calling drm_mode_probed_add(). */ int (*get_modes)(struct drm_bridge *bridge, struct drm_connector *connector); /** * @edid_read: * * Read the EDID data of the connected display. * * The @edid_read callback is the preferred way of reporting mode * information for a display connected to the bridge output. Bridges * that support reading EDID shall implement this callback and leave * the @get_modes callback unimplemented. * * The caller of this operation shall first verify the output * connection status and refrain from reading EDID from a disconnected * output. * * This callback is optional. Bridges that implement it shall set the * DRM_BRIDGE_OP_EDID flag in their &drm_bridge->ops. * * The connector parameter shall be used for the sole purpose of EDID * retrieval, and shall not be stored internally by bridge drivers for * future usage. * * RETURNS: * * An edid structure newly allocated with drm_edid_alloc() or returned * from drm_edid_read() family of functions on success, or NULL * otherwise. The caller is responsible for freeing the returned edid * structure with drm_edid_free(). */ const struct drm_edid *(*edid_read)(struct drm_bridge *bridge, struct drm_connector *connector); /** * @hpd_notify: * * Notify the bridge of hot plug detection. * * This callback is optional, it may be implemented by bridges that * need to be notified of display connection or disconnection for * internal reasons. One use case is to reset the internal state of CEC * controllers for HDMI bridges. */ void (*hpd_notify)(struct drm_bridge *bridge, enum drm_connector_status status); /** * @hpd_enable: * * Enable hot plug detection. From now on the bridge shall call * drm_bridge_hpd_notify() each time a change is detected in the output * connection status, until hot plug detection gets disabled with * @hpd_disable. * * This callback is optional and shall only be implemented by bridges * that support hot-plug notification without polling. Bridges that * implement it shall also implement the @hpd_disable callback and set * the DRM_BRIDGE_OP_HPD flag in their &drm_bridge->ops. */ void (*hpd_enable)(struct drm_bridge *bridge); /** * @hpd_disable: * * Disable hot plug detection. Once this function returns the bridge * shall not call drm_bridge_hpd_notify() when a change in the output * connection status occurs. * * This callback is optional and shall only be implemented by bridges * that support hot-plug notification without polling. Bridges that * implement it shall also implement the @hpd_enable callback and set * the DRM_BRIDGE_OP_HPD flag in their &drm_bridge->ops. */ void (*hpd_disable)(struct drm_bridge *bridge); /** * @hdmi_tmds_char_rate_valid: * * Check whether a particular TMDS character rate is supported by the * driver. * * This callback is optional and should only be implemented by the * bridges that take part in the HDMI connector implementation. Bridges * that implement it shall set the DRM_BRIDGE_OP_HDMI flag in their * &drm_bridge->ops. * * Returns: * * Either &drm_mode_status.MODE_OK or one of the failure reasons * in &enum drm_mode_status. */ enum drm_mode_status (*hdmi_tmds_char_rate_valid)(const struct drm_bridge *bridge, const struct drm_display_mode *mode, unsigned long long tmds_rate); /** * @hdmi_clear_infoframe: * * This callback clears the infoframes in the hardware during commit. * It will be called multiple times, once for every disabled infoframe * type. * * This callback is optional but it must be implemented by bridges that * set the DRM_BRIDGE_OP_HDMI flag in their &drm_bridge->ops. */ int (*hdmi_clear_infoframe)(struct drm_bridge *bridge, enum hdmi_infoframe_type type); /** * @hdmi_write_infoframe: * * Program the infoframe into the hardware. It will be called multiple * times, once for every updated infoframe type. * * This callback is optional but it must be implemented by bridges that * set the DRM_BRIDGE_OP_HDMI flag in their &drm_bridge->ops. */ int (*hdmi_write_infoframe)(struct drm_bridge *bridge, enum hdmi_infoframe_type type, const u8 *buffer, size_t len); /** * @debugfs_init: * * Allows bridges to create bridge-specific debugfs files. */ void (*debugfs_init)(struct drm_bridge *bridge, struct dentry *root); }; /** * struct drm_bridge_timings - timing information for the bridge */ struct drm_bridge_timings { /** * @input_bus_flags: * * Tells what additional settings for the pixel data on the bus * this bridge requires (like pixel signal polarity). See also * &drm_display_info->bus_flags. */ u32 input_bus_flags; /** * @setup_time_ps: * * Defines the time in picoseconds the input data lines must be * stable before the clock edge. */ u32 setup_time_ps; /** * @hold_time_ps: * * Defines the time in picoseconds taken for the bridge to sample the * input signal after the clock edge. */ u32 hold_time_ps; /** * @dual_link: * * True if the bus operates in dual-link mode. The exact meaning is * dependent on the bus type. For LVDS buses, this indicates that even- * and odd-numbered pixels are received on separate links. */ bool dual_link; }; /** * enum drm_bridge_ops - Bitmask of operations supported by the bridge */ enum drm_bridge_ops { /** * @DRM_BRIDGE_OP_DETECT: The bridge can detect displays connected to * its output. Bridges that set this flag shall implement the * &drm_bridge_funcs->detect callback. */ DRM_BRIDGE_OP_DETECT = BIT(0), /** * @DRM_BRIDGE_OP_EDID: The bridge can retrieve the EDID of the display * connected to its output. Bridges that set this flag shall implement * the &drm_bridge_funcs->edid_read callback. */ DRM_BRIDGE_OP_EDID = BIT(1), /** * @DRM_BRIDGE_OP_HPD: The bridge can detect hot-plug and hot-unplug * without requiring polling. Bridges that set this flag shall * implement the &drm_bridge_funcs->hpd_enable and * &drm_bridge_funcs->hpd_disable callbacks if they support enabling * and disabling hot-plug detection dynamically. */ DRM_BRIDGE_OP_HPD = BIT(2), /** * @DRM_BRIDGE_OP_MODES: The bridge can retrieve the modes supported * by the display at its output. This does not include reading EDID * which is separately covered by @DRM_BRIDGE_OP_EDID. Bridges that set * this flag shall implement the &drm_bridge_funcs->get_modes callback. */ DRM_BRIDGE_OP_MODES = BIT(3), /** * @DRM_BRIDGE_OP_HDMI: The bridge provides HDMI connector operations, * including infoframes support. Bridges that set this flag must * implement the &drm_bridge_funcs->write_infoframe callback. * * Note: currently there can be at most one bridge in a chain that sets * this bit. This is to simplify corresponding glue code in connector * drivers. */ DRM_BRIDGE_OP_HDMI = BIT(4), }; /** * struct drm_bridge - central DRM bridge control structure */ struct drm_bridge { /** @base: inherit from &drm_private_object */ struct drm_private_obj base; /** @dev: DRM device this bridge belongs to */ struct drm_device *dev; /** @encoder: encoder to which this bridge is connected */ struct drm_encoder *encoder; /** @chain_node: used to form a bridge chain */ struct list_head chain_node; /** @of_node: device node pointer to the bridge */ struct device_node *of_node; /** @list: to keep track of all added bridges */ struct list_head list; /** * @timings: * * the timing specification for the bridge, if any (may be NULL) */ const struct drm_bridge_timings *timings; /** @funcs: control functions */ const struct drm_bridge_funcs *funcs; /** @driver_private: pointer to the bridge driver's internal context */ void *driver_private; /** @ops: bitmask of operations supported by the bridge */ enum drm_bridge_ops ops; /** * @type: Type of the connection at the bridge output * (DRM_MODE_CONNECTOR_*). For bridges at the end of this chain this * identifies the type of connected display. */ int type; /** * @interlace_allowed: Indicate that the bridge can handle interlaced * modes. */ bool interlace_allowed; /** * @pre_enable_prev_first: The bridge requires that the prev * bridge @pre_enable function is called before its @pre_enable, * and conversely for post_disable. This is most frequently a * requirement for DSI devices which need the host to be initialised * before the peripheral. */ bool pre_enable_prev_first; /** * @ddc: Associated I2C adapter for DDC access, if any. */ struct i2c_adapter *ddc; /** private: */ /** * @hpd_mutex: Protects the @hpd_cb and @hpd_data fields. */ struct mutex hpd_mutex; /** * @hpd_cb: Hot plug detection callback, registered with * drm_bridge_hpd_enable(). */ void (*hpd_cb)(void *data, enum drm_connector_status status); /** * @hpd_data: Private data passed to the Hot plug detection callback * @hpd_cb. */ void *hpd_data; /** * @vendor: Vendor of the product to be used for the SPD InfoFrame * generation. This is required if @DRM_BRIDGE_OP_HDMI is set. */ const char *vendor; /** * @product: Name of the product to be used for the SPD InfoFrame * generation. This is required if @DRM_BRIDGE_OP_HDMI is set. */ const char *product; /** * @supported_formats: Bitmask of @hdmi_colorspace listing supported * output formats. This is only relevant if @DRM_BRIDGE_OP_HDMI is set. */ unsigned int supported_formats; /** * @max_bpc: Maximum bits per char the HDMI bridge supports. Allowed * values are 8, 10 and 12. This is only relevant if * @DRM_BRIDGE_OP_HDMI is set. */ unsigned int max_bpc; }; static inline struct drm_bridge * drm_priv_to_bridge(struct drm_private_obj *priv) { return container_of(priv, struct drm_bridge, base); } void drm_bridge_add(struct drm_bridge *bridge); int devm_drm_bridge_add(struct device *dev, struct drm_bridge *bridge); void drm_bridge_remove(struct drm_bridge *bridge); int drm_bridge_attach(struct drm_encoder *encoder, struct drm_bridge *bridge, struct drm_bridge *previous, enum drm_bridge_attach_flags flags); #ifdef CONFIG_OF struct drm_bridge *of_drm_find_bridge(struct device_node *np); #else static inline struct drm_bridge *of_drm_find_bridge(struct device_node *np) { return NULL; } #endif /** * drm_bridge_get_next_bridge() - Get the next bridge in the chain * @bridge: bridge object * * RETURNS: * the next bridge in the chain after @bridge, or NULL if @bridge is the last. */ static inline struct drm_bridge * drm_bridge_get_next_bridge(struct drm_bridge *bridge) { if (list_is_last(&bridge->chain_node, &bridge->encoder->bridge_chain)) return NULL; return list_next_entry(bridge, chain_node); } /** * drm_bridge_get_prev_bridge() - Get the previous bridge in the chain * @bridge: bridge object * * RETURNS: * the previous bridge in the chain, or NULL if @bridge is the first. */ static inline struct drm_bridge * drm_bridge_get_prev_bridge(struct drm_bridge *bridge) { if (list_is_first(&bridge->chain_node, &bridge->encoder->bridge_chain)) return NULL; return list_prev_entry(bridge, chain_node); } /** * drm_bridge_chain_get_first_bridge() - Get the first bridge in the chain * @encoder: encoder object * * RETURNS: * the first bridge in the chain, or NULL if @encoder has no bridge attached * to it. */ static inline struct drm_bridge * drm_bridge_chain_get_first_bridge(struct drm_encoder *encoder) { return list_first_entry_or_null(&encoder->bridge_chain, struct drm_bridge, chain_node); } /** * drm_for_each_bridge_in_chain() - Iterate over all bridges present in a chain * @encoder: the encoder to iterate bridges on * @bridge: a bridge pointer updated to point to the current bridge at each * iteration * * Iterate over all bridges present in the bridge chain attached to @encoder. */ #define drm_for_each_bridge_in_chain(encoder, bridge) \ list_for_each_entry(bridge, &(encoder)->bridge_chain, chain_node) enum drm_mode_status drm_bridge_chain_mode_valid(struct drm_bridge *bridge, const struct drm_display_info *info, const struct drm_display_mode *mode); void drm_bridge_chain_mode_set(struct drm_bridge *bridge, const struct drm_display_mode *mode, const struct drm_display_mode *adjusted_mode); int drm_atomic_bridge_chain_check(struct drm_bridge *bridge, struct drm_crtc_state *crtc_state, struct drm_connector_state *conn_state); void drm_atomic_bridge_chain_disable(struct drm_bridge *bridge, struct drm_atomic_state *state); void drm_atomic_bridge_chain_post_disable(struct drm_bridge *bridge, struct drm_atomic_state *state); void drm_atomic_bridge_chain_pre_enable(struct drm_bridge *bridge, struct drm_atomic_state *state); void drm_atomic_bridge_chain_enable(struct drm_bridge *bridge, struct drm_atomic_state *state); u32 * drm_atomic_helper_bridge_propagate_bus_fmt(struct drm_bridge *bridge, struct drm_bridge_state *bridge_state, struct drm_crtc_state *crtc_state, struct drm_connector_state *conn_state, u32 output_fmt, unsigned int *num_input_fmts); enum drm_connector_status drm_bridge_detect(struct drm_bridge *bridge); int drm_bridge_get_modes(struct drm_bridge *bridge, struct drm_connector *connector); const struct drm_edid *drm_bridge_edid_read(struct drm_bridge *bridge, struct drm_connector *connector); void drm_bridge_hpd_enable(struct drm_bridge *bridge, void (*cb)(void *data, enum drm_connector_status status), void *data); void drm_bridge_hpd_disable(struct drm_bridge *bridge); void drm_bridge_hpd_notify(struct drm_bridge *bridge, enum drm_connector_status status); #ifdef CONFIG_DRM_PANEL_BRIDGE bool drm_bridge_is_panel(const struct drm_bridge *bridge); struct drm_bridge *drm_panel_bridge_add(struct drm_panel *panel); struct drm_bridge *drm_panel_bridge_add_typed(struct drm_panel *panel, u32 connector_type); void drm_panel_bridge_remove(struct drm_bridge *bridge); int drm_panel_bridge_set_orientation(struct drm_connector *connector, struct drm_bridge *bridge); struct drm_bridge *devm_drm_panel_bridge_add(struct device *dev, struct drm_panel *panel); struct drm_bridge *devm_drm_panel_bridge_add_typed(struct device *dev, struct drm_panel *panel, u32 connector_type); struct drm_bridge *drmm_panel_bridge_add(struct drm_device *drm, struct drm_panel *panel); struct drm_connector *drm_panel_bridge_connector(struct drm_bridge *bridge); #else static inline bool drm_bridge_is_panel(const struct drm_bridge *bridge) { return false; } static inline int drm_panel_bridge_set_orientation(struct drm_connector *connector, struct drm_bridge *bridge) { return -EINVAL; } #endif #if defined(CONFIG_OF) && defined(CONFIG_DRM_PANEL_BRIDGE) struct drm_bridge *devm_drm_of_get_bridge(struct device *dev, struct device_node *node, u32 port, u32 endpoint); struct drm_bridge *drmm_of_get_bridge(struct drm_device *drm, struct device_node *node, u32 port, u32 endpoint); #else static inline struct drm_bridge *devm_drm_of_get_bridge(struct device *dev, struct device_node *node, u32 port, u32 endpoint) { return ERR_PTR(-ENODEV); } static inline struct drm_bridge *drmm_of_get_bridge(struct drm_device *drm, struct device_node *node, u32 port, u32 endpoint) { return ERR_PTR(-ENODEV); } #endif #endif |
| 1350 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef MPLS_INTERNAL_H #define MPLS_INTERNAL_H #include <net/mpls.h> /* put a reasonable limit on the number of labels * we will accept from userspace */ #define MAX_NEW_LABELS 30 struct mpls_entry_decoded { u32 label; u8 ttl; u8 tc; u8 bos; }; struct mpls_pcpu_stats { struct mpls_link_stats stats; struct u64_stats_sync syncp; }; struct mpls_dev { int input_enabled; struct net_device *dev; struct mpls_pcpu_stats __percpu *stats; struct ctl_table_header *sysctl; struct rcu_head rcu; }; #if BITS_PER_LONG == 32 #define MPLS_INC_STATS_LEN(mdev, len, pkts_field, bytes_field) \ do { \ __typeof__(*(mdev)->stats) *ptr = \ raw_cpu_ptr((mdev)->stats); \ local_bh_disable(); \ u64_stats_update_begin(&ptr->syncp); \ ptr->stats.pkts_field++; \ ptr->stats.bytes_field += (len); \ u64_stats_update_end(&ptr->syncp); \ local_bh_enable(); \ } while (0) #define MPLS_INC_STATS(mdev, field) \ do { \ __typeof__(*(mdev)->stats) *ptr = \ raw_cpu_ptr((mdev)->stats); \ local_bh_disable(); \ u64_stats_update_begin(&ptr->syncp); \ ptr->stats.field++; \ u64_stats_update_end(&ptr->syncp); \ local_bh_enable(); \ } while (0) #else #define MPLS_INC_STATS_LEN(mdev, len, pkts_field, bytes_field) \ do { \ this_cpu_inc((mdev)->stats->stats.pkts_field); \ this_cpu_add((mdev)->stats->stats.bytes_field, (len)); \ } while (0) #define MPLS_INC_STATS(mdev, field) \ this_cpu_inc((mdev)->stats->stats.field) #endif struct sk_buff; #define LABEL_NOT_SPECIFIED (1 << 20) /* This maximum ha length copied from the definition of struct neighbour */ #define VIA_ALEN_ALIGN sizeof(unsigned long) #define MAX_VIA_ALEN (ALIGN(MAX_ADDR_LEN, VIA_ALEN_ALIGN)) enum mpls_payload_type { MPT_UNSPEC, /* IPv4 or IPv6 */ MPT_IPV4 = 4, MPT_IPV6 = 6, /* Other types not implemented: * - Pseudo-wire with or without control word (RFC4385) * - GAL (RFC5586) */ }; struct mpls_nh { /* next hop label forwarding entry */ struct net_device *nh_dev; /* nh_flags is accessed under RCU in the packet path; it is * modified handling netdev events with rtnl lock held */ unsigned int nh_flags; u8 nh_labels; u8 nh_via_alen; u8 nh_via_table; u8 nh_reserved1; u32 nh_label[]; }; /* offset of via from beginning of mpls_nh */ #define MPLS_NH_VIA_OFF(num_labels) \ ALIGN(sizeof(struct mpls_nh) + (num_labels) * sizeof(u32), \ VIA_ALEN_ALIGN) /* all nexthops within a route have the same size based on the * max number of labels and max via length across all nexthops */ #define MPLS_NH_SIZE(num_labels, max_via_alen) \ (MPLS_NH_VIA_OFF((num_labels)) + \ ALIGN((max_via_alen), VIA_ALEN_ALIGN)) enum mpls_ttl_propagation { MPLS_TTL_PROP_DEFAULT, MPLS_TTL_PROP_ENABLED, MPLS_TTL_PROP_DISABLED, }; /* The route, nexthops and vias are stored together in the same memory * block: * * +----------------------+ * | mpls_route | * +----------------------+ * | mpls_nh 0 | * +----------------------+ * | alignment padding | 4 bytes for odd number of labels * +----------------------+ * | via[rt_max_alen] 0 | * +----------------------+ * | alignment padding | via's aligned on sizeof(unsigned long) * +----------------------+ * | ... | * +----------------------+ * | mpls_nh n-1 | * +----------------------+ * | via[rt_max_alen] n-1 | * +----------------------+ */ struct mpls_route { /* next hop label forwarding entry */ struct rcu_head rt_rcu; u8 rt_protocol; u8 rt_payload_type; u8 rt_max_alen; u8 rt_ttl_propagate; u8 rt_nhn; /* rt_nhn_alive is accessed under RCU in the packet path; it * is modified handling netdev events with rtnl lock held */ u8 rt_nhn_alive; u8 rt_nh_size; u8 rt_via_offset; u8 rt_reserved1; struct mpls_nh rt_nh[]; }; #define for_nexthops(rt) { \ int nhsel; const struct mpls_nh *nh; \ for (nhsel = 0, nh = (rt)->rt_nh; \ nhsel < (rt)->rt_nhn; \ nh = (void *)nh + (rt)->rt_nh_size, nhsel++) #define change_nexthops(rt) { \ int nhsel; struct mpls_nh *nh; \ for (nhsel = 0, nh = (rt)->rt_nh; \ nhsel < (rt)->rt_nhn; \ nh = (void *)nh + (rt)->rt_nh_size, nhsel++) #define endfor_nexthops(rt) } static inline struct mpls_entry_decoded mpls_entry_decode(struct mpls_shim_hdr *hdr) { struct mpls_entry_decoded result; unsigned entry = be32_to_cpu(hdr->label_stack_entry); result.label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT; result.ttl = (entry & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT; result.tc = (entry & MPLS_LS_TC_MASK) >> MPLS_LS_TC_SHIFT; result.bos = (entry & MPLS_LS_S_MASK) >> MPLS_LS_S_SHIFT; return result; } static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev) { return rcu_dereference_rtnl(dev->mpls_ptr); } int nla_put_labels(struct sk_buff *skb, int attrtype, u8 labels, const u32 label[]); int nla_get_labels(const struct nlattr *nla, u8 max_labels, u8 *labels, u32 label[], struct netlink_ext_ack *extack); bool mpls_output_possible(const struct net_device *dev); unsigned int mpls_dev_mtu(const struct net_device *dev); bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu); void mpls_stats_inc_outucastpkts(struct net_device *dev, const struct sk_buff *skb); #endif /* MPLS_INTERNAL_H */ |
| 389 23 365 389 387 365 23 23 390 390 389 2431 390 389 23 390 390 389 21 21 390 25 388 390 20 388 384 384 389 23 3 2429 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 | // SPDX-License-Identifier: GPL-2.0 /* * A fast, small, non-recursive O(n log n) sort for the Linux kernel * * This performs n*log2(n) + 0.37*n + o(n) comparisons on average, * and 1.5*n*log2(n) + O(n) in the (very contrived) worst case. * * Quicksort manages n*log2(n) - 1.26*n for random inputs (1.63*n * better) at the expense of stack usage and much larger code to avoid * quicksort's O(n^2) worst case. */ #include <linux/types.h> #include <linux/export.h> #include <linux/sort.h> /** * is_aligned - is this pointer & size okay for word-wide copying? * @base: pointer to data * @size: size of each element * @align: required alignment (typically 4 or 8) * * Returns true if elements can be copied using word loads and stores. * The size must be a multiple of the alignment, and the base address must * be if we do not have CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS. * * For some reason, gcc doesn't know to optimize "if (a & mask || b & mask)" * to "if ((a | b) & mask)", so we do that by hand. */ __attribute_const__ __always_inline static bool is_aligned(const void *base, size_t size, unsigned char align) { unsigned char lsbits = (unsigned char)size; (void)base; #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS lsbits |= (unsigned char)(uintptr_t)base; #endif return (lsbits & (align - 1)) == 0; } /** * swap_words_32 - swap two elements in 32-bit chunks * @a: pointer to the first element to swap * @b: pointer to the second element to swap * @n: element size (must be a multiple of 4) * * Exchange the two objects in memory. This exploits base+index addressing, * which basically all CPUs have, to minimize loop overhead computations. * * For some reason, on x86 gcc 7.3.0 adds a redundant test of n at the * bottom of the loop, even though the zero flag is still valid from the * subtract (since the intervening mov instructions don't alter the flags). * Gcc 8.1.0 doesn't have that problem. */ static void swap_words_32(void *a, void *b, size_t n) { do { u32 t = *(u32 *)(a + (n -= 4)); *(u32 *)(a + n) = *(u32 *)(b + n); *(u32 *)(b + n) = t; } while (n); } /** * swap_words_64 - swap two elements in 64-bit chunks * @a: pointer to the first element to swap * @b: pointer to the second element to swap * @n: element size (must be a multiple of 8) * * Exchange the two objects in memory. This exploits base+index * addressing, which basically all CPUs have, to minimize loop overhead * computations. * * We'd like to use 64-bit loads if possible. If they're not, emulating * one requires base+index+4 addressing which x86 has but most other * processors do not. If CONFIG_64BIT, we definitely have 64-bit loads, * but it's possible to have 64-bit loads without 64-bit pointers (e.g. * x32 ABI). Are there any cases the kernel needs to worry about? */ static void swap_words_64(void *a, void *b, size_t n) { do { #ifdef CONFIG_64BIT u64 t = *(u64 *)(a + (n -= 8)); *(u64 *)(a + n) = *(u64 *)(b + n); *(u64 *)(b + n) = t; #else /* Use two 32-bit transfers to avoid base+index+4 addressing */ u32 t = *(u32 *)(a + (n -= 4)); *(u32 *)(a + n) = *(u32 *)(b + n); *(u32 *)(b + n) = t; t = *(u32 *)(a + (n -= 4)); *(u32 *)(a + n) = *(u32 *)(b + n); *(u32 *)(b + n) = t; #endif } while (n); } /** * swap_bytes - swap two elements a byte at a time * @a: pointer to the first element to swap * @b: pointer to the second element to swap * @n: element size * * This is the fallback if alignment doesn't allow using larger chunks. */ static void swap_bytes(void *a, void *b, size_t n) { do { char t = ((char *)a)[--n]; ((char *)a)[n] = ((char *)b)[n]; ((char *)b)[n] = t; } while (n); } /* * The values are arbitrary as long as they can't be confused with * a pointer, but small integers make for the smallest compare * instructions. */ #define SWAP_WORDS_64 (swap_r_func_t)0 #define SWAP_WORDS_32 (swap_r_func_t)1 #define SWAP_BYTES (swap_r_func_t)2 #define SWAP_WRAPPER (swap_r_func_t)3 struct wrapper { cmp_func_t cmp; swap_func_t swap; }; /* * The function pointer is last to make tail calls most efficient if the * compiler decides not to inline this function. */ static void do_swap(void *a, void *b, size_t size, swap_r_func_t swap_func, const void *priv) { if (swap_func == SWAP_WRAPPER) { ((const struct wrapper *)priv)->swap(a, b, (int)size); return; } if (swap_func == SWAP_WORDS_64) swap_words_64(a, b, size); else if (swap_func == SWAP_WORDS_32) swap_words_32(a, b, size); else if (swap_func == SWAP_BYTES) swap_bytes(a, b, size); else swap_func(a, b, (int)size, priv); } #define _CMP_WRAPPER ((cmp_r_func_t)0L) static int do_cmp(const void *a, const void *b, cmp_r_func_t cmp, const void *priv) { if (cmp == _CMP_WRAPPER) return ((const struct wrapper *)priv)->cmp(a, b); return cmp(a, b, priv); } /** * parent - given the offset of the child, find the offset of the parent. * @i: the offset of the heap element whose parent is sought. Non-zero. * @lsbit: a precomputed 1-bit mask, equal to "size & -size" * @size: size of each element * * In terms of array indexes, the parent of element j = @i/@size is simply * (j-1)/2. But when working in byte offsets, we can't use implicit * truncation of integer divides. * * Fortunately, we only need one bit of the quotient, not the full divide. * @size has a least significant bit. That bit will be clear if @i is * an even multiple of @size, and set if it's an odd multiple. * * Logically, we're doing "if (i & lsbit) i -= size;", but since the * branch is unpredictable, it's done with a bit of clever branch-free * code instead. */ __attribute_const__ __always_inline static size_t parent(size_t i, unsigned int lsbit, size_t size) { i -= size; i -= size & -(i & lsbit); return i / 2; } /** * sort_r - sort an array of elements * @base: pointer to data to sort * @num: number of elements * @size: size of each element * @cmp_func: pointer to comparison function * @swap_func: pointer to swap function or NULL * @priv: third argument passed to comparison function * * This function does a heapsort on the given array. You may provide * a swap_func function if you need to do something more than a memory * copy (e.g. fix up pointers or auxiliary data), but the built-in swap * avoids a slow retpoline and so is significantly faster. * * Sorting time is O(n log n) both on average and worst-case. While * quicksort is slightly faster on average, it suffers from exploitable * O(n*n) worst-case behavior and extra memory requirements that make * it less suitable for kernel use. */ void sort_r(void *base, size_t num, size_t size, cmp_r_func_t cmp_func, swap_r_func_t swap_func, const void *priv) { /* pre-scale counters for performance */ size_t n = num * size, a = (num/2) * size; const unsigned int lsbit = size & -size; /* Used to find parent */ size_t shift = 0; if (!a) /* num < 2 || size == 0 */ return; /* called from 'sort' without swap function, let's pick the default */ if (swap_func == SWAP_WRAPPER && !((struct wrapper *)priv)->swap) swap_func = NULL; if (!swap_func) { if (is_aligned(base, size, 8)) swap_func = SWAP_WORDS_64; else if (is_aligned(base, size, 4)) swap_func = SWAP_WORDS_32; else swap_func = SWAP_BYTES; } /* * Loop invariants: * 1. elements [a,n) satisfy the heap property (compare greater than * all of their children), * 2. elements [n,num*size) are sorted, and * 3. a <= b <= c <= d <= n (whenever they are valid). */ for (;;) { size_t b, c, d; if (a) /* Building heap: sift down a */ a -= size << shift; else if (n > 3 * size) { /* Sorting: Extract two largest elements */ n -= size; do_swap(base, base + n, size, swap_func, priv); shift = do_cmp(base + size, base + 2 * size, cmp_func, priv) <= 0; a = size << shift; n -= size; do_swap(base + a, base + n, size, swap_func, priv); } else { /* Sort complete */ break; } /* * Sift element at "a" down into heap. This is the * "bottom-up" variant, which significantly reduces * calls to cmp_func(): we find the sift-down path all * the way to the leaves (one compare per level), then * backtrack to find where to insert the target element. * * Because elements tend to sift down close to the leaves, * this uses fewer compares than doing two per level * on the way down. (A bit more than half as many on * average, 3/4 worst-case.) */ for (b = a; c = 2*b + size, (d = c + size) < n;) b = do_cmp(base + c, base + d, cmp_func, priv) > 0 ? c : d; if (d == n) /* Special case last leaf with no sibling */ b = c; /* Now backtrack from "b" to the correct location for "a" */ while (b != a && do_cmp(base + a, base + b, cmp_func, priv) >= 0) b = parent(b, lsbit, size); c = b; /* Where "a" belongs */ while (b != a) { /* Shift it into place */ b = parent(b, lsbit, size); do_swap(base + b, base + c, size, swap_func, priv); } } n -= size; do_swap(base, base + n, size, swap_func, priv); if (n == size * 2 && do_cmp(base, base + size, cmp_func, priv) > 0) do_swap(base, base + size, size, swap_func, priv); } EXPORT_SYMBOL(sort_r); void sort(void *base, size_t num, size_t size, cmp_func_t cmp_func, swap_func_t swap_func) { struct wrapper w = { .cmp = cmp_func, .swap = swap_func, }; return sort_r(base, num, size, _CMP_WRAPPER, SWAP_WRAPPER, &w); } EXPORT_SYMBOL(sort); |
| 698 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 | // SPDX-License-Identifier: GPL-2.0-or-later /* Paravirtualization interfaces Copyright (C) 2006 Rusty Russell IBM Corporation 2007 - x86_64 support added by Glauber de Oliveira Costa, Red Hat Inc */ #include <linux/errno.h> #include <linux/init.h> #include <linux/export.h> #include <linux/efi.h> #include <linux/bcd.h> #include <linux/highmem.h> #include <linux/kprobes.h> #include <linux/pgtable.h> #include <linux/static_call.h> #include <asm/bug.h> #include <asm/paravirt.h> #include <asm/debugreg.h> #include <asm/desc.h> #include <asm/setup.h> #include <asm/time.h> #include <asm/pgalloc.h> #include <asm/irq.h> #include <asm/delay.h> #include <asm/fixmap.h> #include <asm/apic.h> #include <asm/tlbflush.h> #include <asm/timer.h> #include <asm/special_insns.h> #include <asm/tlb.h> #include <asm/io_bitmap.h> #include <asm/gsseg.h> /* stub always returning 0. */ DEFINE_ASM_FUNC(paravirt_ret0, "xor %eax,%eax", .entry.text); void __init default_banner(void) { printk(KERN_INFO "Booting paravirtualized kernel on %s\n", pv_info.name); } #ifdef CONFIG_PARAVIRT_XXL DEFINE_ASM_FUNC(_paravirt_ident_64, "mov %rdi, %rax", .text); DEFINE_ASM_FUNC(pv_native_save_fl, "pushf; pop %rax", .noinstr.text); DEFINE_ASM_FUNC(pv_native_irq_disable, "cli", .noinstr.text); DEFINE_ASM_FUNC(pv_native_irq_enable, "sti", .noinstr.text); DEFINE_ASM_FUNC(pv_native_read_cr2, "mov %cr2, %rax", .noinstr.text); #endif DEFINE_STATIC_KEY_FALSE(virt_spin_lock_key); void __init native_pv_lock_init(void) { if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) static_branch_enable(&virt_spin_lock_key); } static void native_tlb_remove_table(struct mmu_gather *tlb, void *table) { tlb_remove_page(tlb, table); } struct static_key paravirt_steal_enabled; struct static_key paravirt_steal_rq_enabled; static u64 native_steal_clock(int cpu) { return 0; } DEFINE_STATIC_CALL(pv_steal_clock, native_steal_clock); DEFINE_STATIC_CALL(pv_sched_clock, native_sched_clock); void paravirt_set_sched_clock(u64 (*func)(void)) { static_call_update(pv_sched_clock, func); } /* These are in entry.S */ static struct resource reserve_ioports = { .start = 0, .end = IO_SPACE_LIMIT, .name = "paravirt-ioport", .flags = IORESOURCE_IO | IORESOURCE_BUSY, }; /* * Reserve the whole legacy IO space to prevent any legacy drivers * from wasting time probing for their hardware. This is a fairly * brute-force approach to disabling all non-virtual drivers. * * Note that this must be called very early to have any effect. */ int paravirt_disable_iospace(void) { return request_resource(&ioport_resource, &reserve_ioports); } #ifdef CONFIG_PARAVIRT_XXL static noinstr void pv_native_write_cr2(unsigned long val) { native_write_cr2(val); } static noinstr unsigned long pv_native_get_debugreg(int regno) { return native_get_debugreg(regno); } static noinstr void pv_native_set_debugreg(int regno, unsigned long val) { native_set_debugreg(regno, val); } noinstr void pv_native_wbinvd(void) { native_wbinvd(); } static noinstr void pv_native_safe_halt(void) { native_safe_halt(); } #endif struct pv_info pv_info = { .name = "bare hardware", #ifdef CONFIG_PARAVIRT_XXL .extra_user_64bit_cs = __USER_CS, #endif }; /* 64-bit pagetable entries */ #define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64) struct paravirt_patch_template pv_ops = { /* Cpu ops. */ .cpu.io_delay = native_io_delay, #ifdef CONFIG_PARAVIRT_XXL .cpu.cpuid = native_cpuid, .cpu.get_debugreg = pv_native_get_debugreg, .cpu.set_debugreg = pv_native_set_debugreg, .cpu.read_cr0 = native_read_cr0, .cpu.write_cr0 = native_write_cr0, .cpu.write_cr4 = native_write_cr4, .cpu.wbinvd = pv_native_wbinvd, .cpu.read_msr = native_read_msr, .cpu.write_msr = native_write_msr, .cpu.read_msr_safe = native_read_msr_safe, .cpu.write_msr_safe = native_write_msr_safe, .cpu.read_pmc = native_read_pmc, .cpu.load_tr_desc = native_load_tr_desc, .cpu.set_ldt = native_set_ldt, .cpu.load_gdt = native_load_gdt, .cpu.load_idt = native_load_idt, .cpu.store_tr = native_store_tr, .cpu.load_tls = native_load_tls, .cpu.load_gs_index = native_load_gs_index, .cpu.write_ldt_entry = native_write_ldt_entry, .cpu.write_gdt_entry = native_write_gdt_entry, .cpu.write_idt_entry = native_write_idt_entry, .cpu.alloc_ldt = paravirt_nop, .cpu.free_ldt = paravirt_nop, .cpu.load_sp0 = native_load_sp0, #ifdef CONFIG_X86_IOPL_IOPERM .cpu.invalidate_io_bitmap = native_tss_invalidate_io_bitmap, .cpu.update_io_bitmap = native_tss_update_io_bitmap, #endif .cpu.start_context_switch = paravirt_nop, .cpu.end_context_switch = paravirt_nop, /* Irq ops. */ .irq.save_fl = __PV_IS_CALLEE_SAVE(pv_native_save_fl), .irq.irq_disable = __PV_IS_CALLEE_SAVE(pv_native_irq_disable), .irq.irq_enable = __PV_IS_CALLEE_SAVE(pv_native_irq_enable), .irq.safe_halt = pv_native_safe_halt, .irq.halt = native_halt, #endif /* CONFIG_PARAVIRT_XXL */ /* Mmu ops. */ .mmu.flush_tlb_user = native_flush_tlb_local, .mmu.flush_tlb_kernel = native_flush_tlb_global, .mmu.flush_tlb_one_user = native_flush_tlb_one_user, .mmu.flush_tlb_multi = native_flush_tlb_multi, .mmu.tlb_remove_table = native_tlb_remove_table, .mmu.exit_mmap = paravirt_nop, .mmu.notify_page_enc_status_changed = paravirt_nop, #ifdef CONFIG_PARAVIRT_XXL .mmu.read_cr2 = __PV_IS_CALLEE_SAVE(pv_native_read_cr2), .mmu.write_cr2 = pv_native_write_cr2, .mmu.read_cr3 = __native_read_cr3, .mmu.write_cr3 = native_write_cr3, .mmu.pgd_alloc = __paravirt_pgd_alloc, .mmu.pgd_free = paravirt_nop, .mmu.alloc_pte = paravirt_nop, .mmu.alloc_pmd = paravirt_nop, .mmu.alloc_pud = paravirt_nop, .mmu.alloc_p4d = paravirt_nop, .mmu.release_pte = paravirt_nop, .mmu.release_pmd = paravirt_nop, .mmu.release_pud = paravirt_nop, .mmu.release_p4d = paravirt_nop, .mmu.set_pte = native_set_pte, .mmu.set_pmd = native_set_pmd, .mmu.ptep_modify_prot_start = __ptep_modify_prot_start, .mmu.ptep_modify_prot_commit = __ptep_modify_prot_commit, .mmu.set_pud = native_set_pud, .mmu.pmd_val = PTE_IDENT, .mmu.make_pmd = PTE_IDENT, .mmu.pud_val = PTE_IDENT, .mmu.make_pud = PTE_IDENT, .mmu.set_p4d = native_set_p4d, #if CONFIG_PGTABLE_LEVELS >= 5 .mmu.p4d_val = PTE_IDENT, .mmu.make_p4d = PTE_IDENT, .mmu.set_pgd = native_set_pgd, #endif /* CONFIG_PGTABLE_LEVELS >= 5 */ .mmu.pte_val = PTE_IDENT, .mmu.pgd_val = PTE_IDENT, .mmu.make_pte = PTE_IDENT, .mmu.make_pgd = PTE_IDENT, .mmu.enter_mmap = paravirt_nop, .mmu.lazy_mode = { .enter = paravirt_nop, .leave = paravirt_nop, .flush = paravirt_nop, }, .mmu.set_fixmap = native_set_fixmap, #endif /* CONFIG_PARAVIRT_XXL */ #if defined(CONFIG_PARAVIRT_SPINLOCKS) /* Lock ops. */ #ifdef CONFIG_SMP .lock.queued_spin_lock_slowpath = native_queued_spin_lock_slowpath, .lock.queued_spin_unlock = PV_CALLEE_SAVE(__native_queued_spin_unlock), .lock.wait = paravirt_nop, .lock.kick = paravirt_nop, .lock.vcpu_is_preempted = PV_CALLEE_SAVE(__native_vcpu_is_preempted), #endif /* SMP */ #endif }; #ifdef CONFIG_PARAVIRT_XXL NOKPROBE_SYMBOL(native_load_idt); #endif EXPORT_SYMBOL(pv_ops); EXPORT_SYMBOL_GPL(pv_info); |
| 291 13 319 3 340 340 340 340 339 369 303 8 343 347 346 8 74 189 189 5 190 5 145 453 500 330 341 75 101 90 63 50 69 69 69 57 141 69 63 146 58 2 1 5 115 113 1 344 191 350 348 186 107 171 89 175 126 126 176 342 9 154 46 4 4 4 310 312 5 5 312 6 2 291 520 5 518 340 350 344 347 343 104 105 338 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 | /* SPDX-License-Identifier: GPL-2.0-only */ #ifndef __KVM_HOST_H #define __KVM_HOST_H #include <linux/types.h> #include <linux/hardirq.h> #include <linux/list.h> #include <linux/mutex.h> #include <linux/spinlock.h> #include <linux/signal.h> #include <linux/sched.h> #include <linux/sched/stat.h> #include <linux/bug.h> #include <linux/minmax.h> #include <linux/mm.h> #include <linux/mmu_notifier.h> #include <linux/preempt.h> #include <linux/msi.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/rcupdate.h> #include <linux/ratelimit.h> #include <linux/err.h> #include <linux/irqflags.h> #include <linux/context_tracking.h> #include <linux/irqbypass.h> #include <linux/rcuwait.h> #include <linux/refcount.h> #include <linux/nospec.h> #include <linux/notifier.h> #include <linux/ftrace.h> #include <linux/hashtable.h> #include <linux/instrumentation.h> #include <linux/interval_tree.h> #include <linux/rbtree.h> #include <linux/xarray.h> #include <asm/signal.h> #include <linux/kvm.h> #include <linux/kvm_para.h> #include <linux/kvm_types.h> #include <asm/kvm_host.h> #include <linux/kvm_dirty_ring.h> #ifndef KVM_MAX_VCPU_IDS #define KVM_MAX_VCPU_IDS KVM_MAX_VCPUS #endif /* * The bit 16 ~ bit 31 of kvm_userspace_memory_region::flags are internally * used in kvm, other bits are visible for userspace which are defined in * include/linux/kvm_h. */ #define KVM_MEMSLOT_INVALID (1UL << 16) /* * Bit 63 of the memslot generation number is an "update in-progress flag", * e.g. is temporarily set for the duration of kvm_swap_active_memslots(). * This flag effectively creates a unique generation number that is used to * mark cached memslot data, e.g. MMIO accesses, as potentially being stale, * i.e. may (or may not) have come from the previous memslots generation. * * This is necessary because the actual memslots update is not atomic with * respect to the generation number update. Updating the generation number * first would allow a vCPU to cache a spte from the old memslots using the * new generation number, and updating the generation number after switching * to the new memslots would allow cache hits using the old generation number * to reference the defunct memslots. * * This mechanism is used to prevent getting hits in KVM's caches while a * memslot update is in-progress, and to prevent cache hits *after* updating * the actual generation number against accesses that were inserted into the * cache *before* the memslots were updated. */ #define KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS BIT_ULL(63) /* Two fragments for cross MMIO pages. */ #define KVM_MAX_MMIO_FRAGMENTS 2 #ifndef KVM_MAX_NR_ADDRESS_SPACES #define KVM_MAX_NR_ADDRESS_SPACES 1 #endif /* * For the normal pfn, the highest 12 bits should be zero, * so we can mask bit 62 ~ bit 52 to indicate the error pfn, * mask bit 63 to indicate the noslot pfn. */ #define KVM_PFN_ERR_MASK (0x7ffULL << 52) #define KVM_PFN_ERR_NOSLOT_MASK (0xfffULL << 52) #define KVM_PFN_NOSLOT (0x1ULL << 63) #define KVM_PFN_ERR_FAULT (KVM_PFN_ERR_MASK) #define KVM_PFN_ERR_HWPOISON (KVM_PFN_ERR_MASK + 1) #define KVM_PFN_ERR_RO_FAULT (KVM_PFN_ERR_MASK + 2) #define KVM_PFN_ERR_SIGPENDING (KVM_PFN_ERR_MASK + 3) /* * error pfns indicate that the gfn is in slot but faild to * translate it to pfn on host. */ static inline bool is_error_pfn(kvm_pfn_t pfn) { return !!(pfn & KVM_PFN_ERR_MASK); } /* * KVM_PFN_ERR_SIGPENDING indicates that fetching the PFN was interrupted * by a pending signal. Note, the signal may or may not be fatal. */ static inline bool is_sigpending_pfn(kvm_pfn_t pfn) { return pfn == KVM_PFN_ERR_SIGPENDING; } /* * error_noslot pfns indicate that the gfn can not be * translated to pfn - it is not in slot or failed to * translate it to pfn. */ static inline bool is_error_noslot_pfn(kvm_pfn_t pfn) { return !!(pfn & KVM_PFN_ERR_NOSLOT_MASK); } /* noslot pfn indicates that the gfn is not in slot. */ static inline bool is_noslot_pfn(kvm_pfn_t pfn) { return pfn == KVM_PFN_NOSLOT; } /* * architectures with KVM_HVA_ERR_BAD other than PAGE_OFFSET (e.g. s390) * provide own defines and kvm_is_error_hva */ #ifndef KVM_HVA_ERR_BAD #define KVM_HVA_ERR_BAD (PAGE_OFFSET) #define KVM_HVA_ERR_RO_BAD (PAGE_OFFSET + PAGE_SIZE) static inline bool kvm_is_error_hva(unsigned long addr) { return addr >= PAGE_OFFSET; } #endif static inline bool kvm_is_error_gpa(gpa_t gpa) { return gpa == INVALID_GPA; } #define KVM_ERR_PTR_BAD_PAGE (ERR_PTR(-ENOENT)) static inline bool is_error_page(struct page *page) { return IS_ERR(page); } #define KVM_REQUEST_MASK GENMASK(7,0) #define KVM_REQUEST_NO_WAKEUP BIT(8) #define KVM_REQUEST_WAIT BIT(9) #define KVM_REQUEST_NO_ACTION BIT(10) /* * Architecture-independent vcpu->requests bit members * Bits 3-7 are reserved for more arch-independent bits. */ #define KVM_REQ_TLB_FLUSH (0 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_VM_DEAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_UNBLOCK 2 #define KVM_REQ_DIRTY_RING_SOFT_FULL 3 #define KVM_REQUEST_ARCH_BASE 8 /* * KVM_REQ_OUTSIDE_GUEST_MODE exists is purely as way to force the vCPU to * OUTSIDE_GUEST_MODE. KVM_REQ_OUTSIDE_GUEST_MODE differs from a vCPU "kick" * in that it ensures the vCPU has reached OUTSIDE_GUEST_MODE before continuing * on. A kick only guarantees that the vCPU is on its way out, e.g. a previous * kick may have set vcpu->mode to EXITING_GUEST_MODE, and so there's no * guarantee the vCPU received an IPI and has actually exited guest mode. */ #define KVM_REQ_OUTSIDE_GUEST_MODE (KVM_REQUEST_NO_ACTION | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \ BUILD_BUG_ON((unsigned)(nr) >= (sizeof_field(struct kvm_vcpu, requests) * 8) - KVM_REQUEST_ARCH_BASE); \ (unsigned)(((nr) + KVM_REQUEST_ARCH_BASE) | (flags)); \ }) #define KVM_ARCH_REQ(nr) KVM_ARCH_REQ_FLAGS(nr, 0) bool kvm_make_vcpus_request_mask(struct kvm *kvm, unsigned int req, unsigned long *vcpu_bitmap); bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req); #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 extern struct mutex kvm_lock; extern struct list_head vm_list; struct kvm_io_range { gpa_t addr; int len; struct kvm_io_device *dev; }; #define NR_IOBUS_DEVS 1000 struct kvm_io_bus { int dev_count; int ioeventfd_count; struct kvm_io_range range[]; }; enum kvm_bus { KVM_MMIO_BUS, KVM_PIO_BUS, KVM_VIRTIO_CCW_NOTIFY_BUS, KVM_FAST_MMIO_BUS, KVM_NR_BUSES }; int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, int len, const void *val); int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, int len, const void *val, long cookie); int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, int len, void *val); int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, struct kvm_io_device *dev); int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, struct kvm_io_device *dev); struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr); #ifdef CONFIG_KVM_ASYNC_PF struct kvm_async_pf { struct work_struct work; struct list_head link; struct list_head queue; struct kvm_vcpu *vcpu; gpa_t cr2_or_gpa; unsigned long addr; struct kvm_arch_async_pf arch; bool wakeup_all; bool notpresent_injected; }; void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu); void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu); bool kvm_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, unsigned long hva, struct kvm_arch_async_pf *arch); int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); #endif #ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER union kvm_mmu_notifier_arg { unsigned long attributes; }; struct kvm_gfn_range { struct kvm_memory_slot *slot; gfn_t start; gfn_t end; union kvm_mmu_notifier_arg arg; bool may_block; }; bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range); #endif enum { OUTSIDE_GUEST_MODE, IN_GUEST_MODE, EXITING_GUEST_MODE, READING_SHADOW_PAGE_TABLES, }; #define KVM_UNMAPPED_PAGE ((void *) 0x500 + POISON_POINTER_DELTA) struct kvm_host_map { /* * Only valid if the 'pfn' is managed by the host kernel (i.e. There is * a 'struct page' for it. When using mem= kernel parameter some memory * can be used as guest memory but they are not managed by host * kernel). * If 'pfn' is not managed by the host kernel, this field is * initialized to KVM_UNMAPPED_PAGE. */ struct page *page; void *hva; kvm_pfn_t pfn; kvm_pfn_t gfn; }; /* * Used to check if the mapping is valid or not. Never use 'kvm_host_map' * directly to check for that. */ static inline bool kvm_vcpu_mapped(struct kvm_host_map *map) { return !!map->hva; } static inline bool kvm_vcpu_can_poll(ktime_t cur, ktime_t stop) { return single_task_running() && !need_resched() && ktime_before(cur, stop); } /* * Sometimes a large or cross-page mmio needs to be broken up into separate * exits for userspace servicing. */ struct kvm_mmio_fragment { gpa_t gpa; void *data; unsigned len; }; struct kvm_vcpu { struct kvm *kvm; #ifdef CONFIG_PREEMPT_NOTIFIERS struct preempt_notifier preempt_notifier; #endif int cpu; int vcpu_id; /* id given by userspace at creation */ int vcpu_idx; /* index into kvm->vcpu_array */ int ____srcu_idx; /* Don't use this directly. You've been warned. */ #ifdef CONFIG_PROVE_RCU int srcu_depth; #endif int mode; u64 requests; unsigned long guest_debug; struct mutex mutex; struct kvm_run *run; #ifndef __KVM_HAVE_ARCH_WQP struct rcuwait wait; #endif struct pid __rcu *pid; int sigset_active; sigset_t sigset; unsigned int halt_poll_ns; bool valid_wakeup; #ifdef CONFIG_HAS_IOMEM int mmio_needed; int mmio_read_completed; int mmio_is_write; int mmio_cur_fragment; int mmio_nr_fragments; struct kvm_mmio_fragment mmio_fragments[KVM_MAX_MMIO_FRAGMENTS]; #endif #ifdef CONFIG_KVM_ASYNC_PF struct { u32 queued; struct list_head queue; struct list_head done; spinlock_t lock; } async_pf; #endif #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT /* * Cpu relax intercept or pause loop exit optimization * in_spin_loop: set when a vcpu does a pause loop exit * or cpu relax intercepted. * dy_eligible: indicates whether vcpu is eligible for directed yield. */ struct { bool in_spin_loop; bool dy_eligible; } spin_loop; #endif bool wants_to_run; bool preempted; bool ready; bool scheduled_out; struct kvm_vcpu_arch arch; struct kvm_vcpu_stat stat; char stats_id[KVM_STATS_NAME_SIZE]; struct kvm_dirty_ring dirty_ring; /* * The most recently used memslot by this vCPU and the slots generation * for which it is valid. * No wraparound protection is needed since generations won't overflow in * thousands of years, even assuming 1M memslot operations per second. */ struct kvm_memory_slot *last_used_slot; u64 last_used_slot_gen; }; /* * Start accounting time towards a guest. * Must be called before entering guest context. */ static __always_inline void guest_timing_enter_irqoff(void) { /* * This is running in ioctl context so its safe to assume that it's the * stime pending cputime to flush. */ instrumentation_begin(); vtime_account_guest_enter(); instrumentation_end(); } /* * Enter guest context and enter an RCU extended quiescent state. * * Between guest_context_enter_irqoff() and guest_context_exit_irqoff() it is * unsafe to use any code which may directly or indirectly use RCU, tracing * (including IRQ flag tracing), or lockdep. All code in this period must be * non-instrumentable. */ static __always_inline void guest_context_enter_irqoff(void) { /* * KVM does not hold any references to rcu protected data when it * switches CPU into a guest mode. In fact switching to a guest mode * is very similar to exiting to userspace from rcu point of view. In * addition CPU may stay in a guest mode for quite a long time (up to * one time slice). Lets treat guest mode as quiescent state, just like * we do with user-mode execution. */ if (!context_tracking_guest_enter()) { instrumentation_begin(); rcu_virt_note_context_switch(); instrumentation_end(); } } /* * Deprecated. Architectures should move to guest_timing_enter_irqoff() and * guest_state_enter_irqoff(). */ static __always_inline void guest_enter_irqoff(void) { guest_timing_enter_irqoff(); guest_context_enter_irqoff(); } /** * guest_state_enter_irqoff - Fixup state when entering a guest * * Entry to a guest will enable interrupts, but the kernel state is interrupts * disabled when this is invoked. Also tell RCU about it. * * 1) Trace interrupts on state * 2) Invoke context tracking if enabled to adjust RCU state * 3) Tell lockdep that interrupts are enabled * * Invoked from architecture specific code before entering a guest. * Must be called with interrupts disabled and the caller must be * non-instrumentable. * The caller has to invoke guest_timing_enter_irqoff() before this. * * Note: this is analogous to exit_to_user_mode(). */ static __always_inline void guest_state_enter_irqoff(void) { instrumentation_begin(); trace_hardirqs_on_prepare(); lockdep_hardirqs_on_prepare(); instrumentation_end(); guest_context_enter_irqoff(); lockdep_hardirqs_on(CALLER_ADDR0); } /* * Exit guest context and exit an RCU extended quiescent state. * * Between guest_context_enter_irqoff() and guest_context_exit_irqoff() it is * unsafe to use any code which may directly or indirectly use RCU, tracing * (including IRQ flag tracing), or lockdep. All code in this period must be * non-instrumentable. */ static __always_inline void guest_context_exit_irqoff(void) { context_tracking_guest_exit(); } /* * Stop accounting time towards a guest. * Must be called after exiting guest context. */ static __always_inline void guest_timing_exit_irqoff(void) { instrumentation_begin(); /* Flush the guest cputime we spent on the guest */ vtime_account_guest_exit(); instrumentation_end(); } /* * Deprecated. Architectures should move to guest_state_exit_irqoff() and * guest_timing_exit_irqoff(). */ static __always_inline void guest_exit_irqoff(void) { guest_context_exit_irqoff(); guest_timing_exit_irqoff(); } static inline void guest_exit(void) { unsigned long flags; local_irq_save(flags); guest_exit_irqoff(); local_irq_restore(flags); } /** * guest_state_exit_irqoff - Establish state when returning from guest mode * * Entry from a guest disables interrupts, but guest mode is traced as * interrupts enabled. Also with NO_HZ_FULL RCU might be idle. * * 1) Tell lockdep that interrupts are disabled * 2) Invoke context tracking if enabled to reactivate RCU * 3) Trace interrupts off state * * Invoked from architecture specific code after exiting a guest. * Must be invoked with interrupts disabled and the caller must be * non-instrumentable. * The caller has to invoke guest_timing_exit_irqoff() after this. * * Note: this is analogous to enter_from_user_mode(). */ static __always_inline void guest_state_exit_irqoff(void) { lockdep_hardirqs_off(CALLER_ADDR0); guest_context_exit_irqoff(); instrumentation_begin(); trace_hardirqs_off_finish(); instrumentation_end(); } static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu) { /* * The memory barrier ensures a previous write to vcpu->requests cannot * be reordered with the read of vcpu->mode. It pairs with the general * memory barrier following the write of vcpu->mode in VCPU RUN. */ smp_mb__before_atomic(); return cmpxchg(&vcpu->mode, IN_GUEST_MODE, EXITING_GUEST_MODE); } /* * Some of the bitops functions do not support too long bitmaps. * This number must be determined not to exceed such limits. */ #define KVM_MEM_MAX_NR_PAGES ((1UL << 31) - 1) /* * Since at idle each memslot belongs to two memslot sets it has to contain * two embedded nodes for each data structure that it forms a part of. * * Two memslot sets (one active and one inactive) are necessary so the VM * continues to run on one memslot set while the other is being modified. * * These two memslot sets normally point to the same set of memslots. * They can, however, be desynchronized when performing a memslot management * operation by replacing the memslot to be modified by its copy. * After the operation is complete, both memslot sets once again point to * the same, common set of memslot data. * * The memslots themselves are independent of each other so they can be * individually added or deleted. */ struct kvm_memory_slot { struct hlist_node id_node[2]; struct interval_tree_node hva_node[2]; struct rb_node gfn_node[2]; gfn_t base_gfn; unsigned long npages; unsigned long *dirty_bitmap; struct kvm_arch_memory_slot arch; unsigned long userspace_addr; u32 flags; short id; u16 as_id; #ifdef CONFIG_KVM_PRIVATE_MEM struct { struct file __rcu *file; pgoff_t pgoff; } gmem; #endif }; static inline bool kvm_slot_can_be_private(const struct kvm_memory_slot *slot) { return slot && (slot->flags & KVM_MEM_GUEST_MEMFD); } static inline bool kvm_slot_dirty_track_enabled(const struct kvm_memory_slot *slot) { return slot->flags & KVM_MEM_LOG_DIRTY_PAGES; } static inline unsigned long kvm_dirty_bitmap_bytes(struct kvm_memory_slot *memslot) { return ALIGN(memslot->npages, BITS_PER_LONG) / 8; } static inline unsigned long *kvm_second_dirty_bitmap(struct kvm_memory_slot *memslot) { unsigned long len = kvm_dirty_bitmap_bytes(memslot); return memslot->dirty_bitmap + len / sizeof(*memslot->dirty_bitmap); } #ifndef KVM_DIRTY_LOG_MANUAL_CAPS #define KVM_DIRTY_LOG_MANUAL_CAPS KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE #endif struct kvm_s390_adapter_int { u64 ind_addr; u64 summary_addr; u64 ind_offset; u32 summary_offset; u32 adapter_id; }; struct kvm_hv_sint { u32 vcpu; u32 sint; }; struct kvm_xen_evtchn { u32 port; u32 vcpu_id; int vcpu_idx; u32 priority; }; struct kvm_kernel_irq_routing_entry { u32 gsi; u32 type; int (*set)(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq_source_id, int level, bool line_status); union { struct { unsigned irqchip; unsigned pin; } irqchip; struct { u32 address_lo; u32 address_hi; u32 data; u32 flags; u32 devid; } msi; struct kvm_s390_adapter_int adapter; struct kvm_hv_sint hv_sint; struct kvm_xen_evtchn xen_evtchn; }; struct hlist_node link; }; #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING struct kvm_irq_routing_table { int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; u32 nr_rt_entries; /* * Array indexed by gsi. Each entry contains list of irq chips * the gsi is connected to. */ struct hlist_head map[] __counted_by(nr_rt_entries); }; #endif bool kvm_arch_irqchip_in_kernel(struct kvm *kvm); #ifndef KVM_INTERNAL_MEM_SLOTS #define KVM_INTERNAL_MEM_SLOTS 0 #endif #define KVM_MEM_SLOTS_NUM SHRT_MAX #define KVM_USER_MEM_SLOTS (KVM_MEM_SLOTS_NUM - KVM_INTERNAL_MEM_SLOTS) #if KVM_MAX_NR_ADDRESS_SPACES == 1 static inline int kvm_arch_nr_memslot_as_ids(struct kvm *kvm) { return KVM_MAX_NR_ADDRESS_SPACES; } static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu) { return 0; } #endif /* * Arch code must define kvm_arch_has_private_mem if support for private memory * is enabled. */ #if !defined(kvm_arch_has_private_mem) && !IS_ENABLED(CONFIG_KVM_PRIVATE_MEM) static inline bool kvm_arch_has_private_mem(struct kvm *kvm) { return false; } #endif #ifndef kvm_arch_has_readonly_mem static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm) { return IS_ENABLED(CONFIG_HAVE_KVM_READONLY_MEM); } #endif struct kvm_memslots { u64 generation; atomic_long_t last_used_slot; struct rb_root_cached hva_tree; struct rb_root gfn_tree; /* * The mapping table from slot id to memslot. * * 7-bit bucket count matches the size of the old id to index array for * 512 slots, while giving good performance with this slot count. * Higher bucket counts bring only small performance improvements but * always result in higher memory usage (even for lower memslot counts). */ DECLARE_HASHTABLE(id_hash, 7); int node_idx; }; struct kvm { #ifdef KVM_HAVE_MMU_RWLOCK rwlock_t mmu_lock; #else spinlock_t mmu_lock; #endif /* KVM_HAVE_MMU_RWLOCK */ struct mutex slots_lock; /* * Protects the arch-specific fields of struct kvm_memory_slots in * use by the VM. To be used under the slots_lock (above) or in a * kvm->srcu critical section where acquiring the slots_lock would * lead to deadlock with the synchronize_srcu in * kvm_swap_active_memslots(). */ struct mutex slots_arch_lock; struct mm_struct *mm; /* userspace tied to this vm */ unsigned long nr_memslot_pages; /* The two memslot sets - active and inactive (per address space) */ struct kvm_memslots __memslots[KVM_MAX_NR_ADDRESS_SPACES][2]; /* The current active memslot set for each address space */ struct kvm_memslots __rcu *memslots[KVM_MAX_NR_ADDRESS_SPACES]; struct xarray vcpu_array; /* * Protected by slots_lock, but can be read outside if an * incorrect answer is acceptable. */ atomic_t nr_memslots_dirty_logging; /* Used to wait for completion of MMU notifiers. */ spinlock_t mn_invalidate_lock; unsigned long mn_active_invalidate_count; struct rcuwait mn_memslots_update_rcuwait; /* For management / invalidation of gfn_to_pfn_caches */ spinlock_t gpc_lock; struct list_head gpc_list; /* * created_vcpus is protected by kvm->lock, and is incremented * at the beginning of KVM_CREATE_VCPU. online_vcpus is only * incremented after storing the kvm_vcpu pointer in vcpus, * and is accessed atomically. */ atomic_t online_vcpus; int max_vcpus; int created_vcpus; int last_boosted_vcpu; struct list_head vm_list; struct mutex lock; struct kvm_io_bus __rcu *buses[KVM_NR_BUSES]; #ifdef CONFIG_HAVE_KVM_IRQCHIP struct { spinlock_t lock; struct list_head items; /* resampler_list update side is protected by resampler_lock. */ struct list_head resampler_list; struct mutex resampler_lock; } irqfds; #endif struct list_head ioeventfds; struct kvm_vm_stat stat; struct kvm_arch arch; refcount_t users_count; #ifdef CONFIG_KVM_MMIO struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; spinlock_t ring_lock; struct list_head coalesced_zones; #endif struct mutex irq_lock; #ifdef CONFIG_HAVE_KVM_IRQCHIP /* * Update side is protected by irq_lock. */ struct kvm_irq_routing_table __rcu *irq_routing; struct hlist_head irq_ack_notifier_list; #endif #ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER struct mmu_notifier mmu_notifier; unsigned long mmu_invalidate_seq; long mmu_invalidate_in_progress; gfn_t mmu_invalidate_range_start; gfn_t mmu_invalidate_range_end; #endif struct list_head devices; u64 manual_dirty_log_protect; struct dentry *debugfs_dentry; struct kvm_stat_data **debugfs_stat_data; struct srcu_struct srcu; struct srcu_struct irq_srcu; pid_t userspace_pid; bool override_halt_poll_ns; unsigned int max_halt_poll_ns; u32 dirty_ring_size; bool dirty_ring_with_bitmap; bool vm_bugged; bool vm_dead; #ifdef CONFIG_HAVE_KVM_PM_NOTIFIER struct notifier_block pm_notifier; #endif #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES /* Protected by slots_locks (for writes) and RCU (for reads) */ struct xarray mem_attr_array; #endif char stats_id[KVM_STATS_NAME_SIZE]; }; #define kvm_err(fmt, ...) \ pr_err("kvm [%i]: " fmt, task_pid_nr(current), ## __VA_ARGS__) #define kvm_info(fmt, ...) \ pr_info("kvm [%i]: " fmt, task_pid_nr(current), ## __VA_ARGS__) #define kvm_debug(fmt, ...) \ pr_debug("kvm [%i]: " fmt, task_pid_nr(current), ## __VA_ARGS__) #define kvm_debug_ratelimited(fmt, ...) \ pr_debug_ratelimited("kvm [%i]: " fmt, task_pid_nr(current), \ ## __VA_ARGS__) #define kvm_pr_unimpl(fmt, ...) \ pr_err_ratelimited("kvm [%i]: " fmt, \ task_tgid_nr(current), ## __VA_ARGS__) /* The guest did something we don't support. */ #define vcpu_unimpl(vcpu, fmt, ...) \ kvm_pr_unimpl("vcpu%i, guest rIP: 0x%lx " fmt, \ (vcpu)->vcpu_id, kvm_rip_read(vcpu), ## __VA_ARGS__) #define vcpu_debug(vcpu, fmt, ...) \ kvm_debug("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__) #define vcpu_debug_ratelimited(vcpu, fmt, ...) \ kvm_debug_ratelimited("vcpu%i " fmt, (vcpu)->vcpu_id, \ ## __VA_ARGS__) #define vcpu_err(vcpu, fmt, ...) \ kvm_err("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__) static inline void kvm_vm_dead(struct kvm *kvm) { kvm->vm_dead = true; kvm_make_all_cpus_request(kvm, KVM_REQ_VM_DEAD); } static inline void kvm_vm_bugged(struct kvm *kvm) { kvm->vm_bugged = true; kvm_vm_dead(kvm); } #define KVM_BUG(cond, kvm, fmt...) \ ({ \ bool __ret = !!(cond); \ \ if (WARN_ONCE(__ret && !(kvm)->vm_bugged, fmt)) \ kvm_vm_bugged(kvm); \ unlikely(__ret); \ }) #define KVM_BUG_ON(cond, kvm) \ ({ \ bool __ret = !!(cond); \ \ if (WARN_ON_ONCE(__ret && !(kvm)->vm_bugged)) \ kvm_vm_bugged(kvm); \ unlikely(__ret); \ }) /* * Note, "data corruption" refers to corruption of host kernel data structures, * not guest data. Guest data corruption, suspected or confirmed, that is tied * and contained to a single VM should *never* BUG() and potentially panic the * host, i.e. use this variant of KVM_BUG() if and only if a KVM data structure * is corrupted and that corruption can have a cascading effect to other parts * of the hosts and/or to other VMs. */ #define KVM_BUG_ON_DATA_CORRUPTION(cond, kvm) \ ({ \ bool __ret = !!(cond); \ \ if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) \ BUG_ON(__ret); \ else if (WARN_ON_ONCE(__ret && !(kvm)->vm_bugged)) \ kvm_vm_bugged(kvm); \ unlikely(__ret); \ }) static inline void kvm_vcpu_srcu_read_lock(struct kvm_vcpu *vcpu) { #ifdef CONFIG_PROVE_RCU WARN_ONCE(vcpu->srcu_depth++, "KVM: Illegal vCPU srcu_idx LOCK, depth=%d", vcpu->srcu_depth - 1); #endif vcpu->____srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); } static inline void kvm_vcpu_srcu_read_unlock(struct kvm_vcpu *vcpu) { srcu_read_unlock(&vcpu->kvm->srcu, vcpu->____srcu_idx); #ifdef CONFIG_PROVE_RCU WARN_ONCE(--vcpu->srcu_depth, "KVM: Illegal vCPU srcu_idx UNLOCK, depth=%d", vcpu->srcu_depth); #endif } static inline bool kvm_dirty_log_manual_protect_and_init_set(struct kvm *kvm) { return !!(kvm->manual_dirty_log_protect & KVM_DIRTY_LOG_INITIALLY_SET); } static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx) { return srcu_dereference_check(kvm->buses[idx], &kvm->srcu, lockdep_is_held(&kvm->slots_lock) || !refcount_read(&kvm->users_count)); } static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) { int num_vcpus = atomic_read(&kvm->online_vcpus); i = array_index_nospec(i, num_vcpus); /* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu. */ smp_rmb(); return xa_load(&kvm->vcpu_array, i); } #define kvm_for_each_vcpu(idx, vcpup, kvm) \ xa_for_each_range(&kvm->vcpu_array, idx, vcpup, 0, \ (atomic_read(&kvm->online_vcpus) - 1)) static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) { struct kvm_vcpu *vcpu = NULL; unsigned long i; if (id < 0) return NULL; if (id < KVM_MAX_VCPUS) vcpu = kvm_get_vcpu(kvm, id); if (vcpu && vcpu->vcpu_id == id) return vcpu; kvm_for_each_vcpu(i, vcpu, kvm) if (vcpu->vcpu_id == id) return vcpu; return NULL; } void kvm_destroy_vcpus(struct kvm *kvm); void vcpu_load(struct kvm_vcpu *vcpu); void vcpu_put(struct kvm_vcpu *vcpu); #ifdef __KVM_HAVE_IOAPIC void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm); void kvm_arch_post_irq_routing_update(struct kvm *kvm); #else static inline void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm) { } static inline void kvm_arch_post_irq_routing_update(struct kvm *kvm) { } #endif #ifdef CONFIG_HAVE_KVM_IRQCHIP int kvm_irqfd_init(void); void kvm_irqfd_exit(void); #else static inline int kvm_irqfd_init(void) { return 0; } static inline void kvm_irqfd_exit(void) { } #endif int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module); void kvm_exit(void); void kvm_get_kvm(struct kvm *kvm); bool kvm_get_kvm_safe(struct kvm *kvm); void kvm_put_kvm(struct kvm *kvm); bool file_is_kvm(struct file *file); void kvm_put_kvm_no_destroy(struct kvm *kvm); static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id) { as_id = array_index_nospec(as_id, KVM_MAX_NR_ADDRESS_SPACES); return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu, lockdep_is_held(&kvm->slots_lock) || !refcount_read(&kvm->users_count)); } static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm) { return __kvm_memslots(kvm, 0); } static inline struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu) { int as_id = kvm_arch_vcpu_memslots_id(vcpu); return __kvm_memslots(vcpu->kvm, as_id); } static inline bool kvm_memslots_empty(struct kvm_memslots *slots) { return RB_EMPTY_ROOT(&slots->gfn_tree); } bool kvm_are_all_memslots_empty(struct kvm *kvm); #define kvm_for_each_memslot(memslot, bkt, slots) \ hash_for_each(slots->id_hash, bkt, memslot, id_node[slots->node_idx]) \ if (WARN_ON_ONCE(!memslot->npages)) { \ } else static inline struct kvm_memory_slot *id_to_memslot(struct kvm_memslots *slots, int id) { struct kvm_memory_slot *slot; int idx = slots->node_idx; hash_for_each_possible(slots->id_hash, slot, id_node[idx], id) { if (slot->id == id) return slot; } return NULL; } /* Iterator used for walking memslots that overlap a gfn range. */ struct kvm_memslot_iter { struct kvm_memslots *slots; struct rb_node *node; struct kvm_memory_slot *slot; }; static inline void kvm_memslot_iter_next(struct kvm_memslot_iter *iter) { iter->node = rb_next(iter->node); if (!iter->node) return; iter->slot = container_of(iter->node, struct kvm_memory_slot, gfn_node[iter->slots->node_idx]); } static inline void kvm_memslot_iter_start(struct kvm_memslot_iter *iter, struct kvm_memslots *slots, gfn_t start) { int idx = slots->node_idx; struct rb_node *tmp; struct kvm_memory_slot *slot; iter->slots = slots; /* * Find the so called "upper bound" of a key - the first node that has * its key strictly greater than the searched one (the start gfn in our case). */ iter->node = NULL; for (tmp = slots->gfn_tree.rb_node; tmp; ) { slot = container_of(tmp, struct kvm_memory_slot, gfn_node[idx]); if (start < slot->base_gfn) { iter->node = tmp; tmp = tmp->rb_left; } else { tmp = tmp->rb_right; } } /* * Find the slot with the lowest gfn that can possibly intersect with * the range, so we'll ideally have slot start <= range start */ if (iter->node) { /* * A NULL previous node means that the very first slot * already has a higher start gfn. * In this case slot start > range start. */ tmp = rb_prev(iter->node); if (tmp) iter->node = tmp; } else { /* a NULL node below means no slots */ iter->node = rb_last(&slots->gfn_tree); } if (iter->node) { iter->slot = container_of(iter->node, struct kvm_memory_slot, gfn_node[idx]); /* * It is possible in the slot start < range start case that the * found slot ends before or at range start (slot end <= range start) * and so it does not overlap the requested range. * * In such non-overlapping case the next slot (if it exists) will * already have slot start > range start, otherwise the logic above * would have found it instead of the current slot. */ if (iter->slot->base_gfn + iter->slot->npages <= start) kvm_memslot_iter_next(iter); } } static inline bool kvm_memslot_iter_is_valid(struct kvm_memslot_iter *iter, gfn_t end) { if (!iter->node) return false; /* * If this slot starts beyond or at the end of the range so does * every next one */ return iter->slot->base_gfn < end; } /* Iterate over each memslot at least partially intersecting [start, end) range */ #define kvm_for_each_memslot_in_gfn_range(iter, slots, start, end) \ for (kvm_memslot_iter_start(iter, slots, start); \ kvm_memslot_iter_is_valid(iter, end); \ kvm_memslot_iter_next(iter)) /* * KVM_SET_USER_MEMORY_REGION ioctl allows the following operations: * - create a new memory slot * - delete an existing memory slot * - modify an existing memory slot * -- move it in the guest physical memory space * -- just change its flags * * Since flags can be changed by some of these operations, the following * differentiation is the best we can do for __kvm_set_memory_region(): */ enum kvm_mr_change { KVM_MR_CREATE, KVM_MR_DELETE, KVM_MR_MOVE, KVM_MR_FLAGS_ONLY, }; int kvm_set_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region2 *mem); int __kvm_set_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region2 *mem); void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot); void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen); int kvm_arch_prepare_memory_region(struct kvm *kvm, const struct kvm_memory_slot *old, struct kvm_memory_slot *new, enum kvm_mr_change change); void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_memory_slot *old, const struct kvm_memory_slot *new, enum kvm_mr_change change); /* flush all memory translations */ void kvm_arch_flush_shadow_all(struct kvm *kvm); /* flush memory translations pointing to 'slot' */ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot); int gfn_to_page_many_atomic(struct kvm_memory_slot *slot, gfn_t gfn, struct page **pages, int nr_pages); struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable); unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn); unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn, bool *writable); void kvm_release_page_clean(struct page *page); void kvm_release_page_dirty(struct page *page); kvm_pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn); kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, bool *writable); kvm_pfn_t gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn); kvm_pfn_t gfn_to_pfn_memslot_atomic(const struct kvm_memory_slot *slot, gfn_t gfn); kvm_pfn_t __gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn, bool atomic, bool interruptible, bool *async, bool write_fault, bool *writable, hva_t *hva); void kvm_release_pfn_clean(kvm_pfn_t pfn); void kvm_release_pfn_dirty(kvm_pfn_t pfn); void kvm_set_pfn_dirty(kvm_pfn_t pfn); void kvm_set_pfn_accessed(kvm_pfn_t pfn); void kvm_release_pfn(kvm_pfn_t pfn, bool dirty); int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, int len); int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len); int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, void *data, unsigned long len); int kvm_read_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, void *data, unsigned int offset, unsigned long len); int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data, int offset, int len); int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, unsigned long len); int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, void *data, unsigned long len); int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, void *data, unsigned int offset, unsigned long len); int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, gpa_t gpa, unsigned long len); #define __kvm_get_guest(kvm, gfn, offset, v) \ ({ \ unsigned long __addr = gfn_to_hva(kvm, gfn); \ typeof(v) __user *__uaddr = (typeof(__uaddr))(__addr + offset); \ int __ret = -EFAULT; \ \ if (!kvm_is_error_hva(__addr)) \ __ret = get_user(v, __uaddr); \ __ret; \ }) #define kvm_get_guest(kvm, gpa, v) \ ({ \ gpa_t __gpa = gpa; \ struct kvm *__kvm = kvm; \ \ __kvm_get_guest(__kvm, __gpa >> PAGE_SHIFT, \ offset_in_page(__gpa), v); \ }) #define __kvm_put_guest(kvm, gfn, offset, v) \ ({ \ unsigned long __addr = gfn_to_hva(kvm, gfn); \ typeof(v) __user *__uaddr = (typeof(__uaddr))(__addr + offset); \ int __ret = -EFAULT; \ \ if (!kvm_is_error_hva(__addr)) \ __ret = put_user(v, __uaddr); \ if (!__ret) \ mark_page_dirty(kvm, gfn); \ __ret; \ }) #define kvm_put_guest(kvm, gpa, v) \ ({ \ gpa_t __gpa = gpa; \ struct kvm *__kvm = kvm; \ \ __kvm_put_guest(__kvm, __gpa >> PAGE_SHIFT, \ offset_in_page(__gpa), v); \ }) int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); bool kvm_vcpu_is_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn); void mark_page_dirty_in_slot(struct kvm *kvm, const struct kvm_memory_slot *memslot, gfn_t gfn); void mark_page_dirty(struct kvm *kvm, gfn_t gfn); struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu); struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn); kvm_pfn_t kvm_vcpu_gfn_to_pfn_atomic(struct kvm_vcpu *vcpu, gfn_t gfn); kvm_pfn_t kvm_vcpu_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn); int kvm_vcpu_map(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvm_host_map *map); void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map, bool dirty); unsigned long kvm_vcpu_gfn_to_hva(struct kvm_vcpu *vcpu, gfn_t gfn); unsigned long kvm_vcpu_gfn_to_hva_prot(struct kvm_vcpu *vcpu, gfn_t gfn, bool *writable); int kvm_vcpu_read_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, void *data, int offset, int len); int kvm_vcpu_read_guest_atomic(struct kvm_vcpu *vcpu, gpa_t gpa, void *data, unsigned long len); int kvm_vcpu_read_guest(struct kvm_vcpu *vcpu, gpa_t gpa, void *data, unsigned long len); int kvm_vcpu_write_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, const void *data, int offset, int len); int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data, unsigned long len); void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); /** * kvm_gpc_init - initialize gfn_to_pfn_cache. * * @gpc: struct gfn_to_pfn_cache object. * @kvm: pointer to kvm instance. * * This sets up a gfn_to_pfn_cache by initializing locks and assigning the * immutable attributes. Note, the cache must be zero-allocated (or zeroed by * the caller before init). */ void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm); /** * kvm_gpc_activate - prepare a cached kernel mapping and HPA for a given guest * physical address. * * @gpc: struct gfn_to_pfn_cache object. * @gpa: guest physical address to map. * @len: sanity check; the range being access must fit a single page. * * @return: 0 for success. * -EINVAL for a mapping which would cross a page boundary. * -EFAULT for an untranslatable guest physical address. * * This primes a gfn_to_pfn_cache and links it into the @gpc->kvm's list for * invalidations to be processed. Callers are required to use kvm_gpc_check() * to ensure that the cache is valid before accessing the target page. */ int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len); /** * kvm_gpc_activate_hva - prepare a cached kernel mapping and HPA for a given HVA. * * @gpc: struct gfn_to_pfn_cache object. * @hva: userspace virtual address to map. * @len: sanity check; the range being access must fit a single page. * * @return: 0 for success. * -EINVAL for a mapping which would cross a page boundary. * -EFAULT for an untranslatable guest physical address. * * The semantics of this function are the same as those of kvm_gpc_activate(). It * merely bypasses a layer of address translation. */ int kvm_gpc_activate_hva(struct gfn_to_pfn_cache *gpc, unsigned long hva, unsigned long len); /** * kvm_gpc_check - check validity of a gfn_to_pfn_cache. * * @gpc: struct gfn_to_pfn_cache object. * @len: sanity check; the range being access must fit a single page. * * @return: %true if the cache is still valid and the address matches. * %false if the cache is not valid. * * Callers outside IN_GUEST_MODE context should hold a read lock on @gpc->lock * while calling this function, and then continue to hold the lock until the * access is complete. * * Callers in IN_GUEST_MODE may do so without locking, although they should * still hold a read lock on kvm->scru for the memslot checks. */ bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, unsigned long len); /** * kvm_gpc_refresh - update a previously initialized cache. * * @gpc: struct gfn_to_pfn_cache object. * @len: sanity check; the range being access must fit a single page. * * @return: 0 for success. * -EINVAL for a mapping which would cross a page boundary. * -EFAULT for an untranslatable guest physical address. * * This will attempt to refresh a gfn_to_pfn_cache. Note that a successful * return from this function does not mean the page can be immediately * accessed because it may have raced with an invalidation. Callers must * still lock and check the cache status, as this function does not return * with the lock still held to permit access. */ int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, unsigned long len); /** * kvm_gpc_deactivate - deactivate and unlink a gfn_to_pfn_cache. * * @gpc: struct gfn_to_pfn_cache object. * * This removes a cache from the VM's list to be processed on MMU notifier * invocation. */ void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc); static inline bool kvm_gpc_is_gpa_active(struct gfn_to_pfn_cache *gpc) { return gpc->active && !kvm_is_error_gpa(gpc->gpa); } static inline bool kvm_gpc_is_hva_active(struct gfn_to_pfn_cache *gpc) { return gpc->active && kvm_is_error_gpa(gpc->gpa); } void kvm_sigset_activate(struct kvm_vcpu *vcpu); void kvm_sigset_deactivate(struct kvm_vcpu *vcpu); void kvm_vcpu_halt(struct kvm_vcpu *vcpu); bool kvm_vcpu_block(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu); bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); int kvm_vcpu_yield_to(struct kvm_vcpu *target); void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool yield_to_kernel_mode); void kvm_flush_remote_tlbs(struct kvm *kvm); void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages); void kvm_flush_remote_tlbs_memslot(struct kvm *kvm, const struct kvm_memory_slot *memslot); #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min); int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int capacity, int min); int kvm_mmu_memory_cache_nr_free_objects(struct kvm_mmu_memory_cache *mc); void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc); void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc); #endif void kvm_mmu_invalidate_begin(struct kvm *kvm); void kvm_mmu_invalidate_range_add(struct kvm *kvm, gfn_t start, gfn_t end); void kvm_mmu_invalidate_end(struct kvm *kvm); bool kvm_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range); long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf); int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext); void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask); void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot); #ifndef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log); int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log, int *is_dirty, struct kvm_memory_slot **memslot); #endif int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, bool line_status); int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap); int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); long kvm_arch_vm_compat_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu); int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu); int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, struct kvm_translation *tr); int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs); int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs); int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs); int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state); int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state); int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg); int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu); void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id); int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); #ifdef CONFIG_HAVE_KVM_PM_NOTIFIER int kvm_arch_pm_notifier(struct kvm *kvm, unsigned long state); #endif #ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry); #else static inline void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) {} #endif #ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING int kvm_arch_hardware_enable(void); void kvm_arch_hardware_disable(void); #endif int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu); bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu); bool kvm_arch_vcpu_preempted_in_kernel(struct kvm_vcpu *vcpu); int kvm_arch_post_init_vm(struct kvm *kvm); void kvm_arch_pre_destroy_vm(struct kvm *kvm); void kvm_arch_create_vm_debugfs(struct kvm *kvm); #ifndef __KVM_HAVE_ARCH_VM_ALLOC /* * All architectures that want to use vzalloc currently also * need their own kvm_arch_alloc_vm implementation. */ static inline struct kvm *kvm_arch_alloc_vm(void) { return kzalloc(sizeof(struct kvm), GFP_KERNEL_ACCOUNT); } #endif static inline void __kvm_arch_free_vm(struct kvm *kvm) { kvfree(kvm); } #ifndef __KVM_HAVE_ARCH_VM_FREE static inline void kvm_arch_free_vm(struct kvm *kvm) { __kvm_arch_free_vm(kvm); } #endif #ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm) { return -ENOTSUPP; } #else int kvm_arch_flush_remote_tlbs(struct kvm *kvm); #endif #ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE static inline int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages) { return -EOPNOTSUPP; } #else int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages); #endif #ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA void kvm_arch_register_noncoherent_dma(struct kvm *kvm); void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm); bool kvm_arch_has_noncoherent_dma(struct kvm *kvm); #else static inline void kvm_arch_register_noncoherent_dma(struct kvm *kvm) { } static inline void kvm_arch_unregister_noncoherent_dma(struct kvm *kvm) { } static inline bool kvm_arch_has_noncoherent_dma(struct kvm *kvm) { return false; } #endif #ifdef __KVM_HAVE_ARCH_ASSIGNED_DEVICE void kvm_arch_start_assignment(struct kvm *kvm); void kvm_arch_end_assignment(struct kvm *kvm); bool kvm_arch_has_assigned_device(struct kvm *kvm); #else static inline void kvm_arch_start_assignment(struct kvm *kvm) { } static inline void kvm_arch_end_assignment(struct kvm *kvm) { } static __always_inline bool kvm_arch_has_assigned_device(struct kvm *kvm) { return false; } #endif static inline struct rcuwait *kvm_arch_vcpu_get_wait(struct kvm_vcpu *vcpu) { #ifdef __KVM_HAVE_ARCH_WQP return vcpu->arch.waitp; #else return &vcpu->wait; #endif } /* * Wake a vCPU if necessary, but don't do any stats/metadata updates. Returns * true if the vCPU was blocking and was awakened, false otherwise. */ static inline bool __kvm_vcpu_wake_up(struct kvm_vcpu *vcpu) { return !!rcuwait_wake_up(kvm_arch_vcpu_get_wait(vcpu)); } static inline bool kvm_vcpu_is_blocking(struct kvm_vcpu *vcpu) { return rcuwait_active(kvm_arch_vcpu_get_wait(vcpu)); } #ifdef __KVM_HAVE_ARCH_INTC_INITIALIZED /* * returns true if the virtual interrupt controller is initialized and * ready to accept virtual IRQ. On some architectures the virtual interrupt * controller is dynamically instantiated and this is not always true. */ bool kvm_arch_intc_initialized(struct kvm *kvm); #else static inline bool kvm_arch_intc_initialized(struct kvm *kvm) { return true; } #endif #ifdef CONFIG_GUEST_PERF_EVENTS unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu); void kvm_register_perf_callbacks(unsigned int (*pt_intr_handler)(void)); void kvm_unregister_perf_callbacks(void); #else static inline void kvm_register_perf_callbacks(void *ign) {} static inline void kvm_unregister_perf_callbacks(void) {} #endif /* CONFIG_GUEST_PERF_EVENTS */ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type); void kvm_arch_destroy_vm(struct kvm *kvm); void kvm_arch_sync_events(struct kvm *kvm); int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); struct page *kvm_pfn_to_refcounted_page(kvm_pfn_t pfn); bool kvm_is_zone_device_page(struct page *page); struct kvm_irq_ack_notifier { struct hlist_node link; unsigned gsi; void (*irq_acked)(struct kvm_irq_ack_notifier *kian); }; int kvm_irq_map_gsi(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *entries, int gsi); int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin); int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status); int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm, int irq_source_id, int level, bool line_status); int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq_source_id, int level, bool line_status); bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_notify_acked_gsi(struct kvm *kvm, int gsi); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_register_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian); void kvm_unregister_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian); int kvm_request_irq_source_id(struct kvm *kvm); void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args); /* * Returns a pointer to the memslot if it contains gfn. * Otherwise returns NULL. */ static inline struct kvm_memory_slot * try_get_memslot(struct kvm_memory_slot *slot, gfn_t gfn) { if (!slot) return NULL; if (gfn >= slot->base_gfn && gfn < slot->base_gfn + slot->npages) return slot; else return NULL; } /* * Returns a pointer to the memslot that contains gfn. Otherwise returns NULL. * * With "approx" set returns the memslot also when the address falls * in a hole. In that case one of the memslots bordering the hole is * returned. */ static inline struct kvm_memory_slot * search_memslots(struct kvm_memslots *slots, gfn_t gfn, bool approx) { struct kvm_memory_slot *slot; struct rb_node *node; int idx = slots->node_idx; slot = NULL; for (node = slots->gfn_tree.rb_node; node; ) { slot = container_of(node, struct kvm_memory_slot, gfn_node[idx]); if (gfn >= slot->base_gfn) { if (gfn < slot->base_gfn + slot->npages) return slot; node = node->rb_right; } else node = node->rb_left; } return approx ? slot : NULL; } static inline struct kvm_memory_slot * ____gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn, bool approx) { struct kvm_memory_slot *slot; slot = (struct kvm_memory_slot *)atomic_long_read(&slots->last_used_slot); slot = try_get_memslot(slot, gfn); if (slot) return slot; slot = search_memslots(slots, gfn, approx); if (slot) { atomic_long_set(&slots->last_used_slot, (unsigned long)slot); return slot; } return NULL; } /* * __gfn_to_memslot() and its descendants are here to allow arch code to inline * the lookups in hot paths. gfn_to_memslot() itself isn't here as an inline * because that would bloat other code too much. */ static inline struct kvm_memory_slot * __gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn) { return ____gfn_to_memslot(slots, gfn, false); } static inline unsigned long __gfn_to_hva_memslot(const struct kvm_memory_slot *slot, gfn_t gfn) { /* * The index was checked originally in search_memslots. To avoid * that a malicious guest builds a Spectre gadget out of e.g. page * table walks, do not let the processor speculate loads outside * the guest's registered memslots. */ unsigned long offset = gfn - slot->base_gfn; offset = array_index_nospec(offset, slot->npages); return slot->userspace_addr + offset * PAGE_SIZE; } static inline int memslot_id(struct kvm *kvm, gfn_t gfn) { return gfn_to_memslot(kvm, gfn)->id; } static inline gfn_t hva_to_gfn_memslot(unsigned long hva, struct kvm_memory_slot *slot) { gfn_t gfn_offset = (hva - slot->userspace_addr) >> PAGE_SHIFT; return slot->base_gfn + gfn_offset; } static inline gpa_t gfn_to_gpa(gfn_t gfn) { return (gpa_t)gfn << PAGE_SHIFT; } static inline gfn_t gpa_to_gfn(gpa_t gpa) { return (gfn_t)(gpa >> PAGE_SHIFT); } static inline hpa_t pfn_to_hpa(kvm_pfn_t pfn) { return (hpa_t)pfn << PAGE_SHIFT; } static inline bool kvm_is_gpa_in_memslot(struct kvm *kvm, gpa_t gpa) { unsigned long hva = gfn_to_hva(kvm, gpa_to_gfn(gpa)); return !kvm_is_error_hva(hva); } static inline void kvm_gpc_mark_dirty_in_slot(struct gfn_to_pfn_cache *gpc) { lockdep_assert_held(&gpc->lock); if (!gpc->memslot) return; mark_page_dirty_in_slot(gpc->kvm, gpc->memslot, gpa_to_gfn(gpc->gpa)); } enum kvm_stat_kind { KVM_STAT_VM, KVM_STAT_VCPU, }; struct kvm_stat_data { struct kvm *kvm; const struct _kvm_stats_desc *desc; enum kvm_stat_kind kind; }; struct _kvm_stats_desc { struct kvm_stats_desc desc; char name[KVM_STATS_NAME_SIZE]; }; #define STATS_DESC_COMMON(type, unit, base, exp, sz, bsz) \ .flags = type | unit | base | \ BUILD_BUG_ON_ZERO(type & ~KVM_STATS_TYPE_MASK) | \ BUILD_BUG_ON_ZERO(unit & ~KVM_STATS_UNIT_MASK) | \ BUILD_BUG_ON_ZERO(base & ~KVM_STATS_BASE_MASK), \ .exponent = exp, \ .size = sz, \ .bucket_size = bsz #define VM_GENERIC_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ { \ { \ STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ .offset = offsetof(struct kvm_vm_stat, generic.stat) \ }, \ .name = #stat, \ } #define VCPU_GENERIC_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ { \ { \ STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ .offset = offsetof(struct kvm_vcpu_stat, generic.stat) \ }, \ .name = #stat, \ } #define VM_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ { \ { \ STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ .offset = offsetof(struct kvm_vm_stat, stat) \ }, \ .name = #stat, \ } #define VCPU_STATS_DESC(stat, type, unit, base, exp, sz, bsz) \ { \ { \ STATS_DESC_COMMON(type, unit, base, exp, sz, bsz), \ .offset = offsetof(struct kvm_vcpu_stat, stat) \ }, \ .name = #stat, \ } /* SCOPE: VM, VM_GENERIC, VCPU, VCPU_GENERIC */ #define STATS_DESC(SCOPE, stat, type, unit, base, exp, sz, bsz) \ SCOPE##_STATS_DESC(stat, type, unit, base, exp, sz, bsz) #define STATS_DESC_CUMULATIVE(SCOPE, name, unit, base, exponent) \ STATS_DESC(SCOPE, name, KVM_STATS_TYPE_CUMULATIVE, \ unit, base, exponent, 1, 0) #define STATS_DESC_INSTANT(SCOPE, name, unit, base, exponent) \ STATS_DESC(SCOPE, name, KVM_STATS_TYPE_INSTANT, \ unit, base, exponent, 1, 0) #define STATS_DESC_PEAK(SCOPE, name, unit, base, exponent) \ STATS_DESC(SCOPE, name, KVM_STATS_TYPE_PEAK, \ unit, base, exponent, 1, 0) #define STATS_DESC_LINEAR_HIST(SCOPE, name, unit, base, exponent, sz, bsz) \ STATS_DESC(SCOPE, name, KVM_STATS_TYPE_LINEAR_HIST, \ unit, base, exponent, sz, bsz) #define STATS_DESC_LOG_HIST(SCOPE, name, unit, base, exponent, sz) \ STATS_DESC(SCOPE, name, KVM_STATS_TYPE_LOG_HIST, \ unit, base, exponent, sz, 0) /* Cumulative counter, read/write */ #define STATS_DESC_COUNTER(SCOPE, name) \ STATS_DESC_CUMULATIVE(SCOPE, name, KVM_STATS_UNIT_NONE, \ KVM_STATS_BASE_POW10, 0) /* Instantaneous counter, read only */ #define STATS_DESC_ICOUNTER(SCOPE, name) \ STATS_DESC_INSTANT(SCOPE, name, KVM_STATS_UNIT_NONE, \ KVM_STATS_BASE_POW10, 0) /* Peak counter, read/write */ #define STATS_DESC_PCOUNTER(SCOPE, name) \ STATS_DESC_PEAK(SCOPE, name, KVM_STATS_UNIT_NONE, \ KVM_STATS_BASE_POW10, 0) /* Instantaneous boolean value, read only */ #define STATS_DESC_IBOOLEAN(SCOPE, name) \ STATS_DESC_INSTANT(SCOPE, name, KVM_STATS_UNIT_BOOLEAN, \ KVM_STATS_BASE_POW10, 0) /* Peak (sticky) boolean value, read/write */ #define STATS_DESC_PBOOLEAN(SCOPE, name) \ STATS_DESC_PEAK(SCOPE, name, KVM_STATS_UNIT_BOOLEAN, \ KVM_STATS_BASE_POW10, 0) /* Cumulative time in nanosecond */ #define STATS_DESC_TIME_NSEC(SCOPE, name) \ STATS_DESC_CUMULATIVE(SCOPE, name, KVM_STATS_UNIT_SECONDS, \ KVM_STATS_BASE_POW10, -9) /* Linear histogram for time in nanosecond */ #define STATS_DESC_LINHIST_TIME_NSEC(SCOPE, name, sz, bsz) \ STATS_DESC_LINEAR_HIST(SCOPE, name, KVM_STATS_UNIT_SECONDS, \ KVM_STATS_BASE_POW10, -9, sz, bsz) /* Logarithmic histogram for time in nanosecond */ #define STATS_DESC_LOGHIST_TIME_NSEC(SCOPE, name, sz) \ STATS_DESC_LOG_HIST(SCOPE, name, KVM_STATS_UNIT_SECONDS, \ KVM_STATS_BASE_POW10, -9, sz) #define KVM_GENERIC_VM_STATS() \ STATS_DESC_COUNTER(VM_GENERIC, remote_tlb_flush), \ STATS_DESC_COUNTER(VM_GENERIC, remote_tlb_flush_requests) #define KVM_GENERIC_VCPU_STATS() \ STATS_DESC_COUNTER(VCPU_GENERIC, halt_successful_poll), \ STATS_DESC_COUNTER(VCPU_GENERIC, halt_attempted_poll), \ STATS_DESC_COUNTER(VCPU_GENERIC, halt_poll_invalid), \ STATS_DESC_COUNTER(VCPU_GENERIC, halt_wakeup), \ STATS_DESC_TIME_NSEC(VCPU_GENERIC, halt_poll_success_ns), \ STATS_DESC_TIME_NSEC(VCPU_GENERIC, halt_poll_fail_ns), \ STATS_DESC_TIME_NSEC(VCPU_GENERIC, halt_wait_ns), \ STATS_DESC_LOGHIST_TIME_NSEC(VCPU_GENERIC, halt_poll_success_hist, \ HALT_POLL_HIST_COUNT), \ STATS_DESC_LOGHIST_TIME_NSEC(VCPU_GENERIC, halt_poll_fail_hist, \ HALT_POLL_HIST_COUNT), \ STATS_DESC_LOGHIST_TIME_NSEC(VCPU_GENERIC, halt_wait_hist, \ HALT_POLL_HIST_COUNT), \ STATS_DESC_IBOOLEAN(VCPU_GENERIC, blocking) ssize_t kvm_stats_read(char *id, const struct kvm_stats_header *header, const struct _kvm_stats_desc *desc, void *stats, size_t size_stats, char __user *user_buffer, size_t size, loff_t *offset); /** * kvm_stats_linear_hist_update() - Update bucket value for linear histogram * statistics data. * * @data: start address of the stats data * @size: the number of bucket of the stats data * @value: the new value used to update the linear histogram's bucket * @bucket_size: the size (width) of a bucket */ static inline void kvm_stats_linear_hist_update(u64 *data, size_t size, u64 value, size_t bucket_size) { size_t index = div64_u64(value, bucket_size); index = min(index, size - 1); ++data[index]; } /** * kvm_stats_log_hist_update() - Update bucket value for logarithmic histogram * statistics data. * * @data: start address of the stats data * @size: the number of bucket of the stats data * @value: the new value used to update the logarithmic histogram's bucket */ static inline void kvm_stats_log_hist_update(u64 *data, size_t size, u64 value) { size_t index = fls64(value); index = min(index, size - 1); ++data[index]; } #define KVM_STATS_LINEAR_HIST_UPDATE(array, value, bsize) \ kvm_stats_linear_hist_update(array, ARRAY_SIZE(array), value, bsize) #define KVM_STATS_LOG_HIST_UPDATE(array, value) \ kvm_stats_log_hist_update(array, ARRAY_SIZE(array), value) extern const struct kvm_stats_header kvm_vm_stats_header; extern const struct _kvm_stats_desc kvm_vm_stats_desc[]; extern const struct kvm_stats_header kvm_vcpu_stats_header; extern const struct _kvm_stats_desc kvm_vcpu_stats_desc[]; #ifdef CONFIG_KVM_GENERIC_MMU_NOTIFIER static inline int mmu_invalidate_retry(struct kvm *kvm, unsigned long mmu_seq) { if (unlikely(kvm->mmu_invalidate_in_progress)) return 1; /* * Ensure the read of mmu_invalidate_in_progress happens before * the read of mmu_invalidate_seq. This interacts with the * smp_wmb() in mmu_notifier_invalidate_range_end to make sure * that the caller either sees the old (non-zero) value of * mmu_invalidate_in_progress or the new (incremented) value of * mmu_invalidate_seq. * * PowerPC Book3s HV KVM calls this under a per-page lock rather * than under kvm->mmu_lock, for scalability, so can't rely on * kvm->mmu_lock to keep things ordered. */ smp_rmb(); if (kvm->mmu_invalidate_seq != mmu_seq) return 1; return 0; } static inline int mmu_invalidate_retry_gfn(struct kvm *kvm, unsigned long mmu_seq, gfn_t gfn) { lockdep_assert_held(&kvm->mmu_lock); /* * If mmu_invalidate_in_progress is non-zero, then the range maintained * by kvm_mmu_notifier_invalidate_range_start contains all addresses * that might be being invalidated. Note that it may include some false * positives, due to shortcuts when handing concurrent invalidations. */ if (unlikely(kvm->mmu_invalidate_in_progress)) { /* * Dropping mmu_lock after bumping mmu_invalidate_in_progress * but before updating the range is a KVM bug. */ if (WARN_ON_ONCE(kvm->mmu_invalidate_range_start == INVALID_GPA || kvm->mmu_invalidate_range_end == INVALID_GPA)) return 1; if (gfn >= kvm->mmu_invalidate_range_start && gfn < kvm->mmu_invalidate_range_end) return 1; } if (kvm->mmu_invalidate_seq != mmu_seq) return 1; return 0; } /* * This lockless version of the range-based retry check *must* be paired with a * call to the locked version after acquiring mmu_lock, i.e. this is safe to * use only as a pre-check to avoid contending mmu_lock. This version *will* * get false negatives and false positives. */ static inline bool mmu_invalidate_retry_gfn_unsafe(struct kvm *kvm, unsigned long mmu_seq, gfn_t gfn) { /* * Use READ_ONCE() to ensure the in-progress flag and sequence counter * are always read from memory, e.g. so that checking for retry in a * loop won't result in an infinite retry loop. Don't force loads for * start+end, as the key to avoiding infinite retry loops is observing * the 1=>0 transition of in-progress, i.e. getting false negatives * due to stale start+end values is acceptable. */ if (unlikely(READ_ONCE(kvm->mmu_invalidate_in_progress)) && gfn >= kvm->mmu_invalidate_range_start && gfn < kvm->mmu_invalidate_range_end) return true; return READ_ONCE(kvm->mmu_invalidate_seq) != mmu_seq; } #endif #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING #define KVM_MAX_IRQ_ROUTES 4096 /* might need extension/rework in the future */ bool kvm_arch_can_set_irq_routing(struct kvm *kvm); int kvm_set_irq_routing(struct kvm *kvm, const struct kvm_irq_routing_entry *entries, unsigned nr, unsigned flags); int kvm_init_irq_routing(struct kvm *kvm); int kvm_set_routing_entry(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue); void kvm_free_irq_routing(struct kvm *kvm); #else static inline void kvm_free_irq_routing(struct kvm *kvm) {} static inline int kvm_init_irq_routing(struct kvm *kvm) { return 0; } #endif int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi); void kvm_eventfd_init(struct kvm *kvm); int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args); #ifdef CONFIG_HAVE_KVM_IRQCHIP int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args); void kvm_irqfd_release(struct kvm *kvm); bool kvm_notify_irqfd_resampler(struct kvm *kvm, unsigned int irqchip, unsigned int pin); void kvm_irq_routing_update(struct kvm *); #else static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) { return -EINVAL; } static inline void kvm_irqfd_release(struct kvm *kvm) {} static inline bool kvm_notify_irqfd_resampler(struct kvm *kvm, unsigned int irqchip, unsigned int pin) { return false; } #endif /* CONFIG_HAVE_KVM_IRQCHIP */ void kvm_arch_irq_routing_update(struct kvm *kvm); static inline void __kvm_make_request(int req, struct kvm_vcpu *vcpu) { /* * Ensure the rest of the request is published to kvm_check_request's * caller. Paired with the smp_mb__after_atomic in kvm_check_request. */ smp_wmb(); set_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->requests); } static __always_inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) { /* * Request that don't require vCPU action should never be logged in * vcpu->requests. The vCPU won't clear the request, so it will stay * logged indefinitely and prevent the vCPU from entering the guest. */ BUILD_BUG_ON(!__builtin_constant_p(req) || (req & KVM_REQUEST_NO_ACTION)); __kvm_make_request(req, vcpu); } static inline bool kvm_request_pending(struct kvm_vcpu *vcpu) { return READ_ONCE(vcpu->requests); } static inline bool kvm_test_request(int req, struct kvm_vcpu *vcpu) { return test_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->requests); } static inline void kvm_clear_request(int req, struct kvm_vcpu *vcpu) { clear_bit(req & KVM_REQUEST_MASK, (void *)&vcpu->requests); } static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) { if (kvm_test_request(req, vcpu)) { kvm_clear_request(req, vcpu); /* * Ensure the rest of the request is visible to kvm_check_request's * caller. Paired with the smp_wmb in kvm_make_request. */ smp_mb__after_atomic(); return true; } else { return false; } } #ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING extern bool kvm_rebooting; #endif extern unsigned int halt_poll_ns; extern unsigned int halt_poll_ns_grow; extern unsigned int halt_poll_ns_grow_start; extern unsigned int halt_poll_ns_shrink; struct kvm_device { const struct kvm_device_ops *ops; struct kvm *kvm; void *private; struct list_head vm_node; }; /* create, destroy, and name are mandatory */ struct kvm_device_ops { const char *name; /* * create is called holding kvm->lock and any operations not suitable * to do while holding the lock should be deferred to init (see * below). */ int (*create)(struct kvm_device *dev, u32 type); /* * init is called after create if create is successful and is called * outside of holding kvm->lock. */ void (*init)(struct kvm_device *dev); /* * Destroy is responsible for freeing dev. * * Destroy may be called before or after destructors are called * on emulated I/O regions, depending on whether a reference is * held by a vcpu or other kvm component that gets destroyed * after the emulated I/O. */ void (*destroy)(struct kvm_device *dev); /* * Release is an alternative method to free the device. It is * called when the device file descriptor is closed. Once * release is called, the destroy method will not be called * anymore as the device is removed from the device list of * the VM. kvm->lock is held. */ void (*release)(struct kvm_device *dev); int (*set_attr)(struct kvm_device *dev, struct kvm_device_attr *attr); int (*get_attr)(struct kvm_device *dev, struct kvm_device_attr *attr); int (*has_attr)(struct kvm_device *dev, struct kvm_device_attr *attr); long (*ioctl)(struct kvm_device *dev, unsigned int ioctl, unsigned long arg); int (*mmap)(struct kvm_device *dev, struct vm_area_struct *vma); }; struct kvm_device *kvm_device_from_filp(struct file *filp); int kvm_register_device_ops(const struct kvm_device_ops *ops, u32 type); void kvm_unregister_device_ops(u32 type); extern struct kvm_device_ops kvm_mpic_ops; extern struct kvm_device_ops kvm_arm_vgic_v2_ops; extern struct kvm_device_ops kvm_arm_vgic_v3_ops; #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val) { vcpu->spin_loop.in_spin_loop = val; } static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val) { vcpu->spin_loop.dy_eligible = val; } #else /* !CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */ static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val) { } static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val) { } #endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */ static inline bool kvm_is_visible_memslot(struct kvm_memory_slot *memslot) { return (memslot && memslot->id < KVM_USER_MEM_SLOTS && !(memslot->flags & KVM_MEMSLOT_INVALID)); } struct kvm_vcpu *kvm_get_running_vcpu(void); struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); #ifdef CONFIG_HAVE_KVM_IRQ_BYPASS bool kvm_arch_has_irq_bypass(void); int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *, struct irq_bypass_producer *); void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *, struct irq_bypass_producer *); void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *); void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *); int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, bool set); bool kvm_arch_irqfd_route_changed(struct kvm_kernel_irq_routing_entry *, struct kvm_kernel_irq_routing_entry *); #endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */ #ifdef CONFIG_HAVE_KVM_INVALID_WAKEUPS /* If we wakeup during the poll time, was it a sucessful poll? */ static inline bool vcpu_valid_wakeup(struct kvm_vcpu *vcpu) { return vcpu->valid_wakeup; } #else static inline bool vcpu_valid_wakeup(struct kvm_vcpu *vcpu) { return true; } #endif /* CONFIG_HAVE_KVM_INVALID_WAKEUPS */ #ifdef CONFIG_HAVE_KVM_NO_POLL /* Callback that tells if we must not poll */ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu); #else static inline bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) { return false; } #endif /* CONFIG_HAVE_KVM_NO_POLL */ #ifdef CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL long kvm_arch_vcpu_async_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); #else static inline long kvm_arch_vcpu_async_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { return -ENOIOCTLCMD; } #endif /* CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL */ void kvm_arch_guest_memory_reclaimed(struct kvm *kvm); #ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu); #else static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) { return 0; } #endif /* CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE */ typedef int (*kvm_vm_thread_fn_t)(struct kvm *kvm, uintptr_t data); int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn, uintptr_t data, const char *name, struct task_struct **thread_ptr); #ifdef CONFIG_KVM_XFER_TO_GUEST_WORK static inline void kvm_handle_signal_exit(struct kvm_vcpu *vcpu) { vcpu->run->exit_reason = KVM_EXIT_INTR; vcpu->stat.signal_exits++; } #endif /* CONFIG_KVM_XFER_TO_GUEST_WORK */ /* * If more than one page is being (un)accounted, @virt must be the address of * the first page of a block of pages what were allocated together (i.e * accounted together). * * kvm_account_pgtable_pages() is thread-safe because mod_lruvec_page_state() * is thread-safe. */ static inline void kvm_account_pgtable_pages(void *virt, int nr) { mod_lruvec_page_state(virt_to_page(virt), NR_SECONDARY_PAGETABLE, nr); } /* * This defines how many reserved entries we want to keep before we * kick the vcpu to the userspace to avoid dirty ring full. This * value can be tuned to higher if e.g. PML is enabled on the host. */ #define KVM_DIRTY_RING_RSVD_ENTRIES 64 /* Max number of entries allowed for each kvm dirty ring */ #define KVM_DIRTY_RING_MAX_ENTRIES 65536 static inline void kvm_prepare_memory_fault_exit(struct kvm_vcpu *vcpu, gpa_t gpa, gpa_t size, bool is_write, bool is_exec, bool is_private) { vcpu->run->exit_reason = KVM_EXIT_MEMORY_FAULT; vcpu->run->memory_fault.gpa = gpa; vcpu->run->memory_fault.size = size; /* RWX flags are not (yet) defined or communicated to userspace. */ vcpu->run->memory_fault.flags = 0; if (is_private) vcpu->run->memory_fault.flags |= KVM_MEMORY_EXIT_FLAG_PRIVATE; } #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES static inline unsigned long kvm_get_memory_attributes(struct kvm *kvm, gfn_t gfn) { return xa_to_value(xa_load(&kvm->mem_attr_array, gfn)); } bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end, unsigned long mask, unsigned long attrs); bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_arch_post_set_memory_attributes(struct kvm *kvm, struct kvm_gfn_range *range); static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) { return IS_ENABLED(CONFIG_KVM_PRIVATE_MEM) && kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE; } #else static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn) { return false; } #endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */ #ifdef CONFIG_KVM_PRIVATE_MEM int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, kvm_pfn_t *pfn, int *max_order); #else static inline int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, kvm_pfn_t *pfn, int *max_order) { KVM_BUG_ON(1, kvm); return -EIO; } #endif /* CONFIG_KVM_PRIVATE_MEM */ #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order); #endif #ifdef CONFIG_KVM_GENERIC_PRIVATE_MEM /** * kvm_gmem_populate() - Populate/prepare a GPA range with guest data * * @kvm: KVM instance * @gfn: starting GFN to be populated * @src: userspace-provided buffer containing data to copy into GFN range * (passed to @post_populate, and incremented on each iteration * if not NULL) * @npages: number of pages to copy from userspace-buffer * @post_populate: callback to issue for each gmem page that backs the GPA * range * @opaque: opaque data to pass to @post_populate callback * * This is primarily intended for cases where a gmem-backed GPA range needs * to be initialized with userspace-provided data prior to being mapped into * the guest as a private page. This should be called with the slots->lock * held so that caller-enforced invariants regarding the expected memory * attributes of the GPA range do not race with KVM_SET_MEMORY_ATTRIBUTES. * * Returns the number of pages that were populated. */ typedef int (*kvm_gmem_populate_cb)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, void __user *src, int order, void *opaque); long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages, kvm_gmem_populate_cb post_populate, void *opaque); #endif #ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end); #endif #ifdef CONFIG_KVM_GENERIC_PRE_FAULT_MEMORY long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu, struct kvm_pre_fault_memory *range); #endif #endif |
| 392 393 392 154 154 20 20 19 19 143 154 89 84 83 82 81 1 28 5 137 29 131 89 81 132 131 132 132 115 131 132 14 14 4 14 56 2 56 1 1 1 1 56 56 56 56 56 56 1 204 239 204 239 118 118 118 118 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 | // SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Generic INET6 transport hashtables * * Authors: Lotsa people, from code originally in tcp, generalised here * by Arnaldo Carvalho de Melo <acme@mandriva.com> */ #include <linux/module.h> #include <linux/random.h> #include <net/addrconf.h> #include <net/hotdata.h> #include <net/inet_connection_sock.h> #include <net/inet_hashtables.h> #include <net/inet6_hashtables.h> #include <net/secure_seq.h> #include <net/ip.h> #include <net/sock_reuseport.h> #include <net/tcp.h> u32 inet6_ehashfn(const struct net *net, const struct in6_addr *laddr, const u16 lport, const struct in6_addr *faddr, const __be16 fport) { u32 lhash, fhash; net_get_random_once(&inet6_ehash_secret, sizeof(inet6_ehash_secret)); net_get_random_once(&tcp_ipv6_hash_secret, sizeof(tcp_ipv6_hash_secret)); lhash = (__force u32)laddr->s6_addr32[3]; fhash = __ipv6_addr_jhash(faddr, tcp_ipv6_hash_secret); return __inet6_ehashfn(lhash, lport, fhash, fport, inet6_ehash_secret + net_hash_mix(net)); } EXPORT_SYMBOL_GPL(inet6_ehashfn); /* * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM * * The sockhash lock must be held as a reader here. */ struct sock *__inet6_lookup_established(struct net *net, struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const u16 hnum, const int dif, const int sdif) { struct sock *sk; const struct hlist_nulls_node *node; const __portpair ports = INET_COMBINED_PORTS(sport, hnum); /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ unsigned int hash = inet6_ehashfn(net, daddr, hnum, saddr, sport); unsigned int slot = hash & hashinfo->ehash_mask; struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; begin: sk_nulls_for_each_rcu(sk, node, &head->chain) { if (sk->sk_hash != hash) continue; if (!inet6_match(net, sk, saddr, daddr, ports, dif, sdif)) continue; if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt))) goto out; if (unlikely(!inet6_match(net, sk, saddr, daddr, ports, dif, sdif))) { sock_gen_put(sk); goto begin; } goto found; } if (get_nulls_value(node) != slot) goto begin; out: sk = NULL; found: return sk; } EXPORT_SYMBOL(__inet6_lookup_established); static inline int compute_score(struct sock *sk, struct net *net, const unsigned short hnum, const struct in6_addr *daddr, const int dif, const int sdif) { int score = -1; if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum && sk->sk_family == PF_INET6) { if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) return -1; if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif)) return -1; score = sk->sk_bound_dev_if ? 2 : 1; if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; } return score; } /** * inet6_lookup_reuseport() - execute reuseport logic on AF_INET6 socket if necessary. * @net: network namespace. * @sk: AF_INET6 socket, must be in TCP_LISTEN state for TCP or TCP_CLOSE for UDP. * @skb: context for a potential SK_REUSEPORT program. * @doff: header offset. * @saddr: source address. * @sport: source port. * @daddr: destination address. * @hnum: destination port in host byte order. * @ehashfn: hash function used to generate the fallback hash. * * Return: NULL if sk doesn't have SO_REUSEPORT set, otherwise a pointer to * the selected sock or an error. */ struct sock *inet6_lookup_reuseport(struct net *net, struct sock *sk, struct sk_buff *skb, int doff, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, unsigned short hnum, inet6_ehashfn_t *ehashfn) { struct sock *reuse_sk = NULL; u32 phash; if (sk->sk_reuseport) { phash = INDIRECT_CALL_INET(ehashfn, udp6_ehashfn, inet6_ehashfn, net, daddr, hnum, saddr, sport); reuse_sk = reuseport_select_sock(sk, phash, skb, doff); } return reuse_sk; } EXPORT_SYMBOL_GPL(inet6_lookup_reuseport); /* called with rcu_read_lock() */ static struct sock *inet6_lhash2_lookup(struct net *net, struct inet_listen_hashbucket *ilb2, struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const unsigned short hnum, const int dif, const int sdif) { struct sock *sk, *result = NULL; struct hlist_nulls_node *node; int score, hiscore = 0; sk_nulls_for_each_rcu(sk, node, &ilb2->nulls_head) { score = compute_score(sk, net, hnum, daddr, dif, sdif); if (score > hiscore) { result = inet6_lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum, inet6_ehashfn); if (result) return result; result = sk; hiscore = score; } } return result; } struct sock *inet6_lookup_run_sk_lookup(struct net *net, int protocol, struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const u16 hnum, const int dif, inet6_ehashfn_t *ehashfn) { struct sock *sk, *reuse_sk; bool no_reuseport; no_reuseport = bpf_sk_lookup_run_v6(net, protocol, saddr, sport, daddr, hnum, dif, &sk); if (no_reuseport || IS_ERR_OR_NULL(sk)) return sk; reuse_sk = inet6_lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum, ehashfn); if (reuse_sk) sk = reuse_sk; return sk; } EXPORT_SYMBOL_GPL(inet6_lookup_run_sk_lookup); struct sock *inet6_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const unsigned short hnum, const int dif, const int sdif) { struct inet_listen_hashbucket *ilb2; struct sock *result = NULL; unsigned int hash2; /* Lookup redirect from BPF */ if (static_branch_unlikely(&bpf_sk_lookup_enabled) && hashinfo == net->ipv4.tcp_death_row.hashinfo) { result = inet6_lookup_run_sk_lookup(net, IPPROTO_TCP, skb, doff, saddr, sport, daddr, hnum, dif, inet6_ehashfn); if (result) goto done; } hash2 = ipv6_portaddr_hash(net, daddr, hnum); ilb2 = inet_lhash2_bucket(hashinfo, hash2); result = inet6_lhash2_lookup(net, ilb2, skb, doff, saddr, sport, daddr, hnum, dif, sdif); if (result) goto done; /* Lookup lhash2 with in6addr_any */ hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum); ilb2 = inet_lhash2_bucket(hashinfo, hash2); result = inet6_lhash2_lookup(net, ilb2, skb, doff, saddr, sport, &in6addr_any, hnum, dif, sdif); done: if (IS_ERR(result)) return NULL; return result; } EXPORT_SYMBOL_GPL(inet6_lookup_listener); struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const __be16 dport, const int dif) { struct sock *sk; bool refcounted; sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr, ntohs(dport), dif, 0, &refcounted); if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; return sk; } EXPORT_SYMBOL_GPL(inet6_lookup); static int __inet6_check_established(struct inet_timewait_death_row *death_row, struct sock *sk, const __u16 lport, struct inet_timewait_sock **twp) { struct inet_hashinfo *hinfo = death_row->hashinfo; struct inet_sock *inet = inet_sk(sk); const struct in6_addr *daddr = &sk->sk_v6_rcv_saddr; const struct in6_addr *saddr = &sk->sk_v6_daddr; const int dif = sk->sk_bound_dev_if; struct net *net = sock_net(sk); const int sdif = l3mdev_master_ifindex_by_index(net, dif); const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr, inet->inet_dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); spinlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; const struct hlist_nulls_node *node; struct inet_timewait_sock *tw = NULL; spin_lock(lock); sk_nulls_for_each(sk2, node, &head->chain) { if (sk2->sk_hash != hash) continue; if (likely(inet6_match(net, sk2, saddr, daddr, ports, dif, sdif))) { if (sk2->sk_state == TCP_TIME_WAIT) { tw = inet_twsk(sk2); if (sk->sk_protocol == IPPROTO_TCP && tcp_twsk_unique(sk, sk2, twp)) break; } goto not_unique; } } /* Must record num and sport now. Otherwise we will see * in hash table socket with a funny identity. */ inet->inet_num = lport; inet->inet_sport = htons(lport); sk->sk_hash = hash; WARN_ON(!sk_unhashed(sk)); __sk_nulls_add_node_rcu(sk, &head->chain); if (tw) { sk_nulls_del_node_init_rcu((struct sock *)tw); __NET_INC_STATS(net, LINUX_MIB_TIMEWAITRECYCLED); } spin_unlock(lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); if (twp) { *twp = tw; } else if (tw) { /* Silly. Should hash-dance instead... */ inet_twsk_deschedule_put(tw); } return 0; not_unique: spin_unlock(lock); return -EADDRNOTAVAIL; } static u64 inet6_sk_port_offset(const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); return secure_ipv6_port_ephemeral(sk->sk_v6_rcv_saddr.s6_addr32, sk->sk_v6_daddr.s6_addr32, inet->inet_dport); } int inet6_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { u64 port_offset = 0; if (!inet_sk(sk)->inet_num) port_offset = inet6_sk_port_offset(sk); return __inet_hash_connect(death_row, sk, port_offset, __inet6_check_established); } EXPORT_SYMBOL_GPL(inet6_hash_connect); int inet6_hash(struct sock *sk) { int err = 0; if (sk->sk_state != TCP_CLOSE) err = __inet_hash(sk, NULL); return err; } EXPORT_SYMBOL_GPL(inet6_hash); |
| 66 45 73 23 15 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 | /* SPDX-License-Identifier: GPL-2.0 */ /* * iommu trace points * * Copyright (C) 2013 Shuah Khan <shuah.kh@samsung.com> * */ #undef TRACE_SYSTEM #define TRACE_SYSTEM iommu #if !defined(_TRACE_IOMMU_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_IOMMU_H #include <linux/tracepoint.h> struct device; DECLARE_EVENT_CLASS(iommu_group_event, TP_PROTO(int group_id, struct device *dev), TP_ARGS(group_id, dev), TP_STRUCT__entry( __field(int, gid) __string(device, dev_name(dev)) ), TP_fast_assign( __entry->gid = group_id; __assign_str(device); ), TP_printk("IOMMU: groupID=%d device=%s", __entry->gid, __get_str(device) ) ); DEFINE_EVENT(iommu_group_event, add_device_to_group, TP_PROTO(int group_id, struct device *dev), TP_ARGS(group_id, dev) ); DEFINE_EVENT(iommu_group_event, remove_device_from_group, TP_PROTO(int group_id, struct device *dev), TP_ARGS(group_id, dev) ); DECLARE_EVENT_CLASS(iommu_device_event, TP_PROTO(struct device *dev), TP_ARGS(dev), TP_STRUCT__entry( __string(device, dev_name(dev)) ), TP_fast_assign( __assign_str(device); ), TP_printk("IOMMU: device=%s", __get_str(device) ) ); DEFINE_EVENT(iommu_device_event, attach_device_to_domain, TP_PROTO(struct device *dev), TP_ARGS(dev) ); TRACE_EVENT(map, TP_PROTO(unsigned long iova, phys_addr_t paddr, size_t size), TP_ARGS(iova, paddr, size), TP_STRUCT__entry( __field(u64, iova) __field(u64, paddr) __field(size_t, size) ), TP_fast_assign( __entry->iova = iova; __entry->paddr = paddr; __entry->size = size; ), TP_printk("IOMMU: iova=0x%016llx - 0x%016llx paddr=0x%016llx size=%zu", __entry->iova, __entry->iova + __entry->size, __entry->paddr, __entry->size ) ); TRACE_EVENT(unmap, TP_PROTO(unsigned long iova, size_t size, size_t unmapped_size), TP_ARGS(iova, size, unmapped_size), TP_STRUCT__entry( __field(u64, iova) __field(size_t, size) __field(size_t, unmapped_size) ), TP_fast_assign( __entry->iova = iova; __entry->size = size; __entry->unmapped_size = unmapped_size; ), TP_printk("IOMMU: iova=0x%016llx - 0x%016llx size=%zu unmapped_size=%zu", __entry->iova, __entry->iova + __entry->size, __entry->size, __entry->unmapped_size ) ); DECLARE_EVENT_CLASS(iommu_error, TP_PROTO(struct device *dev, unsigned long iova, int flags), TP_ARGS(dev, iova, flags), TP_STRUCT__entry( __string(device, dev_name(dev)) __string(driver, dev_driver_string(dev)) __field(u64, iova) __field(int, flags) ), TP_fast_assign( __assign_str(device); __assign_str(driver); __entry->iova = iova; __entry->flags = flags; ), TP_printk("IOMMU:%s %s iova=0x%016llx flags=0x%04x", __get_str(driver), __get_str(device), __entry->iova, __entry->flags ) ); DEFINE_EVENT(iommu_error, io_page_fault, TP_PROTO(struct device *dev, unsigned long iova, int flags), TP_ARGS(dev, iova, flags) ); #endif /* _TRACE_IOMMU_H */ /* This part must be outside protection */ #include <trace/define_trace.h> |
| 11 6701 6712 150 41 41 41 5 5 1 147 174 19 174 171 174 19 111 9 111 110 111 110 11 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 | /* SPDX-License-Identifier: GPL-2.0+ */ /* * Driver for 8250/16550-type serial ports * * Based on drivers/char/serial.c, by Linus Torvalds, Theodore Ts'o. * * Copyright (C) 2001 Russell King. */ #include <linux/bits.h> #include <linux/serial_8250.h> #include <linux/serial_core.h> #include <linux/dmaengine.h> #include "../serial_mctrl_gpio.h" struct uart_8250_dma { int (*tx_dma)(struct uart_8250_port *p); int (*rx_dma)(struct uart_8250_port *p); void (*prepare_tx_dma)(struct uart_8250_port *p); void (*prepare_rx_dma)(struct uart_8250_port *p); /* Filter function */ dma_filter_fn fn; /* Parameter to the filter function */ void *rx_param; void *tx_param; struct dma_slave_config rxconf; struct dma_slave_config txconf; struct dma_chan *rxchan; struct dma_chan *txchan; /* Device address base for DMA operations */ phys_addr_t rx_dma_addr; phys_addr_t tx_dma_addr; /* DMA address of the buffer in memory */ dma_addr_t rx_addr; dma_addr_t tx_addr; dma_cookie_t rx_cookie; dma_cookie_t tx_cookie; void *rx_buf; size_t rx_size; size_t tx_size; unsigned char tx_running; unsigned char tx_err; unsigned char rx_running; }; struct old_serial_port { unsigned int uart; unsigned int baud_base; unsigned int port; unsigned int irq; upf_t flags; unsigned char io_type; unsigned char __iomem *iomem_base; unsigned short iomem_reg_shift; }; struct serial8250_config { const char *name; unsigned short fifo_size; unsigned short tx_loadsz; unsigned char fcr; unsigned char rxtrig_bytes[UART_FCR_R_TRIG_MAX_STATE]; unsigned int flags; }; #define UART_CAP_FIFO BIT(8) /* UART has FIFO */ #define UART_CAP_EFR BIT(9) /* UART has EFR */ #define UART_CAP_SLEEP BIT(10) /* UART has IER sleep */ #define UART_CAP_AFE BIT(11) /* MCR-based hw flow control */ #define UART_CAP_UUE BIT(12) /* UART needs IER bit 6 set (Xscale) */ #define UART_CAP_RTOIE BIT(13) /* UART needs IER bit 4 set (Xscale, Tegra) */ #define UART_CAP_HFIFO BIT(14) /* UART has a "hidden" FIFO */ #define UART_CAP_RPM BIT(15) /* Runtime PM is active while idle */ #define UART_CAP_IRDA BIT(16) /* UART supports IrDA line discipline */ #define UART_CAP_MINI BIT(17) /* Mini UART on BCM283X family lacks: * STOP PARITY EPAR SPAR WLEN5 WLEN6 */ #define UART_CAP_NOTEMT BIT(18) /* UART without interrupt on TEMT available */ #define UART_BUG_QUOT BIT(0) /* UART has buggy quot LSB */ #define UART_BUG_TXEN BIT(1) /* UART has buggy TX IIR status */ #define UART_BUG_NOMSR BIT(2) /* UART has buggy MSR status bits (Au1x00) */ #define UART_BUG_THRE BIT(3) /* UART has buggy THRE reassertion */ #define UART_BUG_TXRACE BIT(5) /* UART Tx fails to set remote DR */ /* Module parameters */ #define UART_NR CONFIG_SERIAL_8250_NR_UARTS extern unsigned int nr_uarts; #ifdef CONFIG_SERIAL_8250_SHARE_IRQ #define SERIAL8250_SHARE_IRQS 1 #else #define SERIAL8250_SHARE_IRQS 0 #endif extern unsigned int share_irqs; extern unsigned int skip_txen_test; #define SERIAL8250_PORT_FLAGS(_base, _irq, _flags) \ { \ .iobase = _base, \ .irq = _irq, \ .uartclk = 1843200, \ .iotype = UPIO_PORT, \ .flags = UPF_BOOT_AUTOCONF | (_flags), \ } #define SERIAL8250_PORT(_base, _irq) SERIAL8250_PORT_FLAGS(_base, _irq, 0) extern struct uart_driver serial8250_reg; void serial8250_register_ports(struct uart_driver *drv, struct device *dev); /* Legacy ISA bus related APIs */ typedef void (*serial8250_isa_config_fn)(int, struct uart_port *, u32 *); extern serial8250_isa_config_fn serial8250_isa_config; void serial8250_isa_init_ports(void); extern struct platform_device *serial8250_isa_devs; extern const struct uart_ops *univ8250_port_base_ops; extern struct uart_ops univ8250_port_ops; static inline int serial_in(struct uart_8250_port *up, int offset) { return up->port.serial_in(&up->port, offset); } static inline void serial_out(struct uart_8250_port *up, int offset, int value) { up->port.serial_out(&up->port, offset, value); } /** * serial_lsr_in - Read LSR register and preserve flags across reads * @up: uart 8250 port * * Read LSR register and handle saving non-preserved flags across reads. * The flags that are not preserved across reads are stored into * up->lsr_saved_flags. * * Returns LSR value or'ed with the preserved flags (if any). */ static inline u16 serial_lsr_in(struct uart_8250_port *up) { u16 lsr = up->lsr_saved_flags; lsr |= serial_in(up, UART_LSR); up->lsr_saved_flags = lsr & up->lsr_save_mask; return lsr; } /* * For the 16C950 */ static void serial_icr_write(struct uart_8250_port *up, int offset, int value) { serial_out(up, UART_SCR, offset); serial_out(up, UART_ICR, value); } static unsigned int __maybe_unused serial_icr_read(struct uart_8250_port *up, int offset) { unsigned int value; serial_icr_write(up, UART_ACR, up->acr | UART_ACR_ICRRD); serial_out(up, UART_SCR, offset); value = serial_in(up, UART_ICR); serial_icr_write(up, UART_ACR, up->acr); return value; } void serial8250_clear_and_reinit_fifos(struct uart_8250_port *p); static inline u32 serial_dl_read(struct uart_8250_port *up) { return up->dl_read(up); } static inline void serial_dl_write(struct uart_8250_port *up, u32 value) { up->dl_write(up, value); } static inline bool serial8250_set_THRI(struct uart_8250_port *up) { /* Port locked to synchronize UART_IER access against the console. */ lockdep_assert_held_once(&up->port.lock); if (up->ier & UART_IER_THRI) return false; up->ier |= UART_IER_THRI; serial_out(up, UART_IER, up->ier); return true; } static inline bool serial8250_clear_THRI(struct uart_8250_port *up) { /* Port locked to synchronize UART_IER access against the console. */ lockdep_assert_held_once(&up->port.lock); if (!(up->ier & UART_IER_THRI)) return false; up->ier &= ~UART_IER_THRI; serial_out(up, UART_IER, up->ier); return true; } struct uart_8250_port *serial8250_setup_port(int index); struct uart_8250_port *serial8250_get_port(int line); void serial8250_rpm_get(struct uart_8250_port *p); void serial8250_rpm_put(struct uart_8250_port *p); void serial8250_rpm_get_tx(struct uart_8250_port *p); void serial8250_rpm_put_tx(struct uart_8250_port *p); int serial8250_em485_config(struct uart_port *port, struct ktermios *termios, struct serial_rs485 *rs485); void serial8250_em485_start_tx(struct uart_8250_port *p); void serial8250_em485_stop_tx(struct uart_8250_port *p); void serial8250_em485_destroy(struct uart_8250_port *p); extern struct serial_rs485 serial8250_em485_supported; /* MCR <-> TIOCM conversion */ static inline int serial8250_TIOCM_to_MCR(int tiocm) { int mcr = 0; if (tiocm & TIOCM_RTS) mcr |= UART_MCR_RTS; if (tiocm & TIOCM_DTR) mcr |= UART_MCR_DTR; if (tiocm & TIOCM_OUT1) mcr |= UART_MCR_OUT1; if (tiocm & TIOCM_OUT2) mcr |= UART_MCR_OUT2; if (tiocm & TIOCM_LOOP) mcr |= UART_MCR_LOOP; return mcr; } static inline int serial8250_MCR_to_TIOCM(int mcr) { int tiocm = 0; if (mcr & UART_MCR_RTS) tiocm |= TIOCM_RTS; if (mcr & UART_MCR_DTR) tiocm |= TIOCM_DTR; if (mcr & UART_MCR_OUT1) tiocm |= TIOCM_OUT1; if (mcr & UART_MCR_OUT2) tiocm |= TIOCM_OUT2; if (mcr & UART_MCR_LOOP) tiocm |= TIOCM_LOOP; return tiocm; } /* MSR <-> TIOCM conversion */ static inline int serial8250_MSR_to_TIOCM(int msr) { int tiocm = 0; if (msr & UART_MSR_DCD) tiocm |= TIOCM_CAR; if (msr & UART_MSR_RI) tiocm |= TIOCM_RNG; if (msr & UART_MSR_DSR) tiocm |= TIOCM_DSR; if (msr & UART_MSR_CTS) tiocm |= TIOCM_CTS; return tiocm; } static inline void serial8250_out_MCR(struct uart_8250_port *up, int value) { serial_out(up, UART_MCR, value); if (up->gpios) mctrl_gpio_set(up->gpios, serial8250_MCR_to_TIOCM(value)); } static inline int serial8250_in_MCR(struct uart_8250_port *up) { int mctrl; mctrl = serial_in(up, UART_MCR); if (up->gpios) { unsigned int mctrl_gpio = 0; mctrl_gpio = mctrl_gpio_get_outputs(up->gpios, &mctrl_gpio); mctrl |= serial8250_TIOCM_to_MCR(mctrl_gpio); } return mctrl; } #ifdef CONFIG_SERIAL_8250_PNP int serial8250_pnp_init(void); void serial8250_pnp_exit(void); #else static inline int serial8250_pnp_init(void) { return 0; } static inline void serial8250_pnp_exit(void) { } #endif #ifdef CONFIG_SERIAL_8250_RSA void univ8250_rsa_support(struct uart_ops *ops); #else static inline void univ8250_rsa_support(struct uart_ops *ops) { } #endif #ifdef CONFIG_SERIAL_8250_FINTEK int fintek_8250_probe(struct uart_8250_port *uart); #else static inline int fintek_8250_probe(struct uart_8250_port *uart) { return 0; } #endif #ifdef CONFIG_ARCH_OMAP1 #include <linux/soc/ti/omap1-soc.h> static inline int is_omap1_8250(struct uart_8250_port *pt) { int res; switch (pt->port.mapbase) { case OMAP1_UART1_BASE: case OMAP1_UART2_BASE: case OMAP1_UART3_BASE: res = 1; break; default: res = 0; break; } return res; } static inline int is_omap1510_8250(struct uart_8250_port *pt) { if (!cpu_is_omap1510()) return 0; return is_omap1_8250(pt); } #else static inline int is_omap1_8250(struct uart_8250_port *pt) { return 0; } static inline int is_omap1510_8250(struct uart_8250_port *pt) { return 0; } #endif #ifdef CONFIG_SERIAL_8250_DMA extern int serial8250_tx_dma(struct uart_8250_port *); extern int serial8250_rx_dma(struct uart_8250_port *); extern void serial8250_rx_dma_flush(struct uart_8250_port *); extern int serial8250_request_dma(struct uart_8250_port *); extern void serial8250_release_dma(struct uart_8250_port *); static inline void serial8250_do_prepare_tx_dma(struct uart_8250_port *p) { struct uart_8250_dma *dma = p->dma; if (dma->prepare_tx_dma) dma->prepare_tx_dma(p); } static inline void serial8250_do_prepare_rx_dma(struct uart_8250_port *p) { struct uart_8250_dma *dma = p->dma; if (dma->prepare_rx_dma) dma->prepare_rx_dma(p); } static inline bool serial8250_tx_dma_running(struct uart_8250_port *p) { struct uart_8250_dma *dma = p->dma; return dma && dma->tx_running; } #else static inline int serial8250_tx_dma(struct uart_8250_port *p) { return -1; } static inline int serial8250_rx_dma(struct uart_8250_port *p) { return -1; } static inline void serial8250_rx_dma_flush(struct uart_8250_port *p) { } static inline int serial8250_request_dma(struct uart_8250_port *p) { return -1; } static inline void serial8250_release_dma(struct uart_8250_port *p) { } static inline bool serial8250_tx_dma_running(struct uart_8250_port *p) { return false; } #endif static inline int ns16550a_goto_highspeed(struct uart_8250_port *up) { unsigned char status; status = serial_in(up, 0x04); /* EXCR2 */ #define PRESL(x) ((x) & 0x30) if (PRESL(status) == 0x10) { /* already in high speed mode */ return 0; } else { status &= ~0xB0; /* Disable LOCK, mask out PRESL[01] */ status |= 0x10; /* 1.625 divisor for baud_base --> 921600 */ serial_out(up, 0x04, status); } return 1; } static inline int serial_index(struct uart_port *port) { return port->minor - 64; } |
| 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 | // SPDX-License-Identifier: GPL-2.0-or-later /* * A driver for the AverMedia MR 800 USB FM radio. This device plugs * into both the USB and an analog audio input, so this thing * only deals with initialization and frequency setting, the * audio data has to be handled by a sound driver. * * Copyright (c) 2008 Alexey Klimov <klimov.linux@gmail.com> */ /* * Big thanks to authors and contributors of dsbr100.c and radio-si470x.c * * When work was looked pretty good, i discover this: * http://av-usbradio.sourceforge.net/index.php * http://sourceforge.net/projects/av-usbradio/ * Latest release of theirs project was in 2005. * Probably, this driver could be improved through using their * achievements (specifications given). * Also, Faidon Liambotis <paravoid@debian.org> wrote nice driver for this radio * in 2007. He allowed to use his driver to improve current mr800 radio driver. * http://www.spinics.net/lists/linux-usb-devel/msg10109.html * * Version 0.01: First working version. * It's required to blacklist AverMedia USB Radio * in usbhid/hid-quirks.c * Version 0.10: A lot of cleanups and fixes: unpluging the device, * few mutex locks were added, codinstyle issues, etc. * Added stereo support. Thanks to * Douglas Schilling Landgraf <dougsland@gmail.com> and * David Ellingsworth <david@identd.dyndns.org> * for discussion, help and support. * Version 0.11: Converted to v4l2_device. * * Many things to do: * - Correct power management of device (suspend & resume) * - Add code for scanning and smooth tuning * - Add code for sensitivity value * - Correct mistakes * - In Japan another FREQ_MIN and FREQ_MAX */ /* kernel includes */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/input.h> #include <linux/videodev2.h> #include <media/v4l2-device.h> #include <media/v4l2-ioctl.h> #include <media/v4l2-ctrls.h> #include <media/v4l2-event.h> #include <linux/usb.h> #include <linux/mutex.h> /* driver and module definitions */ #define DRIVER_AUTHOR "Alexey Klimov <klimov.linux@gmail.com>" #define DRIVER_DESC "AverMedia MR 800 USB FM radio driver" #define DRIVER_VERSION "0.1.2" MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); MODULE_VERSION(DRIVER_VERSION); #define USB_AMRADIO_VENDOR 0x07ca #define USB_AMRADIO_PRODUCT 0xb800 /* dev_warn macro with driver name */ #define MR800_DRIVER_NAME "radio-mr800" #define amradio_dev_warn(dev, fmt, arg...) \ dev_warn(dev, MR800_DRIVER_NAME " - " fmt, ##arg) #define amradio_dev_err(dev, fmt, arg...) \ dev_err(dev, MR800_DRIVER_NAME " - " fmt, ##arg) /* Probably USB_TIMEOUT should be modified in module parameter */ #define BUFFER_LENGTH 8 #define USB_TIMEOUT 500 /* Frequency limits in MHz -- these are European values. For Japanese devices, that would be 76 and 91. */ #define FREQ_MIN 87.5 #define FREQ_MAX 108.0 #define FREQ_MUL 16000 /* * Commands that device should understand * List isn't full and will be updated with implementation of new functions */ #define AMRADIO_SET_FREQ 0xa4 #define AMRADIO_GET_READY_FLAG 0xa5 #define AMRADIO_GET_SIGNAL 0xa7 #define AMRADIO_GET_FREQ 0xa8 #define AMRADIO_SET_SEARCH_UP 0xa9 #define AMRADIO_SET_SEARCH_DOWN 0xaa #define AMRADIO_SET_MUTE 0xab #define AMRADIO_SET_RIGHT_MUTE 0xac #define AMRADIO_SET_LEFT_MUTE 0xad #define AMRADIO_SET_MONO 0xae #define AMRADIO_SET_SEARCH_LVL 0xb0 #define AMRADIO_STOP_SEARCH 0xb1 /* Comfortable defines for amradio_set_stereo */ #define WANT_STEREO 0x00 #define WANT_MONO 0x01 /* module parameter */ static int radio_nr = -1; module_param(radio_nr, int, 0); MODULE_PARM_DESC(radio_nr, "Radio Nr"); /* Data for one (physical) device */ struct amradio_device { /* reference to USB and video device */ struct usb_device *usbdev; struct usb_interface *intf; struct video_device vdev; struct v4l2_device v4l2_dev; struct v4l2_ctrl_handler hdl; u8 *buffer; struct mutex lock; /* buffer locking */ int curfreq; int stereo; int muted; }; static inline struct amradio_device *to_amradio_dev(struct v4l2_device *v4l2_dev) { return container_of(v4l2_dev, struct amradio_device, v4l2_dev); } static int amradio_send_cmd(struct amradio_device *radio, u8 cmd, u8 arg, u8 *extra, u8 extralen, bool reply) { int retval; int size; radio->buffer[0] = 0x00; radio->buffer[1] = 0x55; radio->buffer[2] = 0xaa; radio->buffer[3] = extralen; radio->buffer[4] = cmd; radio->buffer[5] = arg; radio->buffer[6] = 0x00; radio->buffer[7] = extra || reply ? 8 : 0; retval = usb_bulk_msg(radio->usbdev, usb_sndintpipe(radio->usbdev, 2), radio->buffer, BUFFER_LENGTH, &size, USB_TIMEOUT); if (retval < 0 || size != BUFFER_LENGTH) { if (video_is_registered(&radio->vdev)) amradio_dev_warn(&radio->vdev.dev, "cmd %02x failed\n", cmd); return retval ? retval : -EIO; } if (!extra && !reply) return 0; if (extra) { memcpy(radio->buffer, extra, extralen); memset(radio->buffer + extralen, 0, 8 - extralen); retval = usb_bulk_msg(radio->usbdev, usb_sndintpipe(radio->usbdev, 2), radio->buffer, BUFFER_LENGTH, &size, USB_TIMEOUT); } else { memset(radio->buffer, 0, 8); retval = usb_bulk_msg(radio->usbdev, usb_rcvbulkpipe(radio->usbdev, 0x81), radio->buffer, BUFFER_LENGTH, &size, USB_TIMEOUT); } if (retval == 0 && size == BUFFER_LENGTH) return 0; if (video_is_registered(&radio->vdev) && cmd != AMRADIO_GET_READY_FLAG) amradio_dev_warn(&radio->vdev.dev, "follow-up to cmd %02x failed\n", cmd); return retval ? retval : -EIO; } /* switch on/off the radio. Send 8 bytes to device */ static int amradio_set_mute(struct amradio_device *radio, bool mute) { int ret = amradio_send_cmd(radio, AMRADIO_SET_MUTE, mute, NULL, 0, false); if (!ret) radio->muted = mute; return ret; } /* set a frequency, freq is defined by v4l's TUNER_LOW, i.e. 1/16th kHz */ static int amradio_set_freq(struct amradio_device *radio, int freq) { unsigned short freq_send; u8 buf[3]; int retval; /* we need to be sure that frequency isn't out of range */ freq = clamp_t(unsigned, freq, FREQ_MIN * FREQ_MUL, FREQ_MAX * FREQ_MUL); freq_send = 0x10 + (freq >> 3) / 25; /* frequency is calculated from freq_send and placed in first 2 bytes */ buf[0] = (freq_send >> 8) & 0xff; buf[1] = freq_send & 0xff; buf[2] = 0x01; retval = amradio_send_cmd(radio, AMRADIO_SET_FREQ, 0, buf, 3, false); if (retval) return retval; radio->curfreq = freq; msleep(40); return 0; } static int amradio_set_stereo(struct amradio_device *radio, bool stereo) { int ret = amradio_send_cmd(radio, AMRADIO_SET_MONO, !stereo, NULL, 0, false); if (!ret) radio->stereo = stereo; return ret; } static int amradio_get_stat(struct amradio_device *radio, bool *is_stereo, u32 *signal) { int ret = amradio_send_cmd(radio, AMRADIO_GET_SIGNAL, 0, NULL, 0, true); if (ret) return ret; *is_stereo = radio->buffer[2] >> 7; *signal = (radio->buffer[3] & 0xf0) << 8; return 0; } /* Handle unplugging the device. * We call video_unregister_device in any case. * The last function called in this procedure is * usb_amradio_device_release. */ static void usb_amradio_disconnect(struct usb_interface *intf) { struct amradio_device *radio = to_amradio_dev(usb_get_intfdata(intf)); mutex_lock(&radio->lock); video_unregister_device(&radio->vdev); amradio_set_mute(radio, true); usb_set_intfdata(intf, NULL); v4l2_device_disconnect(&radio->v4l2_dev); mutex_unlock(&radio->lock); v4l2_device_put(&radio->v4l2_dev); } /* vidioc_querycap - query device capabilities */ static int vidioc_querycap(struct file *file, void *priv, struct v4l2_capability *v) { struct amradio_device *radio = video_drvdata(file); strscpy(v->driver, "radio-mr800", sizeof(v->driver)); strscpy(v->card, "AverMedia MR 800 USB FM Radio", sizeof(v->card)); usb_make_path(radio->usbdev, v->bus_info, sizeof(v->bus_info)); return 0; } /* vidioc_g_tuner - get tuner attributes */ static int vidioc_g_tuner(struct file *file, void *priv, struct v4l2_tuner *v) { struct amradio_device *radio = video_drvdata(file); bool is_stereo = false; int retval; if (v->index > 0) return -EINVAL; v->signal = 0; retval = amradio_get_stat(radio, &is_stereo, &v->signal); if (retval) return retval; strscpy(v->name, "FM", sizeof(v->name)); v->type = V4L2_TUNER_RADIO; v->rangelow = FREQ_MIN * FREQ_MUL; v->rangehigh = FREQ_MAX * FREQ_MUL; v->capability = V4L2_TUNER_CAP_LOW | V4L2_TUNER_CAP_STEREO | V4L2_TUNER_CAP_HWSEEK_WRAP; v->rxsubchans = is_stereo ? V4L2_TUNER_SUB_STEREO : V4L2_TUNER_SUB_MONO; v->audmode = radio->stereo ? V4L2_TUNER_MODE_STEREO : V4L2_TUNER_MODE_MONO; return 0; } /* vidioc_s_tuner - set tuner attributes */ static int vidioc_s_tuner(struct file *file, void *priv, const struct v4l2_tuner *v) { struct amradio_device *radio = video_drvdata(file); if (v->index > 0) return -EINVAL; /* mono/stereo selector */ switch (v->audmode) { case V4L2_TUNER_MODE_MONO: return amradio_set_stereo(radio, WANT_MONO); default: return amradio_set_stereo(radio, WANT_STEREO); } } /* vidioc_s_frequency - set tuner radio frequency */ static int vidioc_s_frequency(struct file *file, void *priv, const struct v4l2_frequency *f) { struct amradio_device *radio = video_drvdata(file); if (f->tuner != 0) return -EINVAL; return amradio_set_freq(radio, f->frequency); } /* vidioc_g_frequency - get tuner radio frequency */ static int vidioc_g_frequency(struct file *file, void *priv, struct v4l2_frequency *f) { struct amradio_device *radio = video_drvdata(file); if (f->tuner != 0 || f->type != V4L2_TUNER_RADIO) return -EINVAL; f->type = V4L2_TUNER_RADIO; f->frequency = radio->curfreq; return 0; } static int vidioc_s_hw_freq_seek(struct file *file, void *priv, const struct v4l2_hw_freq_seek *seek) { static u8 buf[8] = { 0x3d, 0x32, 0x0f, 0x08, 0x3d, 0x32, 0x0f, 0x08 }; struct amradio_device *radio = video_drvdata(file); unsigned long timeout; int retval; if (seek->tuner != 0 || !seek->wrap_around) return -EINVAL; if (file->f_flags & O_NONBLOCK) return -EWOULDBLOCK; retval = amradio_send_cmd(radio, AMRADIO_SET_SEARCH_LVL, 0, buf, 8, false); if (retval) return retval; amradio_set_freq(radio, radio->curfreq); retval = amradio_send_cmd(radio, seek->seek_upward ? AMRADIO_SET_SEARCH_UP : AMRADIO_SET_SEARCH_DOWN, 0, NULL, 0, false); if (retval) return retval; timeout = jiffies + msecs_to_jiffies(30000); for (;;) { if (time_after(jiffies, timeout)) { retval = -ENODATA; break; } if (schedule_timeout_interruptible(msecs_to_jiffies(10))) { retval = -ERESTARTSYS; break; } retval = amradio_send_cmd(radio, AMRADIO_GET_READY_FLAG, 0, NULL, 0, true); if (retval) continue; amradio_send_cmd(radio, AMRADIO_GET_FREQ, 0, NULL, 0, true); if (radio->buffer[1] || radio->buffer[2]) { /* To check: sometimes radio->curfreq is set to out of range value */ radio->curfreq = (radio->buffer[1] << 8) | radio->buffer[2]; radio->curfreq = (radio->curfreq - 0x10) * 200; amradio_send_cmd(radio, AMRADIO_STOP_SEARCH, 0, NULL, 0, false); amradio_set_freq(radio, radio->curfreq); retval = 0; break; } } amradio_send_cmd(radio, AMRADIO_STOP_SEARCH, 0, NULL, 0, false); amradio_set_freq(radio, radio->curfreq); return retval; } static int usb_amradio_s_ctrl(struct v4l2_ctrl *ctrl) { struct amradio_device *radio = container_of(ctrl->handler, struct amradio_device, hdl); switch (ctrl->id) { case V4L2_CID_AUDIO_MUTE: return amradio_set_mute(radio, ctrl->val); } return -EINVAL; } static int usb_amradio_init(struct amradio_device *radio) { int retval; retval = amradio_set_mute(radio, true); if (retval) goto out_err; retval = amradio_set_stereo(radio, true); if (retval) goto out_err; retval = amradio_set_freq(radio, radio->curfreq); if (retval) goto out_err; return 0; out_err: amradio_dev_err(&radio->vdev.dev, "initialization failed\n"); return retval; } /* Suspend device - stop device. Need to be checked and fixed */ static int usb_amradio_suspend(struct usb_interface *intf, pm_message_t message) { struct amradio_device *radio = to_amradio_dev(usb_get_intfdata(intf)); mutex_lock(&radio->lock); if (!radio->muted) { amradio_set_mute(radio, true); radio->muted = false; } mutex_unlock(&radio->lock); dev_info(&intf->dev, "going into suspend..\n"); return 0; } /* Resume device - start device. Need to be checked and fixed */ static int usb_amradio_resume(struct usb_interface *intf) { struct amradio_device *radio = to_amradio_dev(usb_get_intfdata(intf)); mutex_lock(&radio->lock); amradio_set_stereo(radio, radio->stereo); amradio_set_freq(radio, radio->curfreq); if (!radio->muted) amradio_set_mute(radio, false); mutex_unlock(&radio->lock); dev_info(&intf->dev, "coming out of suspend..\n"); return 0; } static const struct v4l2_ctrl_ops usb_amradio_ctrl_ops = { .s_ctrl = usb_amradio_s_ctrl, }; /* File system interface */ static const struct v4l2_file_operations usb_amradio_fops = { .owner = THIS_MODULE, .open = v4l2_fh_open, .release = v4l2_fh_release, .poll = v4l2_ctrl_poll, .unlocked_ioctl = video_ioctl2, }; static const struct v4l2_ioctl_ops usb_amradio_ioctl_ops = { .vidioc_querycap = vidioc_querycap, .vidioc_g_tuner = vidioc_g_tuner, .vidioc_s_tuner = vidioc_s_tuner, .vidioc_g_frequency = vidioc_g_frequency, .vidioc_s_frequency = vidioc_s_frequency, .vidioc_s_hw_freq_seek = vidioc_s_hw_freq_seek, .vidioc_log_status = v4l2_ctrl_log_status, .vidioc_subscribe_event = v4l2_ctrl_subscribe_event, .vidioc_unsubscribe_event = v4l2_event_unsubscribe, }; static void usb_amradio_release(struct v4l2_device *v4l2_dev) { struct amradio_device *radio = to_amradio_dev(v4l2_dev); /* free rest memory */ v4l2_ctrl_handler_free(&radio->hdl); v4l2_device_unregister(&radio->v4l2_dev); kfree(radio->buffer); kfree(radio); } /* check if the device is present and register with v4l and usb if it is */ static int usb_amradio_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct amradio_device *radio; int retval; radio = kzalloc(sizeof(struct amradio_device), GFP_KERNEL); if (!radio) { dev_err(&intf->dev, "kmalloc for amradio_device failed\n"); retval = -ENOMEM; goto err; } radio->buffer = kmalloc(BUFFER_LENGTH, GFP_KERNEL); if (!radio->buffer) { dev_err(&intf->dev, "kmalloc for radio->buffer failed\n"); retval = -ENOMEM; goto err_nobuf; } retval = v4l2_device_register(&intf->dev, &radio->v4l2_dev); if (retval < 0) { dev_err(&intf->dev, "couldn't register v4l2_device\n"); goto err_v4l2; } v4l2_ctrl_handler_init(&radio->hdl, 1); v4l2_ctrl_new_std(&radio->hdl, &usb_amradio_ctrl_ops, V4L2_CID_AUDIO_MUTE, 0, 1, 1, 1); if (radio->hdl.error) { retval = radio->hdl.error; dev_err(&intf->dev, "couldn't register control\n"); goto err_ctrl; } mutex_init(&radio->lock); radio->v4l2_dev.ctrl_handler = &radio->hdl; radio->v4l2_dev.release = usb_amradio_release; strscpy(radio->vdev.name, radio->v4l2_dev.name, sizeof(radio->vdev.name)); radio->vdev.v4l2_dev = &radio->v4l2_dev; radio->vdev.fops = &usb_amradio_fops; radio->vdev.ioctl_ops = &usb_amradio_ioctl_ops; radio->vdev.release = video_device_release_empty; radio->vdev.lock = &radio->lock; radio->vdev.device_caps = V4L2_CAP_RADIO | V4L2_CAP_TUNER | V4L2_CAP_HW_FREQ_SEEK; radio->usbdev = interface_to_usbdev(intf); radio->intf = intf; usb_set_intfdata(intf, &radio->v4l2_dev); radio->curfreq = 95.16 * FREQ_MUL; video_set_drvdata(&radio->vdev, radio); retval = usb_amradio_init(radio); if (retval) goto err_vdev; retval = video_register_device(&radio->vdev, VFL_TYPE_RADIO, radio_nr); if (retval < 0) { dev_err(&intf->dev, "could not register video device\n"); goto err_vdev; } return 0; err_vdev: v4l2_ctrl_handler_free(&radio->hdl); err_ctrl: v4l2_device_unregister(&radio->v4l2_dev); err_v4l2: kfree(radio->buffer); err_nobuf: kfree(radio); err: return retval; } /* USB Device ID List */ static const struct usb_device_id usb_amradio_device_table[] = { { USB_DEVICE_AND_INTERFACE_INFO(USB_AMRADIO_VENDOR, USB_AMRADIO_PRODUCT, USB_CLASS_HID, 0, 0) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, usb_amradio_device_table); /* USB subsystem interface */ static struct usb_driver usb_amradio_driver = { .name = MR800_DRIVER_NAME, .probe = usb_amradio_probe, .disconnect = usb_amradio_disconnect, .suspend = usb_amradio_suspend, .resume = usb_amradio_resume, .reset_resume = usb_amradio_resume, .id_table = usb_amradio_device_table, }; module_usb_driver(usb_amradio_driver); |
| 16 16 16 16 14 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 14 16 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 | // SPDX-License-Identifier: GPL-2.0-only /* * handle transition of Linux booting another kernel * Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com> */ #define pr_fmt(fmt) "kexec: " fmt #include <linux/mm.h> #include <linux/kexec.h> #include <linux/string.h> #include <linux/gfp.h> #include <linux/reboot.h> #include <linux/numa.h> #include <linux/ftrace.h> #include <linux/io.h> #include <linux/suspend.h> #include <linux/vmalloc.h> #include <linux/efi.h> #include <linux/cc_platform.h> #include <asm/init.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> #include <asm/io_apic.h> #include <asm/debugreg.h> #include <asm/kexec-bzimage64.h> #include <asm/setup.h> #include <asm/set_memory.h> #include <asm/cpu.h> #ifdef CONFIG_ACPI /* * Used while adding mapping for ACPI tables. * Can be reused when other iomem regions need be mapped */ struct init_pgtable_data { struct x86_mapping_info *info; pgd_t *level4p; }; static int mem_region_callback(struct resource *res, void *arg) { struct init_pgtable_data *data = arg; return kernel_ident_mapping_init(data->info, data->level4p, res->start, res->end + 1); } static int map_acpi_tables(struct x86_mapping_info *info, pgd_t *level4p) { struct init_pgtable_data data; unsigned long flags; int ret; data.info = info; data.level4p = level4p; flags = IORESOURCE_MEM | IORESOURCE_BUSY; ret = walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1, &data, mem_region_callback); if (ret && ret != -EINVAL) return ret; /* ACPI tables could be located in ACPI Non-volatile Storage region */ ret = walk_iomem_res_desc(IORES_DESC_ACPI_NV_STORAGE, flags, 0, -1, &data, mem_region_callback); if (ret && ret != -EINVAL) return ret; return 0; } #else static int map_acpi_tables(struct x86_mapping_info *info, pgd_t *level4p) { return 0; } #endif #ifdef CONFIG_KEXEC_FILE const struct kexec_file_ops * const kexec_file_loaders[] = { &kexec_bzImage64_ops, NULL }; #endif static int map_efi_systab(struct x86_mapping_info *info, pgd_t *level4p) { #ifdef CONFIG_EFI unsigned long mstart, mend; if (!efi_enabled(EFI_BOOT)) return 0; mstart = (boot_params.efi_info.efi_systab | ((u64)boot_params.efi_info.efi_systab_hi<<32)); if (efi_enabled(EFI_64BIT)) mend = mstart + sizeof(efi_system_table_64_t); else mend = mstart + sizeof(efi_system_table_32_t); if (!mstart) return 0; return kernel_ident_mapping_init(info, level4p, mstart, mend); #endif return 0; } static void free_transition_pgtable(struct kimage *image) { free_page((unsigned long)image->arch.p4d); image->arch.p4d = NULL; free_page((unsigned long)image->arch.pud); image->arch.pud = NULL; free_page((unsigned long)image->arch.pmd); image->arch.pmd = NULL; free_page((unsigned long)image->arch.pte); image->arch.pte = NULL; } static int init_transition_pgtable(struct kimage *image, pgd_t *pgd) { pgprot_t prot = PAGE_KERNEL_EXEC_NOENC; unsigned long vaddr, paddr; int result = -ENOMEM; p4d_t *p4d; pud_t *pud; pmd_t *pmd; pte_t *pte; vaddr = (unsigned long)relocate_kernel; paddr = __pa(page_address(image->control_code_page)+PAGE_SIZE); pgd += pgd_index(vaddr); if (!pgd_present(*pgd)) { p4d = (p4d_t *)get_zeroed_page(GFP_KERNEL); if (!p4d) goto err; image->arch.p4d = p4d; set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE)); } p4d = p4d_offset(pgd, vaddr); if (!p4d_present(*p4d)) { pud = (pud_t *)get_zeroed_page(GFP_KERNEL); if (!pud) goto err; image->arch.pud = pud; set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE)); } pud = pud_offset(p4d, vaddr); if (!pud_present(*pud)) { pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL); if (!pmd) goto err; image->arch.pmd = pmd; set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); } pmd = pmd_offset(pud, vaddr); if (!pmd_present(*pmd)) { pte = (pte_t *)get_zeroed_page(GFP_KERNEL); if (!pte) goto err; image->arch.pte = pte; set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE)); } pte = pte_offset_kernel(pmd, vaddr); if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) prot = PAGE_KERNEL_EXEC; set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, prot)); return 0; err: return result; } static void *alloc_pgt_page(void *data) { struct kimage *image = (struct kimage *)data; struct page *page; void *p = NULL; page = kimage_alloc_control_pages(image, 0); if (page) { p = page_address(page); clear_page(p); } return p; } static int init_pgtable(struct kimage *image, unsigned long start_pgtable) { struct x86_mapping_info info = { .alloc_pgt_page = alloc_pgt_page, .context = image, .page_flag = __PAGE_KERNEL_LARGE_EXEC, .kernpg_flag = _KERNPG_TABLE_NOENC, }; unsigned long mstart, mend; pgd_t *level4p; int result; int i; level4p = (pgd_t *)__va(start_pgtable); clear_page(level4p); if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) { info.page_flag |= _PAGE_ENC; info.kernpg_flag |= _PAGE_ENC; } if (direct_gbpages) info.direct_gbpages = true; for (i = 0; i < nr_pfn_mapped; i++) { mstart = pfn_mapped[i].start << PAGE_SHIFT; mend = pfn_mapped[i].end << PAGE_SHIFT; result = kernel_ident_mapping_init(&info, level4p, mstart, mend); if (result) return result; } /* * segments's mem ranges could be outside 0 ~ max_pfn, * for example when jump back to original kernel from kexeced kernel. * or first kernel is booted with user mem map, and second kernel * could be loaded out of that range. */ for (i = 0; i < image->nr_segments; i++) { mstart = image->segment[i].mem; mend = mstart + image->segment[i].memsz; result = kernel_ident_mapping_init(&info, level4p, mstart, mend); if (result) return result; } /* * Prepare EFI systab and ACPI tables for kexec kernel since they are * not covered by pfn_mapped. */ result = map_efi_systab(&info, level4p); if (result) return result; result = map_acpi_tables(&info, level4p); if (result) return result; return init_transition_pgtable(image, level4p); } static void load_segments(void) { __asm__ __volatile__ ( "\tmovl %0,%%ds\n" "\tmovl %0,%%es\n" "\tmovl %0,%%ss\n" "\tmovl %0,%%fs\n" "\tmovl %0,%%gs\n" : : "a" (__KERNEL_DS) : "memory" ); } int machine_kexec_prepare(struct kimage *image) { unsigned long start_pgtable; int result; /* Calculate the offsets */ start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT; /* Setup the identity mapped 64bit page table */ result = init_pgtable(image, start_pgtable); if (result) return result; return 0; } void machine_kexec_cleanup(struct kimage *image) { free_transition_pgtable(image); } /* * Do not allocate memory (or fail in any way) in machine_kexec(). * We are past the point of no return, committed to rebooting now. */ void machine_kexec(struct kimage *image) { unsigned long page_list[PAGES_NR]; unsigned int host_mem_enc_active; int save_ftrace_enabled; void *control_page; /* * This must be done before load_segments() since if call depth tracking * is used then GS must be valid to make any function calls. */ host_mem_enc_active = cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT); #ifdef CONFIG_KEXEC_JUMP if (image->preserve_context) save_processor_state(); #endif save_ftrace_enabled = __ftrace_enabled_save(); /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); hw_breakpoint_disable(); cet_disable(); if (image->preserve_context) { #ifdef CONFIG_X86_IO_APIC /* * We need to put APICs in legacy mode so that we can * get timer interrupts in second kernel. kexec/kdump * paths already have calls to restore_boot_irq_mode() * in one form or other. kexec jump path also need one. */ clear_IO_APIC(); restore_boot_irq_mode(); #endif } control_page = page_address(image->control_code_page) + PAGE_SIZE; __memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE); page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page); page_list[VA_CONTROL_PAGE] = (unsigned long)control_page; page_list[PA_TABLE_PAGE] = (unsigned long)__pa(page_address(image->control_code_page)); if (image->type == KEXEC_TYPE_DEFAULT) page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) << PAGE_SHIFT); /* * The segment registers are funny things, they have both a * visible and an invisible part. Whenever the visible part is * set to a specific selector, the invisible part is loaded * with from a table in memory. At no other time is the * descriptor table in memory accessed. * * I take advantage of this here by force loading the * segments, before I zap the gdt with an invalid value. */ load_segments(); /* * The gdt & idt are now invalid. * If you want to load them you must set up your own idt & gdt. */ native_idt_invalidate(); native_gdt_invalidate(); /* now call it */ image->start = relocate_kernel((unsigned long)image->head, (unsigned long)page_list, image->start, image->preserve_context, host_mem_enc_active); #ifdef CONFIG_KEXEC_JUMP if (image->preserve_context) restore_processor_state(); #endif __ftrace_enabled_restore(save_ftrace_enabled); } /* arch-dependent functionality related to kexec file-based syscall */ #ifdef CONFIG_KEXEC_FILE /* * Apply purgatory relocations. * * @pi: Purgatory to be relocated. * @section: Section relocations applying to. * @relsec: Section containing RELAs. * @symtabsec: Corresponding symtab. * * TODO: Some of the code belongs to generic code. Move that in kexec.c. */ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, Elf_Shdr *section, const Elf_Shdr *relsec, const Elf_Shdr *symtabsec) { unsigned int i; Elf64_Rela *rel; Elf64_Sym *sym; void *location; unsigned long address, sec_base, value; const char *strtab, *name, *shstrtab; const Elf_Shdr *sechdrs; /* String & section header string table */ sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff; strtab = (char *)pi->ehdr + sechdrs[symtabsec->sh_link].sh_offset; shstrtab = (char *)pi->ehdr + sechdrs[pi->ehdr->e_shstrndx].sh_offset; rel = (void *)pi->ehdr + relsec->sh_offset; pr_debug("Applying relocate section %s to %u\n", shstrtab + relsec->sh_name, relsec->sh_info); for (i = 0; i < relsec->sh_size / sizeof(*rel); i++) { /* * rel[i].r_offset contains byte offset from beginning * of section to the storage unit affected. * * This is location to update. This is temporary buffer * where section is currently loaded. This will finally be * loaded to a different address later, pointed to by * ->sh_addr. kexec takes care of moving it * (kexec_load_segment()). */ location = pi->purgatory_buf; location += section->sh_offset; location += rel[i].r_offset; /* Final address of the location */ address = section->sh_addr + rel[i].r_offset; /* * rel[i].r_info contains information about symbol table index * w.r.t which relocation must be made and type of relocation * to apply. ELF64_R_SYM() and ELF64_R_TYPE() macros get * these respectively. */ sym = (void *)pi->ehdr + symtabsec->sh_offset; sym += ELF64_R_SYM(rel[i].r_info); if (sym->st_name) name = strtab + sym->st_name; else name = shstrtab + sechdrs[sym->st_shndx].sh_name; pr_debug("Symbol: %s info: %02x shndx: %02x value=%llx size: %llx\n", name, sym->st_info, sym->st_shndx, sym->st_value, sym->st_size); if (sym->st_shndx == SHN_UNDEF) { pr_err("Undefined symbol: %s\n", name); return -ENOEXEC; } if (sym->st_shndx == SHN_COMMON) { pr_err("symbol '%s' in common section\n", name); return -ENOEXEC; } if (sym->st_shndx == SHN_ABS) sec_base = 0; else if (sym->st_shndx >= pi->ehdr->e_shnum) { pr_err("Invalid section %d for symbol %s\n", sym->st_shndx, name); return -ENOEXEC; } else sec_base = pi->sechdrs[sym->st_shndx].sh_addr; value = sym->st_value; value += sec_base; value += rel[i].r_addend; switch (ELF64_R_TYPE(rel[i].r_info)) { case R_X86_64_NONE: break; case R_X86_64_64: *(u64 *)location = value; break; case R_X86_64_32: *(u32 *)location = value; if (value != *(u32 *)location) goto overflow; break; case R_X86_64_32S: *(s32 *)location = value; if ((s64)value != *(s32 *)location) goto overflow; break; case R_X86_64_PC32: case R_X86_64_PLT32: value -= (u64)address; *(u32 *)location = value; break; default: pr_err("Unknown rela relocation: %llu\n", ELF64_R_TYPE(rel[i].r_info)); return -ENOEXEC; } } return 0; overflow: pr_err("Overflow in relocation type %d value 0x%lx\n", (int)ELF64_R_TYPE(rel[i].r_info), value); return -ENOEXEC; } int arch_kimage_file_post_load_cleanup(struct kimage *image) { vfree(image->elf_headers); image->elf_headers = NULL; image->elf_headers_sz = 0; return kexec_image_post_load_cleanup_default(image); } #endif /* CONFIG_KEXEC_FILE */ #ifdef CONFIG_CRASH_DUMP static int kexec_mark_range(unsigned long start, unsigned long end, bool protect) { struct page *page; unsigned int nr_pages; /* * For physical range: [start, end]. We must skip the unassigned * crashk resource with zero-valued "end" member. */ if (!end || start > end) return 0; page = pfn_to_page(start >> PAGE_SHIFT); nr_pages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1; if (protect) return set_pages_ro(page, nr_pages); else return set_pages_rw(page, nr_pages); } static void kexec_mark_crashkres(bool protect) { unsigned long control; kexec_mark_range(crashk_low_res.start, crashk_low_res.end, protect); /* Don't touch the control code page used in crash_kexec().*/ control = PFN_PHYS(page_to_pfn(kexec_crash_image->control_code_page)); /* Control code page is located in the 2nd page. */ kexec_mark_range(crashk_res.start, control + PAGE_SIZE - 1, protect); control += KEXEC_CONTROL_PAGE_SIZE; kexec_mark_range(control, crashk_res.end, protect); } void arch_kexec_protect_crashkres(void) { kexec_mark_crashkres(true); } void arch_kexec_unprotect_crashkres(void) { kexec_mark_crashkres(false); } #endif /* * During a traditional boot under SME, SME will encrypt the kernel, * so the SME kexec kernel also needs to be un-encrypted in order to * replicate a normal SME boot. * * During a traditional boot under SEV, the kernel has already been * loaded encrypted, so the SEV kexec kernel needs to be encrypted in * order to replicate a normal SEV boot. */ int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages, gfp_t gfp) { if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) return 0; /* * If host memory encryption is active we need to be sure that kexec * pages are not encrypted because when we boot to the new kernel the * pages won't be accessed encrypted (initially). */ return set_memory_decrypted((unsigned long)vaddr, pages); } void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) { if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) return; /* * If host memory encryption is active we need to reset the pages back * to being an encrypted mapping before freeing them. */ set_memory_encrypted((unsigned long)vaddr, pages); } |
| 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 | // SPDX-License-Identifier: GPL-2.0-or-later /* * USB SD Host Controller (USHC) controller driver. * * Copyright (C) 2010 Cambridge Silicon Radio Ltd. * * Notes: * - Only version 2 devices are supported. * - Version 2 devices only support SDIO cards/devices (R2 response is * unsupported). * * References: * [USHC] USB SD Host Controller specification (CS-118793-SP) */ #include <linux/module.h> #include <linux/usb.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/dma-mapping.h> #include <linux/mmc/host.h> enum ushc_request { USHC_GET_CAPS = 0x00, USHC_HOST_CTRL = 0x01, USHC_PWR_CTRL = 0x02, USHC_CLK_FREQ = 0x03, USHC_EXEC_CMD = 0x04, USHC_READ_RESP = 0x05, USHC_RESET = 0x06, }; enum ushc_request_type { USHC_GET_CAPS_TYPE = USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, USHC_HOST_CTRL_TYPE = USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, USHC_PWR_CTRL_TYPE = USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, USHC_CLK_FREQ_TYPE = USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, USHC_EXEC_CMD_TYPE = USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, USHC_READ_RESP_TYPE = USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, USHC_RESET_TYPE = USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, }; #define USHC_GET_CAPS_VERSION_MASK 0xff #define USHC_GET_CAPS_3V3 (1 << 8) #define USHC_GET_CAPS_3V0 (1 << 9) #define USHC_GET_CAPS_1V8 (1 << 10) #define USHC_GET_CAPS_HIGH_SPD (1 << 16) #define USHC_HOST_CTRL_4BIT (1 << 1) #define USHC_HOST_CTRL_HIGH_SPD (1 << 0) #define USHC_PWR_CTRL_OFF 0x00 #define USHC_PWR_CTRL_3V3 0x01 #define USHC_PWR_CTRL_3V0 0x02 #define USHC_PWR_CTRL_1V8 0x03 #define USHC_READ_RESP_BUSY (1 << 4) #define USHC_READ_RESP_ERR_TIMEOUT (1 << 3) #define USHC_READ_RESP_ERR_CRC (1 << 2) #define USHC_READ_RESP_ERR_DAT (1 << 1) #define USHC_READ_RESP_ERR_CMD (1 << 0) #define USHC_READ_RESP_ERR_MASK 0x0f struct ushc_cbw { __u8 signature; __u8 cmd_idx; __le16 block_size; __le32 arg; } __attribute__((packed)); #define USHC_CBW_SIGNATURE 'C' struct ushc_csw { __u8 signature; __u8 status; __le32 response; } __attribute__((packed)); #define USHC_CSW_SIGNATURE 'S' struct ushc_int_data { u8 status; u8 reserved[3]; }; #define USHC_INT_STATUS_SDIO_INT (1 << 1) #define USHC_INT_STATUS_CARD_PRESENT (1 << 0) struct ushc_data { struct usb_device *usb_dev; struct mmc_host *mmc; struct urb *int_urb; struct ushc_int_data *int_data; struct urb *cbw_urb; struct ushc_cbw *cbw; struct urb *data_urb; struct urb *csw_urb; struct ushc_csw *csw; spinlock_t lock; struct mmc_request *current_req; u32 caps; u16 host_ctrl; unsigned long flags; u8 last_status; int clock_freq; }; #define DISCONNECTED 0 #define INT_EN 1 #define IGNORE_NEXT_INT 2 static void data_callback(struct urb *urb); static int ushc_hw_reset(struct ushc_data *ushc) { return usb_control_msg(ushc->usb_dev, usb_sndctrlpipe(ushc->usb_dev, 0), USHC_RESET, USHC_RESET_TYPE, 0, 0, NULL, 0, 100); } static int ushc_hw_get_caps(struct ushc_data *ushc) { int ret; int version; ret = usb_control_msg(ushc->usb_dev, usb_rcvctrlpipe(ushc->usb_dev, 0), USHC_GET_CAPS, USHC_GET_CAPS_TYPE, 0, 0, &ushc->caps, sizeof(ushc->caps), 100); if (ret < 0) return ret; ushc->caps = le32_to_cpu(ushc->caps); version = ushc->caps & USHC_GET_CAPS_VERSION_MASK; if (version != 0x02) { dev_err(&ushc->usb_dev->dev, "controller version %d is not supported\n", version); return -EINVAL; } return 0; } static int ushc_hw_set_host_ctrl(struct ushc_data *ushc, u16 mask, u16 val) { u16 host_ctrl; int ret; host_ctrl = (ushc->host_ctrl & ~mask) | val; ret = usb_control_msg(ushc->usb_dev, usb_sndctrlpipe(ushc->usb_dev, 0), USHC_HOST_CTRL, USHC_HOST_CTRL_TYPE, host_ctrl, 0, NULL, 0, 100); if (ret < 0) return ret; ushc->host_ctrl = host_ctrl; return 0; } static void int_callback(struct urb *urb) { struct ushc_data *ushc = urb->context; u8 status, last_status; if (urb->status < 0) return; status = ushc->int_data->status; last_status = ushc->last_status; ushc->last_status = status; /* * Ignore the card interrupt status on interrupt transfers that * were submitted while card interrupts where disabled. * * This avoid occasional spurious interrupts when enabling * interrupts immediately after clearing the source on the card. */ if (!test_and_clear_bit(IGNORE_NEXT_INT, &ushc->flags) && test_bit(INT_EN, &ushc->flags) && status & USHC_INT_STATUS_SDIO_INT) { mmc_signal_sdio_irq(ushc->mmc); } if ((status ^ last_status) & USHC_INT_STATUS_CARD_PRESENT) mmc_detect_change(ushc->mmc, msecs_to_jiffies(100)); if (!test_bit(INT_EN, &ushc->flags)) set_bit(IGNORE_NEXT_INT, &ushc->flags); usb_submit_urb(ushc->int_urb, GFP_ATOMIC); } static void cbw_callback(struct urb *urb) { struct ushc_data *ushc = urb->context; if (urb->status != 0) { usb_unlink_urb(ushc->data_urb); usb_unlink_urb(ushc->csw_urb); } } static void data_callback(struct urb *urb) { struct ushc_data *ushc = urb->context; if (urb->status != 0) usb_unlink_urb(ushc->csw_urb); } static void csw_callback(struct urb *urb) { struct ushc_data *ushc = urb->context; struct mmc_request *req = ushc->current_req; int status; status = ushc->csw->status; if (urb->status != 0) { req->cmd->error = urb->status; } else if (status & USHC_READ_RESP_ERR_CMD) { if (status & USHC_READ_RESP_ERR_CRC) req->cmd->error = -EIO; else req->cmd->error = -ETIMEDOUT; } if (req->data) { if (status & USHC_READ_RESP_ERR_DAT) { if (status & USHC_READ_RESP_ERR_CRC) req->data->error = -EIO; else req->data->error = -ETIMEDOUT; req->data->bytes_xfered = 0; } else { req->data->bytes_xfered = req->data->blksz * req->data->blocks; } } req->cmd->resp[0] = le32_to_cpu(ushc->csw->response); mmc_request_done(ushc->mmc, req); } static void ushc_request(struct mmc_host *mmc, struct mmc_request *req) { struct ushc_data *ushc = mmc_priv(mmc); int ret; unsigned long flags; spin_lock_irqsave(&ushc->lock, flags); if (test_bit(DISCONNECTED, &ushc->flags)) { ret = -ENODEV; goto out; } /* Version 2 firmware doesn't support the R2 response format. */ if (req->cmd->flags & MMC_RSP_136) { ret = -EINVAL; goto out; } /* The Astoria's data FIFOs don't work with clock speeds < 5MHz so limit commands with data to 6MHz or more. */ if (req->data && ushc->clock_freq < 6000000) { ret = -EINVAL; goto out; } ushc->current_req = req; /* Start cmd with CBW. */ ushc->cbw->cmd_idx = cpu_to_le16(req->cmd->opcode); if (req->data) ushc->cbw->block_size = cpu_to_le16(req->data->blksz); else ushc->cbw->block_size = 0; ushc->cbw->arg = cpu_to_le32(req->cmd->arg); ret = usb_submit_urb(ushc->cbw_urb, GFP_ATOMIC); if (ret < 0) goto out; /* Submit data (if any). */ if (req->data) { struct mmc_data *data = req->data; int pipe; if (data->flags & MMC_DATA_READ) pipe = usb_rcvbulkpipe(ushc->usb_dev, 6); else pipe = usb_sndbulkpipe(ushc->usb_dev, 2); usb_fill_bulk_urb(ushc->data_urb, ushc->usb_dev, pipe, NULL, data->sg->length, data_callback, ushc); ushc->data_urb->num_sgs = 1; ushc->data_urb->sg = data->sg; ret = usb_submit_urb(ushc->data_urb, GFP_ATOMIC); if (ret < 0) goto out; } /* Submit CSW. */ ret = usb_submit_urb(ushc->csw_urb, GFP_ATOMIC); out: spin_unlock_irqrestore(&ushc->lock, flags); if (ret < 0) { usb_unlink_urb(ushc->cbw_urb); usb_unlink_urb(ushc->data_urb); req->cmd->error = ret; mmc_request_done(mmc, req); } } static int ushc_set_power(struct ushc_data *ushc, unsigned char power_mode) { u16 voltage; switch (power_mode) { case MMC_POWER_OFF: voltage = USHC_PWR_CTRL_OFF; break; case MMC_POWER_UP: case MMC_POWER_ON: voltage = USHC_PWR_CTRL_3V3; break; default: return -EINVAL; } return usb_control_msg(ushc->usb_dev, usb_sndctrlpipe(ushc->usb_dev, 0), USHC_PWR_CTRL, USHC_PWR_CTRL_TYPE, voltage, 0, NULL, 0, 100); } static int ushc_set_bus_width(struct ushc_data *ushc, int bus_width) { return ushc_hw_set_host_ctrl(ushc, USHC_HOST_CTRL_4BIT, bus_width == 4 ? USHC_HOST_CTRL_4BIT : 0); } static int ushc_set_bus_freq(struct ushc_data *ushc, int clk, bool enable_hs) { int ret; /* Hardware can't detect interrupts while the clock is off. */ if (clk == 0) clk = 400000; ret = ushc_hw_set_host_ctrl(ushc, USHC_HOST_CTRL_HIGH_SPD, enable_hs ? USHC_HOST_CTRL_HIGH_SPD : 0); if (ret < 0) return ret; ret = usb_control_msg(ushc->usb_dev, usb_sndctrlpipe(ushc->usb_dev, 0), USHC_CLK_FREQ, USHC_CLK_FREQ_TYPE, clk & 0xffff, (clk >> 16) & 0xffff, NULL, 0, 100); if (ret < 0) return ret; ushc->clock_freq = clk; return 0; } static void ushc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) { struct ushc_data *ushc = mmc_priv(mmc); ushc_set_power(ushc, ios->power_mode); ushc_set_bus_width(ushc, 1 << ios->bus_width); ushc_set_bus_freq(ushc, ios->clock, ios->timing == MMC_TIMING_SD_HS); } static int ushc_get_cd(struct mmc_host *mmc) { struct ushc_data *ushc = mmc_priv(mmc); return !!(ushc->last_status & USHC_INT_STATUS_CARD_PRESENT); } static void ushc_enable_sdio_irq(struct mmc_host *mmc, int enable) { struct ushc_data *ushc = mmc_priv(mmc); if (enable) set_bit(INT_EN, &ushc->flags); else clear_bit(INT_EN, &ushc->flags); } static void ushc_clean_up(struct ushc_data *ushc) { usb_free_urb(ushc->int_urb); usb_free_urb(ushc->csw_urb); usb_free_urb(ushc->data_urb); usb_free_urb(ushc->cbw_urb); kfree(ushc->int_data); kfree(ushc->cbw); kfree(ushc->csw); mmc_free_host(ushc->mmc); } static const struct mmc_host_ops ushc_ops = { .request = ushc_request, .set_ios = ushc_set_ios, .get_cd = ushc_get_cd, .enable_sdio_irq = ushc_enable_sdio_irq, }; static int ushc_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *usb_dev = interface_to_usbdev(intf); struct mmc_host *mmc; struct ushc_data *ushc; int ret; if (intf->cur_altsetting->desc.bNumEndpoints < 1) return -ENODEV; mmc = mmc_alloc_host(sizeof(struct ushc_data), &intf->dev); if (mmc == NULL) return -ENOMEM; ushc = mmc_priv(mmc); usb_set_intfdata(intf, ushc); ushc->usb_dev = usb_dev; ushc->mmc = mmc; spin_lock_init(&ushc->lock); ret = ushc_hw_reset(ushc); if (ret < 0) goto err; /* Read capabilities. */ ret = ushc_hw_get_caps(ushc); if (ret < 0) goto err; mmc->ops = &ushc_ops; mmc->f_min = 400000; mmc->f_max = 50000000; mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_SDIO_IRQ; mmc->caps |= (ushc->caps & USHC_GET_CAPS_HIGH_SPD) ? MMC_CAP_SD_HIGHSPEED : 0; mmc->max_seg_size = 512*511; mmc->max_segs = 1; mmc->max_req_size = 512*511; mmc->max_blk_size = 512; mmc->max_blk_count = 511; ushc->int_urb = usb_alloc_urb(0, GFP_KERNEL); if (ushc->int_urb == NULL) { ret = -ENOMEM; goto err; } ushc->int_data = kzalloc(sizeof(struct ushc_int_data), GFP_KERNEL); if (ushc->int_data == NULL) { ret = -ENOMEM; goto err; } usb_fill_int_urb(ushc->int_urb, ushc->usb_dev, usb_rcvintpipe(usb_dev, intf->cur_altsetting->endpoint[0].desc.bEndpointAddress), ushc->int_data, sizeof(struct ushc_int_data), int_callback, ushc, intf->cur_altsetting->endpoint[0].desc.bInterval); ushc->cbw_urb = usb_alloc_urb(0, GFP_KERNEL); if (ushc->cbw_urb == NULL) { ret = -ENOMEM; goto err; } ushc->cbw = kzalloc(sizeof(struct ushc_cbw), GFP_KERNEL); if (ushc->cbw == NULL) { ret = -ENOMEM; goto err; } ushc->cbw->signature = USHC_CBW_SIGNATURE; usb_fill_bulk_urb(ushc->cbw_urb, ushc->usb_dev, usb_sndbulkpipe(usb_dev, 2), ushc->cbw, sizeof(struct ushc_cbw), cbw_callback, ushc); ushc->data_urb = usb_alloc_urb(0, GFP_KERNEL); if (ushc->data_urb == NULL) { ret = -ENOMEM; goto err; } ushc->csw_urb = usb_alloc_urb(0, GFP_KERNEL); if (ushc->csw_urb == NULL) { ret = -ENOMEM; goto err; } ushc->csw = kzalloc(sizeof(struct ushc_csw), GFP_KERNEL); if (ushc->csw == NULL) { ret = -ENOMEM; goto err; } usb_fill_bulk_urb(ushc->csw_urb, ushc->usb_dev, usb_rcvbulkpipe(usb_dev, 6), ushc->csw, sizeof(struct ushc_csw), csw_callback, ushc); ret = mmc_add_host(ushc->mmc); if (ret) goto err; ret = usb_submit_urb(ushc->int_urb, GFP_KERNEL); if (ret < 0) { mmc_remove_host(ushc->mmc); goto err; } return 0; err: ushc_clean_up(ushc); return ret; } static void ushc_disconnect(struct usb_interface *intf) { struct ushc_data *ushc = usb_get_intfdata(intf); spin_lock_irq(&ushc->lock); set_bit(DISCONNECTED, &ushc->flags); spin_unlock_irq(&ushc->lock); usb_kill_urb(ushc->int_urb); usb_kill_urb(ushc->cbw_urb); usb_kill_urb(ushc->data_urb); usb_kill_urb(ushc->csw_urb); mmc_remove_host(ushc->mmc); ushc_clean_up(ushc); } static struct usb_device_id ushc_id_table[] = { /* CSR USB SD Host Controller */ { USB_DEVICE(0x0a12, 0x5d10) }, { }, }; MODULE_DEVICE_TABLE(usb, ushc_id_table); static struct usb_driver ushc_driver = { .name = "ushc", .id_table = ushc_id_table, .probe = ushc_probe, .disconnect = ushc_disconnect, }; module_usb_driver(ushc_driver); MODULE_DESCRIPTION("USB SD Host Controller driver"); MODULE_AUTHOR("David Vrabel <david.vrabel@csr.com>"); MODULE_LICENSE("GPL"); |
| 2 2 1 1 1 2 1 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 | // SPDX-License-Identifier: GPL-2.0-or-later /* * HID driver for Saitek devices. * * PS1000 (USB gamepad): * Fixes the HID report descriptor by removing a non-existent axis and * clearing the constant bit on the input reports for buttons and d-pad. * (This module is based on "hid-ortek".) * Copyright (c) 2012 Andreas Hübner * * R.A.T.7, R.A.T.9, M.M.O.7 (USB gaming mice): * Fixes the mode button which cycles through three constantly pressed * buttons. All three press events are mapped to one button and the * missing release event is generated immediately. */ /* */ #include <linux/device.h> #include <linux/hid.h> #include <linux/module.h> #include <linux/kernel.h> #include "hid-ids.h" #define SAITEK_FIX_PS1000 0x0001 #define SAITEK_RELEASE_MODE_RAT7 0x0002 #define SAITEK_RELEASE_MODE_MMO7 0x0004 struct saitek_sc { unsigned long quirks; int mode; }; static int saitek_probe(struct hid_device *hdev, const struct hid_device_id *id) { unsigned long quirks = id->driver_data; struct saitek_sc *ssc; int ret; ssc = devm_kzalloc(&hdev->dev, sizeof(*ssc), GFP_KERNEL); if (ssc == NULL) { hid_err(hdev, "can't alloc saitek descriptor\n"); return -ENOMEM; } ssc->quirks = quirks; ssc->mode = -1; hid_set_drvdata(hdev, ssc); ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); return ret; } ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT); if (ret) { hid_err(hdev, "hw start failed\n"); return ret; } return 0; } static __u8 *saitek_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { struct saitek_sc *ssc = hid_get_drvdata(hdev); if ((ssc->quirks & SAITEK_FIX_PS1000) && *rsize == 137 && rdesc[20] == 0x09 && rdesc[21] == 0x33 && rdesc[94] == 0x81 && rdesc[95] == 0x03 && rdesc[110] == 0x81 && rdesc[111] == 0x03) { hid_info(hdev, "Fixing up Saitek PS1000 report descriptor\n"); /* convert spurious axis to a "noop" Logical Minimum (0) */ rdesc[20] = 0x15; rdesc[21] = 0x00; /* clear constant bit on buttons and d-pad */ rdesc[95] = 0x02; rdesc[111] = 0x02; } return rdesc; } static int saitek_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *raw_data, int size) { struct saitek_sc *ssc = hid_get_drvdata(hdev); if (ssc->quirks & SAITEK_RELEASE_MODE_RAT7 && size == 7) { /* R.A.T.7 uses bits 13, 14, 15 for the mode */ int mode = -1; if (raw_data[1] & 0x01) mode = 0; else if (raw_data[1] & 0x02) mode = 1; else if (raw_data[1] & 0x04) mode = 2; /* clear mode bits */ raw_data[1] &= ~0x07; if (mode != ssc->mode) { hid_dbg(hdev, "entered mode %d\n", mode); if (ssc->mode != -1) { /* use bit 13 as the mode button */ raw_data[1] |= 0x04; } ssc->mode = mode; } } else if (ssc->quirks & SAITEK_RELEASE_MODE_MMO7 && size == 8) { /* M.M.O.7 uses bits 8, 22, 23 for the mode */ int mode = -1; if (raw_data[1] & 0x80) mode = 0; else if (raw_data[2] & 0x01) mode = 1; else if (raw_data[2] & 0x02) mode = 2; /* clear mode bits */ raw_data[1] &= ~0x80; raw_data[2] &= ~0x03; if (mode != ssc->mode) { hid_dbg(hdev, "entered mode %d\n", mode); if (ssc->mode != -1) { /* use bit 8 as the mode button, bits 22 * and 23 do not represent buttons * according to the HID report descriptor */ raw_data[1] |= 0x80; } ssc->mode = mode; } } return 0; } static int saitek_event(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage, __s32 value) { struct saitek_sc *ssc = hid_get_drvdata(hdev); struct input_dev *input = field->hidinput->input; if (usage->type == EV_KEY && value && (((ssc->quirks & SAITEK_RELEASE_MODE_RAT7) && usage->code - BTN_MOUSE == 10) || ((ssc->quirks & SAITEK_RELEASE_MODE_MMO7) && usage->code - BTN_MOUSE == 15))) { input_report_key(input, usage->code, 1); /* report missing release event */ input_report_key(input, usage->code, 0); return 1; } return 0; } static const struct hid_device_id saitek_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_PS1000), .driver_data = SAITEK_FIX_PS1000 }, { HID_USB_DEVICE(USB_VENDOR_ID_MADCATZ, USB_DEVICE_ID_MADCATZ_RAT5), .driver_data = SAITEK_RELEASE_MODE_RAT7 }, { HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_RAT7_OLD), .driver_data = SAITEK_RELEASE_MODE_RAT7 }, { HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_RAT7), .driver_data = SAITEK_RELEASE_MODE_RAT7 }, { HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_RAT7_CONTAGION), .driver_data = SAITEK_RELEASE_MODE_RAT7 }, { HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_RAT9), .driver_data = SAITEK_RELEASE_MODE_RAT7 }, { HID_USB_DEVICE(USB_VENDOR_ID_MADCATZ, USB_DEVICE_ID_MADCATZ_RAT9), .driver_data = SAITEK_RELEASE_MODE_RAT7 }, { HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_MMO7), .driver_data = SAITEK_RELEASE_MODE_MMO7 }, { HID_USB_DEVICE(USB_VENDOR_ID_MADCATZ, USB_DEVICE_ID_MADCATZ_MMO7), .driver_data = SAITEK_RELEASE_MODE_MMO7 }, { } }; MODULE_DEVICE_TABLE(hid, saitek_devices); static struct hid_driver saitek_driver = { .name = "saitek", .id_table = saitek_devices, .probe = saitek_probe, .report_fixup = saitek_report_fixup, .raw_event = saitek_raw_event, .event = saitek_event, }; module_hid_driver(saitek_driver); MODULE_DESCRIPTION("HID driver for Saitek devices."); MODULE_LICENSE("GPL"); |
| 18 18 18 18 15 15 15 24 19 19 19 24 1 1 1 1 1 1 17 5 5 4 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 | // SPDX-License-Identifier: GPL-2.0 #include <linux/fs.h> #include <linux/sched.h> #include <linux/slab.h> #include "internal.h" #include "mount.h" static DEFINE_SPINLOCK(pin_lock); void pin_remove(struct fs_pin *pin) { spin_lock(&pin_lock); hlist_del_init(&pin->m_list); hlist_del_init(&pin->s_list); spin_unlock(&pin_lock); spin_lock_irq(&pin->wait.lock); pin->done = 1; wake_up_locked(&pin->wait); spin_unlock_irq(&pin->wait.lock); } void pin_insert(struct fs_pin *pin, struct vfsmount *m) { spin_lock(&pin_lock); hlist_add_head(&pin->s_list, &m->mnt_sb->s_pins); hlist_add_head(&pin->m_list, &real_mount(m)->mnt_pins); spin_unlock(&pin_lock); } void pin_kill(struct fs_pin *p) { wait_queue_entry_t wait; if (!p) { rcu_read_unlock(); return; } init_wait(&wait); spin_lock_irq(&p->wait.lock); if (likely(!p->done)) { p->done = -1; spin_unlock_irq(&p->wait.lock); rcu_read_unlock(); p->kill(p); return; } if (p->done > 0) { spin_unlock_irq(&p->wait.lock); rcu_read_unlock(); return; } __add_wait_queue(&p->wait, &wait); while (1) { set_current_state(TASK_UNINTERRUPTIBLE); spin_unlock_irq(&p->wait.lock); rcu_read_unlock(); schedule(); rcu_read_lock(); if (likely(list_empty(&wait.entry))) break; /* OK, we know p couldn't have been freed yet */ spin_lock_irq(&p->wait.lock); if (p->done > 0) { spin_unlock_irq(&p->wait.lock); break; } } rcu_read_unlock(); } void mnt_pin_kill(struct mount *m) { while (1) { struct hlist_node *p; rcu_read_lock(); p = READ_ONCE(m->mnt_pins.first); if (!p) { rcu_read_unlock(); break; } pin_kill(hlist_entry(p, struct fs_pin, m_list)); } } void group_pin_kill(struct hlist_head *p) { while (1) { struct hlist_node *q; rcu_read_lock(); q = READ_ONCE(p->first); if (!q) { rcu_read_unlock(); break; } pin_kill(hlist_entry(q, struct fs_pin, s_list)); } } |
| 12 27 8 31 8 8 11 27 27 27 2 2 2 31 31 31 31 31 15 1 2 6 3 3 6 2 3 3 3 25 17 2 3 6 9 4 6 3 6 2 3 3 3 3 3 3 6 6 9 9 8 8 9 14 14 12 14 14 14 5 5 3 14 3 14 2 16 16 16 15 14 14 14 12 2 2 16 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 | // SPDX-License-Identifier: GPL-2.0 /* * Alarmtimer interface * * This interface provides a timer which is similar to hrtimers, * but triggers a RTC alarm if the box is suspend. * * This interface is influenced by the Android RTC Alarm timer * interface. * * Copyright (C) 2010 IBM Corporation * * Author: John Stultz <john.stultz@linaro.org> */ #include <linux/time.h> #include <linux/hrtimer.h> #include <linux/timerqueue.h> #include <linux/rtc.h> #include <linux/sched/signal.h> #include <linux/sched/debug.h> #include <linux/alarmtimer.h> #include <linux/mutex.h> #include <linux/platform_device.h> #include <linux/posix-timers.h> #include <linux/workqueue.h> #include <linux/freezer.h> #include <linux/compat.h> #include <linux/module.h> #include <linux/time_namespace.h> #include "posix-timers.h" #define CREATE_TRACE_POINTS #include <trace/events/alarmtimer.h> /** * struct alarm_base - Alarm timer bases * @lock: Lock for syncrhonized access to the base * @timerqueue: Timerqueue head managing the list of events * @get_ktime: Function to read the time correlating to the base * @get_timespec: Function to read the namespace time correlating to the base * @base_clockid: clockid for the base */ static struct alarm_base { spinlock_t lock; struct timerqueue_head timerqueue; ktime_t (*get_ktime)(void); void (*get_timespec)(struct timespec64 *tp); clockid_t base_clockid; } alarm_bases[ALARM_NUMTYPE]; #if defined(CONFIG_POSIX_TIMERS) || defined(CONFIG_RTC_CLASS) /* freezer information to handle clock_nanosleep triggered wakeups */ static enum alarmtimer_type freezer_alarmtype; static ktime_t freezer_expires; static ktime_t freezer_delta; static DEFINE_SPINLOCK(freezer_delta_lock); #endif #ifdef CONFIG_RTC_CLASS /* rtc timer and device for setting alarm wakeups at suspend */ static struct rtc_timer rtctimer; static struct rtc_device *rtcdev; static DEFINE_SPINLOCK(rtcdev_lock); /** * alarmtimer_get_rtcdev - Return selected rtcdevice * * This function returns the rtc device to use for wakealarms. */ struct rtc_device *alarmtimer_get_rtcdev(void) { unsigned long flags; struct rtc_device *ret; spin_lock_irqsave(&rtcdev_lock, flags); ret = rtcdev; spin_unlock_irqrestore(&rtcdev_lock, flags); return ret; } EXPORT_SYMBOL_GPL(alarmtimer_get_rtcdev); static int alarmtimer_rtc_add_device(struct device *dev) { unsigned long flags; struct rtc_device *rtc = to_rtc_device(dev); struct platform_device *pdev; int ret = 0; if (rtcdev) return -EBUSY; if (!test_bit(RTC_FEATURE_ALARM, rtc->features)) return -1; if (!device_may_wakeup(rtc->dev.parent)) return -1; pdev = platform_device_register_data(dev, "alarmtimer", PLATFORM_DEVID_AUTO, NULL, 0); if (!IS_ERR(pdev)) device_init_wakeup(&pdev->dev, true); spin_lock_irqsave(&rtcdev_lock, flags); if (!IS_ERR(pdev) && !rtcdev) { if (!try_module_get(rtc->owner)) { ret = -1; goto unlock; } rtcdev = rtc; /* hold a reference so it doesn't go away */ get_device(dev); pdev = NULL; } else { ret = -1; } unlock: spin_unlock_irqrestore(&rtcdev_lock, flags); platform_device_unregister(pdev); return ret; } static inline void alarmtimer_rtc_timer_init(void) { rtc_timer_init(&rtctimer, NULL, NULL); } static struct class_interface alarmtimer_rtc_interface = { .add_dev = &alarmtimer_rtc_add_device, }; static int alarmtimer_rtc_interface_setup(void) { alarmtimer_rtc_interface.class = &rtc_class; return class_interface_register(&alarmtimer_rtc_interface); } static void alarmtimer_rtc_interface_remove(void) { class_interface_unregister(&alarmtimer_rtc_interface); } #else static inline int alarmtimer_rtc_interface_setup(void) { return 0; } static inline void alarmtimer_rtc_interface_remove(void) { } static inline void alarmtimer_rtc_timer_init(void) { } #endif /** * alarmtimer_enqueue - Adds an alarm timer to an alarm_base timerqueue * @base: pointer to the base where the timer is being run * @alarm: pointer to alarm being enqueued. * * Adds alarm to a alarm_base timerqueue * * Must hold base->lock when calling. */ static void alarmtimer_enqueue(struct alarm_base *base, struct alarm *alarm) { if (alarm->state & ALARMTIMER_STATE_ENQUEUED) timerqueue_del(&base->timerqueue, &alarm->node); timerqueue_add(&base->timerqueue, &alarm->node); alarm->state |= ALARMTIMER_STATE_ENQUEUED; } /** * alarmtimer_dequeue - Removes an alarm timer from an alarm_base timerqueue * @base: pointer to the base where the timer is running * @alarm: pointer to alarm being removed * * Removes alarm to a alarm_base timerqueue * * Must hold base->lock when calling. */ static void alarmtimer_dequeue(struct alarm_base *base, struct alarm *alarm) { if (!(alarm->state & ALARMTIMER_STATE_ENQUEUED)) return; timerqueue_del(&base->timerqueue, &alarm->node); alarm->state &= ~ALARMTIMER_STATE_ENQUEUED; } /** * alarmtimer_fired - Handles alarm hrtimer being fired. * @timer: pointer to hrtimer being run * * When a alarm timer fires, this runs through the timerqueue to * see which alarms expired, and runs those. If there are more alarm * timers queued for the future, we set the hrtimer to fire when * the next future alarm timer expires. */ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer) { struct alarm *alarm = container_of(timer, struct alarm, timer); struct alarm_base *base = &alarm_bases[alarm->type]; unsigned long flags; int ret = HRTIMER_NORESTART; int restart = ALARMTIMER_NORESTART; spin_lock_irqsave(&base->lock, flags); alarmtimer_dequeue(base, alarm); spin_unlock_irqrestore(&base->lock, flags); if (alarm->function) restart = alarm->function(alarm, base->get_ktime()); spin_lock_irqsave(&base->lock, flags); if (restart != ALARMTIMER_NORESTART) { hrtimer_set_expires(&alarm->timer, alarm->node.expires); alarmtimer_enqueue(base, alarm); ret = HRTIMER_RESTART; } spin_unlock_irqrestore(&base->lock, flags); trace_alarmtimer_fired(alarm, base->get_ktime()); return ret; } ktime_t alarm_expires_remaining(const struct alarm *alarm) { struct alarm_base *base = &alarm_bases[alarm->type]; return ktime_sub(alarm->node.expires, base->get_ktime()); } EXPORT_SYMBOL_GPL(alarm_expires_remaining); #ifdef CONFIG_RTC_CLASS /** * alarmtimer_suspend - Suspend time callback * @dev: unused * * When we are going into suspend, we look through the bases * to see which is the soonest timer to expire. We then * set an rtc timer to fire that far into the future, which * will wake us from suspend. */ static int alarmtimer_suspend(struct device *dev) { ktime_t min, now, expires; int i, ret, type; struct rtc_device *rtc; unsigned long flags; struct rtc_time tm; spin_lock_irqsave(&freezer_delta_lock, flags); min = freezer_delta; expires = freezer_expires; type = freezer_alarmtype; freezer_delta = 0; spin_unlock_irqrestore(&freezer_delta_lock, flags); rtc = alarmtimer_get_rtcdev(); /* If we have no rtcdev, just return */ if (!rtc) return 0; /* Find the soonest timer to expire*/ for (i = 0; i < ALARM_NUMTYPE; i++) { struct alarm_base *base = &alarm_bases[i]; struct timerqueue_node *next; ktime_t delta; spin_lock_irqsave(&base->lock, flags); next = timerqueue_getnext(&base->timerqueue); spin_unlock_irqrestore(&base->lock, flags); if (!next) continue; delta = ktime_sub(next->expires, base->get_ktime()); if (!min || (delta < min)) { expires = next->expires; min = delta; type = i; } } if (min == 0) return 0; if (ktime_to_ns(min) < 2 * NSEC_PER_SEC) { pm_wakeup_event(dev, 2 * MSEC_PER_SEC); return -EBUSY; } trace_alarmtimer_suspend(expires, type); /* Setup an rtc timer to fire that far in the future */ rtc_timer_cancel(rtc, &rtctimer); rtc_read_time(rtc, &tm); now = rtc_tm_to_ktime(tm); /* * If the RTC alarm timer only supports a limited time offset, set the * alarm time to the maximum supported value. * The system may wake up earlier (possibly much earlier) than expected * when the alarmtimer runs. This is the best the kernel can do if * the alarmtimer exceeds the time that the rtc device can be programmed * for. */ min = rtc_bound_alarmtime(rtc, min); now = ktime_add(now, min); /* Set alarm, if in the past reject suspend briefly to handle */ ret = rtc_timer_start(rtc, &rtctimer, now, 0); if (ret < 0) pm_wakeup_event(dev, MSEC_PER_SEC); return ret; } static int alarmtimer_resume(struct device *dev) { struct rtc_device *rtc; rtc = alarmtimer_get_rtcdev(); if (rtc) rtc_timer_cancel(rtc, &rtctimer); return 0; } #else static int alarmtimer_suspend(struct device *dev) { return 0; } static int alarmtimer_resume(struct device *dev) { return 0; } #endif static void __alarm_init(struct alarm *alarm, enum alarmtimer_type type, enum alarmtimer_restart (*function)(struct alarm *, ktime_t)) { timerqueue_init(&alarm->node); alarm->timer.function = alarmtimer_fired; alarm->function = function; alarm->type = type; alarm->state = ALARMTIMER_STATE_INACTIVE; } /** * alarm_init - Initialize an alarm structure * @alarm: ptr to alarm to be initialized * @type: the type of the alarm * @function: callback that is run when the alarm fires */ void alarm_init(struct alarm *alarm, enum alarmtimer_type type, enum alarmtimer_restart (*function)(struct alarm *, ktime_t)) { hrtimer_init(&alarm->timer, alarm_bases[type].base_clockid, HRTIMER_MODE_ABS); __alarm_init(alarm, type, function); } EXPORT_SYMBOL_GPL(alarm_init); /** * alarm_start - Sets an absolute alarm to fire * @alarm: ptr to alarm to set * @start: time to run the alarm */ void alarm_start(struct alarm *alarm, ktime_t start) { struct alarm_base *base = &alarm_bases[alarm->type]; unsigned long flags; spin_lock_irqsave(&base->lock, flags); alarm->node.expires = start; alarmtimer_enqueue(base, alarm); hrtimer_start(&alarm->timer, alarm->node.expires, HRTIMER_MODE_ABS); spin_unlock_irqrestore(&base->lock, flags); trace_alarmtimer_start(alarm, base->get_ktime()); } EXPORT_SYMBOL_GPL(alarm_start); /** * alarm_start_relative - Sets a relative alarm to fire * @alarm: ptr to alarm to set * @start: time relative to now to run the alarm */ void alarm_start_relative(struct alarm *alarm, ktime_t start) { struct alarm_base *base = &alarm_bases[alarm->type]; start = ktime_add_safe(start, base->get_ktime()); alarm_start(alarm, start); } EXPORT_SYMBOL_GPL(alarm_start_relative); void alarm_restart(struct alarm *alarm) { struct alarm_base *base = &alarm_bases[alarm->type]; unsigned long flags; spin_lock_irqsave(&base->lock, flags); hrtimer_set_expires(&alarm->timer, alarm->node.expires); hrtimer_restart(&alarm->timer); alarmtimer_enqueue(base, alarm); spin_unlock_irqrestore(&base->lock, flags); } EXPORT_SYMBOL_GPL(alarm_restart); /** * alarm_try_to_cancel - Tries to cancel an alarm timer * @alarm: ptr to alarm to be canceled * * Returns 1 if the timer was canceled, 0 if it was not running, * and -1 if the callback was running */ int alarm_try_to_cancel(struct alarm *alarm) { struct alarm_base *base = &alarm_bases[alarm->type]; unsigned long flags; int ret; spin_lock_irqsave(&base->lock, flags); ret = hrtimer_try_to_cancel(&alarm->timer); if (ret >= 0) alarmtimer_dequeue(base, alarm); spin_unlock_irqrestore(&base->lock, flags); trace_alarmtimer_cancel(alarm, base->get_ktime()); return ret; } EXPORT_SYMBOL_GPL(alarm_try_to_cancel); /** * alarm_cancel - Spins trying to cancel an alarm timer until it is done * @alarm: ptr to alarm to be canceled * * Returns 1 if the timer was canceled, 0 if it was not active. */ int alarm_cancel(struct alarm *alarm) { for (;;) { int ret = alarm_try_to_cancel(alarm); if (ret >= 0) return ret; hrtimer_cancel_wait_running(&alarm->timer); } } EXPORT_SYMBOL_GPL(alarm_cancel); u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval) { u64 overrun = 1; ktime_t delta; delta = ktime_sub(now, alarm->node.expires); if (delta < 0) return 0; if (unlikely(delta >= interval)) { s64 incr = ktime_to_ns(interval); overrun = ktime_divns(delta, incr); alarm->node.expires = ktime_add_ns(alarm->node.expires, incr*overrun); if (alarm->node.expires > now) return overrun; /* * This (and the ktime_add() below) is the * correction for exact: */ overrun++; } alarm->node.expires = ktime_add_safe(alarm->node.expires, interval); return overrun; } EXPORT_SYMBOL_GPL(alarm_forward); static u64 __alarm_forward_now(struct alarm *alarm, ktime_t interval, bool throttle) { struct alarm_base *base = &alarm_bases[alarm->type]; ktime_t now = base->get_ktime(); if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && throttle) { /* * Same issue as with posix_timer_fn(). Timers which are * periodic but the signal is ignored can starve the system * with a very small interval. The real fix which was * promised in the context of posix_timer_fn() never * materialized, but someone should really work on it. * * To prevent DOS fake @now to be 1 jiffie out which keeps * the overrun accounting correct but creates an * inconsistency vs. timer_gettime(2). */ ktime_t kj = NSEC_PER_SEC / HZ; if (interval < kj) now = ktime_add(now, kj); } return alarm_forward(alarm, now, interval); } u64 alarm_forward_now(struct alarm *alarm, ktime_t interval) { return __alarm_forward_now(alarm, interval, false); } EXPORT_SYMBOL_GPL(alarm_forward_now); #ifdef CONFIG_POSIX_TIMERS static void alarmtimer_freezerset(ktime_t absexp, enum alarmtimer_type type) { struct alarm_base *base; unsigned long flags; ktime_t delta; switch(type) { case ALARM_REALTIME: base = &alarm_bases[ALARM_REALTIME]; type = ALARM_REALTIME_FREEZER; break; case ALARM_BOOTTIME: base = &alarm_bases[ALARM_BOOTTIME]; type = ALARM_BOOTTIME_FREEZER; break; default: WARN_ONCE(1, "Invalid alarm type: %d\n", type); return; } delta = ktime_sub(absexp, base->get_ktime()); spin_lock_irqsave(&freezer_delta_lock, flags); if (!freezer_delta || (delta < freezer_delta)) { freezer_delta = delta; freezer_expires = absexp; freezer_alarmtype = type; } spin_unlock_irqrestore(&freezer_delta_lock, flags); } /** * clock2alarm - helper that converts from clockid to alarmtypes * @clockid: clockid. */ static enum alarmtimer_type clock2alarm(clockid_t clockid) { if (clockid == CLOCK_REALTIME_ALARM) return ALARM_REALTIME; if (clockid == CLOCK_BOOTTIME_ALARM) return ALARM_BOOTTIME; return -1; } /** * alarm_handle_timer - Callback for posix timers * @alarm: alarm that fired * @now: time at the timer expiration * * Posix timer callback for expired alarm timers. * * Return: whether the timer is to be restarted */ static enum alarmtimer_restart alarm_handle_timer(struct alarm *alarm, ktime_t now) { struct k_itimer *ptr = container_of(alarm, struct k_itimer, it.alarm.alarmtimer); enum alarmtimer_restart result = ALARMTIMER_NORESTART; unsigned long flags; int si_private = 0; spin_lock_irqsave(&ptr->it_lock, flags); ptr->it_active = 0; if (ptr->it_interval) si_private = ++ptr->it_requeue_pending; if (posix_timer_event(ptr, si_private) && ptr->it_interval) { /* * Handle ignored signals and rearm the timer. This will go * away once we handle ignored signals proper. Ensure that * small intervals cannot starve the system. */ ptr->it_overrun += __alarm_forward_now(alarm, ptr->it_interval, true); ++ptr->it_requeue_pending; ptr->it_active = 1; result = ALARMTIMER_RESTART; } spin_unlock_irqrestore(&ptr->it_lock, flags); return result; } /** * alarm_timer_rearm - Posix timer callback for rearming timer * @timr: Pointer to the posixtimer data struct */ static void alarm_timer_rearm(struct k_itimer *timr) { struct alarm *alarm = &timr->it.alarm.alarmtimer; timr->it_overrun += alarm_forward_now(alarm, timr->it_interval); alarm_start(alarm, alarm->node.expires); } /** * alarm_timer_forward - Posix timer callback for forwarding timer * @timr: Pointer to the posixtimer data struct * @now: Current time to forward the timer against */ static s64 alarm_timer_forward(struct k_itimer *timr, ktime_t now) { struct alarm *alarm = &timr->it.alarm.alarmtimer; return alarm_forward(alarm, timr->it_interval, now); } /** * alarm_timer_remaining - Posix timer callback to retrieve remaining time * @timr: Pointer to the posixtimer data struct * @now: Current time to calculate against */ static ktime_t alarm_timer_remaining(struct k_itimer *timr, ktime_t now) { struct alarm *alarm = &timr->it.alarm.alarmtimer; return ktime_sub(alarm->node.expires, now); } /** * alarm_timer_try_to_cancel - Posix timer callback to cancel a timer * @timr: Pointer to the posixtimer data struct */ static int alarm_timer_try_to_cancel(struct k_itimer *timr) { return alarm_try_to_cancel(&timr->it.alarm.alarmtimer); } /** * alarm_timer_wait_running - Posix timer callback to wait for a timer * @timr: Pointer to the posixtimer data struct * * Called from the core code when timer cancel detected that the callback * is running. @timr is unlocked and rcu read lock is held to prevent it * from being freed. */ static void alarm_timer_wait_running(struct k_itimer *timr) { hrtimer_cancel_wait_running(&timr->it.alarm.alarmtimer.timer); } /** * alarm_timer_arm - Posix timer callback to arm a timer * @timr: Pointer to the posixtimer data struct * @expires: The new expiry time * @absolute: Expiry value is absolute time * @sigev_none: Posix timer does not deliver signals */ static void alarm_timer_arm(struct k_itimer *timr, ktime_t expires, bool absolute, bool sigev_none) { struct alarm *alarm = &timr->it.alarm.alarmtimer; struct alarm_base *base = &alarm_bases[alarm->type]; if (!absolute) expires = ktime_add_safe(expires, base->get_ktime()); if (sigev_none) alarm->node.expires = expires; else alarm_start(&timr->it.alarm.alarmtimer, expires); } /** * alarm_clock_getres - posix getres interface * @which_clock: clockid * @tp: timespec to fill * * Returns the granularity of underlying alarm base clock */ static int alarm_clock_getres(const clockid_t which_clock, struct timespec64 *tp) { if (!alarmtimer_get_rtcdev()) return -EINVAL; tp->tv_sec = 0; tp->tv_nsec = hrtimer_resolution; return 0; } /** * alarm_clock_get_timespec - posix clock_get_timespec interface * @which_clock: clockid * @tp: timespec to fill. * * Provides the underlying alarm base time in a tasks time namespace. */ static int alarm_clock_get_timespec(clockid_t which_clock, struct timespec64 *tp) { struct alarm_base *base = &alarm_bases[clock2alarm(which_clock)]; if (!alarmtimer_get_rtcdev()) return -EINVAL; base->get_timespec(tp); return 0; } /** * alarm_clock_get_ktime - posix clock_get_ktime interface * @which_clock: clockid * * Provides the underlying alarm base time in the root namespace. */ static ktime_t alarm_clock_get_ktime(clockid_t which_clock) { struct alarm_base *base = &alarm_bases[clock2alarm(which_clock)]; if (!alarmtimer_get_rtcdev()) return -EINVAL; return base->get_ktime(); } /** * alarm_timer_create - posix timer_create interface * @new_timer: k_itimer pointer to manage * * Initializes the k_itimer structure. */ static int alarm_timer_create(struct k_itimer *new_timer) { enum alarmtimer_type type; if (!alarmtimer_get_rtcdev()) return -EOPNOTSUPP; if (!capable(CAP_WAKE_ALARM)) return -EPERM; type = clock2alarm(new_timer->it_clock); alarm_init(&new_timer->it.alarm.alarmtimer, type, alarm_handle_timer); return 0; } /** * alarmtimer_nsleep_wakeup - Wakeup function for alarm_timer_nsleep * @alarm: ptr to alarm that fired * @now: time at the timer expiration * * Wakes up the task that set the alarmtimer * * Return: ALARMTIMER_NORESTART */ static enum alarmtimer_restart alarmtimer_nsleep_wakeup(struct alarm *alarm, ktime_t now) { struct task_struct *task = alarm->data; alarm->data = NULL; if (task) wake_up_process(task); return ALARMTIMER_NORESTART; } /** * alarmtimer_do_nsleep - Internal alarmtimer nsleep implementation * @alarm: ptr to alarmtimer * @absexp: absolute expiration time * @type: alarm type (BOOTTIME/REALTIME). * * Sets the alarm timer and sleeps until it is fired or interrupted. */ static int alarmtimer_do_nsleep(struct alarm *alarm, ktime_t absexp, enum alarmtimer_type type) { struct restart_block *restart; alarm->data = (void *)current; do { set_current_state(TASK_INTERRUPTIBLE); alarm_start(alarm, absexp); if (likely(alarm->data)) schedule(); alarm_cancel(alarm); } while (alarm->data && !signal_pending(current)); __set_current_state(TASK_RUNNING); destroy_hrtimer_on_stack(&alarm->timer); if (!alarm->data) return 0; if (freezing(current)) alarmtimer_freezerset(absexp, type); restart = ¤t->restart_block; if (restart->nanosleep.type != TT_NONE) { struct timespec64 rmt; ktime_t rem; rem = ktime_sub(absexp, alarm_bases[type].get_ktime()); if (rem <= 0) return 0; rmt = ktime_to_timespec64(rem); return nanosleep_copyout(restart, &rmt); } return -ERESTART_RESTARTBLOCK; } static void alarm_init_on_stack(struct alarm *alarm, enum alarmtimer_type type, enum alarmtimer_restart (*function)(struct alarm *, ktime_t)) { hrtimer_init_on_stack(&alarm->timer, alarm_bases[type].base_clockid, HRTIMER_MODE_ABS); __alarm_init(alarm, type, function); } /** * alarm_timer_nsleep_restart - restartblock alarmtimer nsleep * @restart: ptr to restart block * * Handles restarted clock_nanosleep calls */ static long __sched alarm_timer_nsleep_restart(struct restart_block *restart) { enum alarmtimer_type type = restart->nanosleep.clockid; ktime_t exp = restart->nanosleep.expires; struct alarm alarm; alarm_init_on_stack(&alarm, type, alarmtimer_nsleep_wakeup); return alarmtimer_do_nsleep(&alarm, exp, type); } /** * alarm_timer_nsleep - alarmtimer nanosleep * @which_clock: clockid * @flags: determines abstime or relative * @tsreq: requested sleep time (abs or rel) * * Handles clock_nanosleep calls against _ALARM clockids */ static int alarm_timer_nsleep(const clockid_t which_clock, int flags, const struct timespec64 *tsreq) { enum alarmtimer_type type = clock2alarm(which_clock); struct restart_block *restart = ¤t->restart_block; struct alarm alarm; ktime_t exp; int ret; if (!alarmtimer_get_rtcdev()) return -EOPNOTSUPP; if (flags & ~TIMER_ABSTIME) return -EINVAL; if (!capable(CAP_WAKE_ALARM)) return -EPERM; alarm_init_on_stack(&alarm, type, alarmtimer_nsleep_wakeup); exp = timespec64_to_ktime(*tsreq); /* Convert (if necessary) to absolute time */ if (flags != TIMER_ABSTIME) { ktime_t now = alarm_bases[type].get_ktime(); exp = ktime_add_safe(now, exp); } else { exp = timens_ktime_to_host(which_clock, exp); } ret = alarmtimer_do_nsleep(&alarm, exp, type); if (ret != -ERESTART_RESTARTBLOCK) return ret; /* abs timers don't set remaining time or restart */ if (flags == TIMER_ABSTIME) return -ERESTARTNOHAND; restart->nanosleep.clockid = type; restart->nanosleep.expires = exp; set_restart_fn(restart, alarm_timer_nsleep_restart); return ret; } const struct k_clock alarm_clock = { .clock_getres = alarm_clock_getres, .clock_get_ktime = alarm_clock_get_ktime, .clock_get_timespec = alarm_clock_get_timespec, .timer_create = alarm_timer_create, .timer_set = common_timer_set, .timer_del = common_timer_del, .timer_get = common_timer_get, .timer_arm = alarm_timer_arm, .timer_rearm = alarm_timer_rearm, .timer_forward = alarm_timer_forward, .timer_remaining = alarm_timer_remaining, .timer_try_to_cancel = alarm_timer_try_to_cancel, .timer_wait_running = alarm_timer_wait_running, .nsleep = alarm_timer_nsleep, }; #endif /* CONFIG_POSIX_TIMERS */ /* Suspend hook structures */ static const struct dev_pm_ops alarmtimer_pm_ops = { .suspend = alarmtimer_suspend, .resume = alarmtimer_resume, }; static struct platform_driver alarmtimer_driver = { .driver = { .name = "alarmtimer", .pm = &alarmtimer_pm_ops, } }; static void get_boottime_timespec(struct timespec64 *tp) { ktime_get_boottime_ts64(tp); timens_add_boottime(tp); } /** * alarmtimer_init - Initialize alarm timer code * * This function initializes the alarm bases and registers * the posix clock ids. */ static int __init alarmtimer_init(void) { int error; int i; alarmtimer_rtc_timer_init(); /* Initialize alarm bases */ alarm_bases[ALARM_REALTIME].base_clockid = CLOCK_REALTIME; alarm_bases[ALARM_REALTIME].get_ktime = &ktime_get_real; alarm_bases[ALARM_REALTIME].get_timespec = ktime_get_real_ts64; alarm_bases[ALARM_BOOTTIME].base_clockid = CLOCK_BOOTTIME; alarm_bases[ALARM_BOOTTIME].get_ktime = &ktime_get_boottime; alarm_bases[ALARM_BOOTTIME].get_timespec = get_boottime_timespec; for (i = 0; i < ALARM_NUMTYPE; i++) { timerqueue_init_head(&alarm_bases[i].timerqueue); spin_lock_init(&alarm_bases[i].lock); } error = alarmtimer_rtc_interface_setup(); if (error) return error; error = platform_driver_register(&alarmtimer_driver); if (error) goto out_if; return 0; out_if: alarmtimer_rtc_interface_remove(); return error; } device_initcall(alarmtimer_init); |
| 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 | // SPDX-License-Identifier: GPL-2.0 /* * SM4 Cipher Algorithm. * * Copyright (C) 2018 ARM Limited or its affiliates. * All rights reserved. */ #include <crypto/algapi.h> #include <crypto/sm4.h> #include <linux/module.h> #include <linux/init.h> #include <linux/types.h> #include <linux/errno.h> #include <asm/byteorder.h> #include <asm/unaligned.h> /** * sm4_setkey - Set the SM4 key. * @tfm: The %crypto_tfm that is used in the context. * @in_key: The input key. * @key_len: The size of the key. * * This function uses sm4_expandkey() to expand the key. * &sm4_ctx _must_ be the private data embedded in @tfm which is * retrieved with crypto_tfm_ctx(). * * Return: 0 on success; -EINVAL on failure (only happens for bad key lengths) */ static int sm4_setkey(struct crypto_tfm *tfm, const u8 *in_key, unsigned int key_len) { struct sm4_ctx *ctx = crypto_tfm_ctx(tfm); return sm4_expandkey(ctx, in_key, key_len); } /* encrypt a block of text */ static void sm4_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm); sm4_crypt_block(ctx->rkey_enc, out, in); } /* decrypt a block of text */ static void sm4_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { const struct sm4_ctx *ctx = crypto_tfm_ctx(tfm); sm4_crypt_block(ctx->rkey_dec, out, in); } static struct crypto_alg sm4_alg = { .cra_name = "sm4", .cra_driver_name = "sm4-generic", .cra_priority = 100, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = SM4_BLOCK_SIZE, .cra_ctxsize = sizeof(struct sm4_ctx), .cra_module = THIS_MODULE, .cra_u = { .cipher = { .cia_min_keysize = SM4_KEY_SIZE, .cia_max_keysize = SM4_KEY_SIZE, .cia_setkey = sm4_setkey, .cia_encrypt = sm4_encrypt, .cia_decrypt = sm4_decrypt } } }; static int __init sm4_init(void) { return crypto_register_alg(&sm4_alg); } static void __exit sm4_fini(void) { crypto_unregister_alg(&sm4_alg); } subsys_initcall(sm4_init); module_exit(sm4_fini); MODULE_DESCRIPTION("SM4 Cipher Algorithm"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS_CRYPTO("sm4"); MODULE_ALIAS_CRYPTO("sm4-generic"); |
| 32854 32852 32771 9 32774 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_UNWIND_H #define _ASM_X86_UNWIND_H #include <linux/sched.h> #include <linux/ftrace.h> #include <linux/rethook.h> #include <asm/ptrace.h> #include <asm/stacktrace.h> #define IRET_FRAME_OFFSET (offsetof(struct pt_regs, ip)) #define IRET_FRAME_SIZE (sizeof(struct pt_regs) - IRET_FRAME_OFFSET) struct unwind_state { struct stack_info stack_info; unsigned long stack_mask; struct task_struct *task; int graph_idx; #if defined(CONFIG_RETHOOK) struct llist_node *kr_cur; #endif bool error; #if defined(CONFIG_UNWINDER_ORC) bool signal, full_regs; unsigned long sp, bp, ip; struct pt_regs *regs, *prev_regs; #elif defined(CONFIG_UNWINDER_FRAME_POINTER) bool got_irq; unsigned long *bp, *orig_sp, ip; /* * If non-NULL: The current frame is incomplete and doesn't contain a * valid BP. When looking for the next frame, use this instead of the * non-existent saved BP. */ unsigned long *next_bp; struct pt_regs *regs; #else unsigned long *sp; #endif }; void __unwind_start(struct unwind_state *state, struct task_struct *task, struct pt_regs *regs, unsigned long *first_frame); bool unwind_next_frame(struct unwind_state *state); unsigned long unwind_get_return_address(struct unwind_state *state); unsigned long *unwind_get_return_address_ptr(struct unwind_state *state); static inline bool unwind_done(struct unwind_state *state) { return state->stack_info.type == STACK_TYPE_UNKNOWN; } static inline bool unwind_error(struct unwind_state *state) { return state->error; } static inline void unwind_start(struct unwind_state *state, struct task_struct *task, struct pt_regs *regs, unsigned long *first_frame) { first_frame = first_frame ? : get_stack_pointer(task, regs); __unwind_start(state, task, regs, first_frame); } #if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER) /* * If 'partial' returns true, only the iret frame registers are valid. */ static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state, bool *partial) { if (unwind_done(state)) return NULL; if (partial) { #ifdef CONFIG_UNWINDER_ORC *partial = !state->full_regs; #else *partial = false; #endif } return state->regs; } #else static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state, bool *partial) { return NULL; } #endif #ifdef CONFIG_UNWINDER_ORC void unwind_init(void); void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, void *orc, size_t orc_size); #else static inline void unwind_init(void) {} static inline void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, void *orc, size_t orc_size) {} #endif static inline unsigned long unwind_recover_rethook(struct unwind_state *state, unsigned long addr, unsigned long *addr_p) { #ifdef CONFIG_RETHOOK if (is_rethook_trampoline(addr)) return rethook_find_ret_addr(state->task, (unsigned long)addr_p, &state->kr_cur); #endif return addr; } /* Recover the return address modified by rethook and ftrace_graph. */ static inline unsigned long unwind_recover_ret_addr(struct unwind_state *state, unsigned long addr, unsigned long *addr_p) { unsigned long ret; ret = ftrace_graph_ret_addr(state->task, &state->graph_idx, addr, addr_p); return unwind_recover_rethook(state, ret, addr_p); } /* * This disables KASAN checking when reading a value from another task's stack, * since the other task could be running on another CPU and could have poisoned * the stack in the meantime. */ #define READ_ONCE_TASK_STACK(task, x) \ ({ \ unsigned long val; \ if (task == current) \ val = READ_ONCE(x); \ else \ val = READ_ONCE_NOCHECK(x); \ val; \ }) static inline bool task_on_another_cpu(struct task_struct *task) { #ifdef CONFIG_SMP return task != current && task->on_cpu; #else return false; #endif } #endif /* _ASM_X86_UNWIND_H */ |
| 21 60 60 60 60 60 60 60 3116 3120 22 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 | // SPDX-License-Identifier: GPL-2.0 /* * Wakeup statistics in sysfs * * Copyright (c) 2019 Linux Foundation * Copyright (c) 2019 Greg Kroah-Hartman <gregkh@linuxfoundation.org> * Copyright (c) 2019 Google Inc. */ #include <linux/device.h> #include <linux/idr.h> #include <linux/init.h> #include <linux/kdev_t.h> #include <linux/kernel.h> #include <linux/kobject.h> #include <linux/slab.h> #include <linux/timekeeping.h> #include "power.h" static struct class *wakeup_class; #define wakeup_attr(_name) \ static ssize_t _name##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct wakeup_source *ws = dev_get_drvdata(dev); \ \ return sysfs_emit(buf, "%lu\n", ws->_name); \ } \ static DEVICE_ATTR_RO(_name) wakeup_attr(active_count); wakeup_attr(event_count); wakeup_attr(wakeup_count); wakeup_attr(expire_count); static ssize_t active_time_ms_show(struct device *dev, struct device_attribute *attr, char *buf) { struct wakeup_source *ws = dev_get_drvdata(dev); ktime_t active_time = ws->active ? ktime_sub(ktime_get(), ws->last_time) : 0; return sysfs_emit(buf, "%lld\n", ktime_to_ms(active_time)); } static DEVICE_ATTR_RO(active_time_ms); static ssize_t total_time_ms_show(struct device *dev, struct device_attribute *attr, char *buf) { struct wakeup_source *ws = dev_get_drvdata(dev); ktime_t active_time; ktime_t total_time = ws->total_time; if (ws->active) { active_time = ktime_sub(ktime_get(), ws->last_time); total_time = ktime_add(total_time, active_time); } return sysfs_emit(buf, "%lld\n", ktime_to_ms(total_time)); } static DEVICE_ATTR_RO(total_time_ms); static ssize_t max_time_ms_show(struct device *dev, struct device_attribute *attr, char *buf) { struct wakeup_source *ws = dev_get_drvdata(dev); ktime_t active_time; ktime_t max_time = ws->max_time; if (ws->active) { active_time = ktime_sub(ktime_get(), ws->last_time); if (active_time > max_time) max_time = active_time; } return sysfs_emit(buf, "%lld\n", ktime_to_ms(max_time)); } static DEVICE_ATTR_RO(max_time_ms); static ssize_t last_change_ms_show(struct device *dev, struct device_attribute *attr, char *buf) { struct wakeup_source *ws = dev_get_drvdata(dev); return sysfs_emit(buf, "%lld\n", ktime_to_ms(ws->last_time)); } static DEVICE_ATTR_RO(last_change_ms); static ssize_t name_show(struct device *dev, struct device_attribute *attr, char *buf) { struct wakeup_source *ws = dev_get_drvdata(dev); return sysfs_emit(buf, "%s\n", ws->name); } static DEVICE_ATTR_RO(name); static ssize_t prevent_suspend_time_ms_show(struct device *dev, struct device_attribute *attr, char *buf) { struct wakeup_source *ws = dev_get_drvdata(dev); ktime_t prevent_sleep_time = ws->prevent_sleep_time; if (ws->active && ws->autosleep_enabled) { prevent_sleep_time = ktime_add(prevent_sleep_time, ktime_sub(ktime_get(), ws->start_prevent_time)); } return sysfs_emit(buf, "%lld\n", ktime_to_ms(prevent_sleep_time)); } static DEVICE_ATTR_RO(prevent_suspend_time_ms); static struct attribute *wakeup_source_attrs[] = { &dev_attr_name.attr, &dev_attr_active_count.attr, &dev_attr_event_count.attr, &dev_attr_wakeup_count.attr, &dev_attr_expire_count.attr, &dev_attr_active_time_ms.attr, &dev_attr_total_time_ms.attr, &dev_attr_max_time_ms.attr, &dev_attr_last_change_ms.attr, &dev_attr_prevent_suspend_time_ms.attr, NULL, }; ATTRIBUTE_GROUPS(wakeup_source); static void device_create_release(struct device *dev) { kfree(dev); } static struct device *wakeup_source_device_create(struct device *parent, struct wakeup_source *ws) { struct device *dev = NULL; int retval; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) { retval = -ENOMEM; goto error; } device_initialize(dev); dev->devt = MKDEV(0, 0); dev->class = wakeup_class; dev->parent = parent; dev->groups = wakeup_source_groups; dev->release = device_create_release; dev_set_drvdata(dev, ws); device_set_pm_not_required(dev); retval = dev_set_name(dev, "wakeup%d", ws->id); if (retval) goto error; retval = device_add(dev); if (retval) goto error; return dev; error: put_device(dev); return ERR_PTR(retval); } /** * wakeup_source_sysfs_add - Add wakeup_source attributes to sysfs. * @parent: Device given wakeup source is associated with (or NULL if virtual). * @ws: Wakeup source to be added in sysfs. */ int wakeup_source_sysfs_add(struct device *parent, struct wakeup_source *ws) { struct device *dev; dev = wakeup_source_device_create(parent, ws); if (IS_ERR(dev)) return PTR_ERR(dev); ws->dev = dev; return 0; } /** * pm_wakeup_source_sysfs_add - Add wakeup_source attributes to sysfs * for a device if they're missing. * @parent: Device given wakeup source is associated with */ int pm_wakeup_source_sysfs_add(struct device *parent) { if (!parent->power.wakeup || parent->power.wakeup->dev) return 0; return wakeup_source_sysfs_add(parent, parent->power.wakeup); } /** * wakeup_source_sysfs_remove - Remove wakeup_source attributes from sysfs. * @ws: Wakeup source to be removed from sysfs. */ void wakeup_source_sysfs_remove(struct wakeup_source *ws) { device_unregister(ws->dev); } static int __init wakeup_sources_sysfs_init(void) { wakeup_class = class_create("wakeup"); return PTR_ERR_OR_ZERO(wakeup_class); } postcore_initcall(wakeup_sources_sysfs_init); |
| 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 21 21 21 21 4 5 5 4 4 24 23 9 31 32 32 17 17 17 17 17 17 15 1 1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 | // SPDX-License-Identifier: GPL-2.0-only #include <linux/ceph/ceph_debug.h> #include <linux/backing-dev.h> #include <linux/ctype.h> #include <linux/fs.h> #include <linux/inet.h> #include <linux/in6.h> #include <linux/key.h> #include <keys/ceph-type.h> #include <linux/module.h> #include <linux/mount.h> #include <linux/nsproxy.h> #include <linux/fs_parser.h> #include <linux/sched.h> #include <linux/sched/mm.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/statfs.h> #include <linux/string.h> #include <linux/vmalloc.h> #include <linux/ceph/ceph_features.h> #include <linux/ceph/libceph.h> #include <linux/ceph/debugfs.h> #include <linux/ceph/decode.h> #include <linux/ceph/mon_client.h> #include <linux/ceph/auth.h> #include "crypto.h" /* * Module compatibility interface. For now it doesn't do anything, * but its existence signals a certain level of functionality. * * The data buffer is used to pass information both to and from * libceph. The return value indicates whether libceph determines * it is compatible with the caller (from another kernel module), * given the provided data. * * The data pointer can be null. */ bool libceph_compatible(void *data) { return true; } EXPORT_SYMBOL(libceph_compatible); static int param_get_supported_features(char *buffer, const struct kernel_param *kp) { return sprintf(buffer, "0x%llx", CEPH_FEATURES_SUPPORTED_DEFAULT); } static const struct kernel_param_ops param_ops_supported_features = { .get = param_get_supported_features, }; module_param_cb(supported_features, ¶m_ops_supported_features, NULL, 0444); const char *ceph_msg_type_name(int type) { switch (type) { case CEPH_MSG_SHUTDOWN: return "shutdown"; case CEPH_MSG_PING: return "ping"; case CEPH_MSG_AUTH: return "auth"; case CEPH_MSG_AUTH_REPLY: return "auth_reply"; case CEPH_MSG_MON_MAP: return "mon_map"; case CEPH_MSG_MON_GET_MAP: return "mon_get_map"; case CEPH_MSG_MON_SUBSCRIBE: return "mon_subscribe"; case CEPH_MSG_MON_SUBSCRIBE_ACK: return "mon_subscribe_ack"; case CEPH_MSG_STATFS: return "statfs"; case CEPH_MSG_STATFS_REPLY: return "statfs_reply"; case CEPH_MSG_MON_GET_VERSION: return "mon_get_version"; case CEPH_MSG_MON_GET_VERSION_REPLY: return "mon_get_version_reply"; case CEPH_MSG_MDS_MAP: return "mds_map"; case CEPH_MSG_FS_MAP_USER: return "fs_map_user"; case CEPH_MSG_CLIENT_SESSION: return "client_session"; case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect"; case CEPH_MSG_CLIENT_REQUEST: return "client_request"; case CEPH_MSG_CLIENT_REQUEST_FORWARD: return "client_request_forward"; case CEPH_MSG_CLIENT_REPLY: return "client_reply"; case CEPH_MSG_CLIENT_CAPS: return "client_caps"; case CEPH_MSG_CLIENT_CAPRELEASE: return "client_cap_release"; case CEPH_MSG_CLIENT_QUOTA: return "client_quota"; case CEPH_MSG_CLIENT_SNAP: return "client_snap"; case CEPH_MSG_CLIENT_LEASE: return "client_lease"; case CEPH_MSG_POOLOP_REPLY: return "poolop_reply"; case CEPH_MSG_POOLOP: return "poolop"; case CEPH_MSG_MON_COMMAND: return "mon_command"; case CEPH_MSG_MON_COMMAND_ACK: return "mon_command_ack"; case CEPH_MSG_OSD_MAP: return "osd_map"; case CEPH_MSG_OSD_OP: return "osd_op"; case CEPH_MSG_OSD_OPREPLY: return "osd_opreply"; case CEPH_MSG_WATCH_NOTIFY: return "watch_notify"; case CEPH_MSG_OSD_BACKOFF: return "osd_backoff"; default: return "unknown"; } } EXPORT_SYMBOL(ceph_msg_type_name); /* * Initially learn our fsid, or verify an fsid matches. */ int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid) { if (client->have_fsid) { if (ceph_fsid_compare(&client->fsid, fsid)) { pr_err("bad fsid, had %pU got %pU", &client->fsid, fsid); return -1; } } else { memcpy(&client->fsid, fsid, sizeof(*fsid)); } return 0; } EXPORT_SYMBOL(ceph_check_fsid); static int strcmp_null(const char *s1, const char *s2) { if (!s1 && !s2) return 0; if (s1 && !s2) return -1; if (!s1 && s2) return 1; return strcmp(s1, s2); } int ceph_compare_options(struct ceph_options *new_opt, struct ceph_client *client) { struct ceph_options *opt1 = new_opt; struct ceph_options *opt2 = client->options; int ofs = offsetof(struct ceph_options, mon_addr); int i; int ret; /* * Don't bother comparing options if network namespaces don't * match. */ if (!net_eq(current->nsproxy->net_ns, read_pnet(&client->msgr.net))) return -1; ret = memcmp(opt1, opt2, ofs); if (ret) return ret; ret = strcmp_null(opt1->name, opt2->name); if (ret) return ret; if (opt1->key && !opt2->key) return -1; if (!opt1->key && opt2->key) return 1; if (opt1->key && opt2->key) { if (opt1->key->type != opt2->key->type) return -1; if (opt1->key->created.tv_sec != opt2->key->created.tv_sec) return -1; if (opt1->key->created.tv_nsec != opt2->key->created.tv_nsec) return -1; if (opt1->key->len != opt2->key->len) return -1; if (opt1->key->key && !opt2->key->key) return -1; if (!opt1->key->key && opt2->key->key) return 1; if (opt1->key->key && opt2->key->key) { ret = memcmp(opt1->key->key, opt2->key->key, opt1->key->len); if (ret) return ret; } } ret = ceph_compare_crush_locs(&opt1->crush_locs, &opt2->crush_locs); if (ret) return ret; /* any matching mon ip implies a match */ for (i = 0; i < opt1->num_mon; i++) { if (ceph_monmap_contains(client->monc.monmap, &opt1->mon_addr[i])) return 0; } return -1; } EXPORT_SYMBOL(ceph_compare_options); int ceph_parse_fsid(const char *str, struct ceph_fsid *fsid) { int i = 0; char tmp[3]; int err = -EINVAL; int d; dout("%s '%s'\n", __func__, str); tmp[2] = 0; while (*str && i < 16) { if (ispunct(*str)) { str++; continue; } if (!isxdigit(str[0]) || !isxdigit(str[1])) break; tmp[0] = str[0]; tmp[1] = str[1]; if (sscanf(tmp, "%x", &d) < 1) break; fsid->fsid[i] = d & 0xff; i++; str += 2; } if (i == 16) err = 0; dout("%s ret %d got fsid %pU\n", __func__, err, fsid); return err; } EXPORT_SYMBOL(ceph_parse_fsid); /* * ceph options */ enum { Opt_osdkeepalivetimeout, Opt_mount_timeout, Opt_osd_idle_ttl, Opt_osd_request_timeout, /* int args above */ Opt_fsid, Opt_name, Opt_secret, Opt_key, Opt_ip, Opt_crush_location, Opt_read_from_replica, Opt_ms_mode, /* string args above */ Opt_share, Opt_crc, Opt_cephx_require_signatures, Opt_cephx_sign_messages, Opt_tcp_nodelay, Opt_abort_on_full, Opt_rxbounce, }; enum { Opt_read_from_replica_no, Opt_read_from_replica_balance, Opt_read_from_replica_localize, }; static const struct constant_table ceph_param_read_from_replica[] = { {"no", Opt_read_from_replica_no}, {"balance", Opt_read_from_replica_balance}, {"localize", Opt_read_from_replica_localize}, {} }; enum ceph_ms_mode { Opt_ms_mode_legacy, Opt_ms_mode_crc, Opt_ms_mode_secure, Opt_ms_mode_prefer_crc, Opt_ms_mode_prefer_secure }; static const struct constant_table ceph_param_ms_mode[] = { {"legacy", Opt_ms_mode_legacy}, {"crc", Opt_ms_mode_crc}, {"secure", Opt_ms_mode_secure}, {"prefer-crc", Opt_ms_mode_prefer_crc}, {"prefer-secure", Opt_ms_mode_prefer_secure}, {} }; static const struct fs_parameter_spec ceph_parameters[] = { fsparam_flag ("abort_on_full", Opt_abort_on_full), __fsparam (NULL, "cephx_require_signatures", Opt_cephx_require_signatures, fs_param_neg_with_no|fs_param_deprecated, NULL), fsparam_flag_no ("cephx_sign_messages", Opt_cephx_sign_messages), fsparam_flag_no ("crc", Opt_crc), fsparam_string ("crush_location", Opt_crush_location), fsparam_string ("fsid", Opt_fsid), fsparam_string ("ip", Opt_ip), fsparam_string ("key", Opt_key), fsparam_u32 ("mount_timeout", Opt_mount_timeout), fsparam_string ("name", Opt_name), fsparam_u32 ("osd_idle_ttl", Opt_osd_idle_ttl), fsparam_u32 ("osd_request_timeout", Opt_osd_request_timeout), fsparam_u32 ("osdkeepalive", Opt_osdkeepalivetimeout), fsparam_enum ("read_from_replica", Opt_read_from_replica, ceph_param_read_from_replica), fsparam_flag ("rxbounce", Opt_rxbounce), fsparam_enum ("ms_mode", Opt_ms_mode, ceph_param_ms_mode), fsparam_string ("secret", Opt_secret), fsparam_flag_no ("share", Opt_share), fsparam_flag_no ("tcp_nodelay", Opt_tcp_nodelay), {} }; struct ceph_options *ceph_alloc_options(void) { struct ceph_options *opt; opt = kzalloc(sizeof(*opt), GFP_KERNEL); if (!opt) return NULL; opt->crush_locs = RB_ROOT; opt->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*opt->mon_addr), GFP_KERNEL); if (!opt->mon_addr) { kfree(opt); return NULL; } opt->flags = CEPH_OPT_DEFAULT; opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT; opt->read_from_replica = CEPH_READ_FROM_REPLICA_DEFAULT; opt->con_modes[0] = CEPH_CON_MODE_UNKNOWN; opt->con_modes[1] = CEPH_CON_MODE_UNKNOWN; return opt; } EXPORT_SYMBOL(ceph_alloc_options); void ceph_destroy_options(struct ceph_options *opt) { dout("destroy_options %p\n", opt); if (!opt) return; ceph_clear_crush_locs(&opt->crush_locs); kfree(opt->name); if (opt->key) { ceph_crypto_key_destroy(opt->key); kfree(opt->key); } kfree(opt->mon_addr); kfree(opt); } EXPORT_SYMBOL(ceph_destroy_options); /* get secret from key store */ static int get_secret(struct ceph_crypto_key *dst, const char *name, struct p_log *log) { struct key *ukey; int key_err; int err = 0; struct ceph_crypto_key *ckey; ukey = request_key(&key_type_ceph, name, NULL); if (IS_ERR(ukey)) { /* request_key errors don't map nicely to mount(2) errors; don't even try, but still printk */ key_err = PTR_ERR(ukey); switch (key_err) { case -ENOKEY: error_plog(log, "Failed due to key not found: %s", name); break; case -EKEYEXPIRED: error_plog(log, "Failed due to expired key: %s", name); break; case -EKEYREVOKED: error_plog(log, "Failed due to revoked key: %s", name); break; default: error_plog(log, "Failed due to key error %d: %s", key_err, name); } err = -EPERM; goto out; } ckey = ukey->payload.data[0]; err = ceph_crypto_key_clone(dst, ckey); if (err) goto out_key; /* pass through, err is 0 */ out_key: key_put(ukey); out: return err; } int ceph_parse_mon_ips(const char *buf, size_t len, struct ceph_options *opt, struct fc_log *l, char delim) { struct p_log log = {.prefix = "libceph", .log = l}; int ret; /* ip1[:port1][<delim>ip2[:port2]...] */ ret = ceph_parse_ips(buf, buf + len, opt->mon_addr, CEPH_MAX_MON, &opt->num_mon, delim); if (ret) { error_plog(&log, "Failed to parse monitor IPs: %d", ret); return ret; } return 0; } EXPORT_SYMBOL(ceph_parse_mon_ips); int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt, struct fc_log *l) { struct fs_parse_result result; int token, err; struct p_log log = {.prefix = "libceph", .log = l}; token = __fs_parse(&log, ceph_parameters, param, &result); dout("%s fs_parse '%s' token %d\n", __func__, param->key, token); if (token < 0) return token; switch (token) { case Opt_ip: err = ceph_parse_ips(param->string, param->string + param->size, &opt->my_addr, 1, NULL, ','); if (err) { error_plog(&log, "Failed to parse ip: %d", err); return err; } opt->flags |= CEPH_OPT_MYIP; break; case Opt_fsid: err = ceph_parse_fsid(param->string, &opt->fsid); if (err) { error_plog(&log, "Failed to parse fsid: %d", err); return err; } opt->flags |= CEPH_OPT_FSID; break; case Opt_name: kfree(opt->name); opt->name = param->string; param->string = NULL; break; case Opt_secret: ceph_crypto_key_destroy(opt->key); kfree(opt->key); opt->key = kzalloc(sizeof(*opt->key), GFP_KERNEL); if (!opt->key) return -ENOMEM; err = ceph_crypto_key_unarmor(opt->key, param->string); if (err) { error_plog(&log, "Failed to parse secret: %d", err); return err; } break; case Opt_key: ceph_crypto_key_destroy(opt->key); kfree(opt->key); opt->key = kzalloc(sizeof(*opt->key), GFP_KERNEL); if (!opt->key) return -ENOMEM; return get_secret(opt->key, param->string, &log); case Opt_crush_location: ceph_clear_crush_locs(&opt->crush_locs); err = ceph_parse_crush_location(param->string, &opt->crush_locs); if (err) { error_plog(&log, "Failed to parse CRUSH location: %d", err); return err; } break; case Opt_read_from_replica: switch (result.uint_32) { case Opt_read_from_replica_no: opt->read_from_replica = 0; break; case Opt_read_from_replica_balance: opt->read_from_replica = CEPH_OSD_FLAG_BALANCE_READS; break; case Opt_read_from_replica_localize: opt->read_from_replica = CEPH_OSD_FLAG_LOCALIZE_READS; break; default: BUG(); } break; case Opt_ms_mode: switch (result.uint_32) { case Opt_ms_mode_legacy: opt->con_modes[0] = CEPH_CON_MODE_UNKNOWN; opt->con_modes[1] = CEPH_CON_MODE_UNKNOWN; break; case Opt_ms_mode_crc: opt->con_modes[0] = CEPH_CON_MODE_CRC; opt->con_modes[1] = CEPH_CON_MODE_UNKNOWN; break; case Opt_ms_mode_secure: opt->con_modes[0] = CEPH_CON_MODE_SECURE; opt->con_modes[1] = CEPH_CON_MODE_UNKNOWN; break; case Opt_ms_mode_prefer_crc: opt->con_modes[0] = CEPH_CON_MODE_CRC; opt->con_modes[1] = CEPH_CON_MODE_SECURE; break; case Opt_ms_mode_prefer_secure: opt->con_modes[0] = CEPH_CON_MODE_SECURE; opt->con_modes[1] = CEPH_CON_MODE_CRC; break; default: BUG(); } break; case Opt_osdkeepalivetimeout: /* 0 isn't well defined right now, reject it */ if (result.uint_32 < 1 || result.uint_32 > INT_MAX / 1000) goto out_of_range; opt->osd_keepalive_timeout = msecs_to_jiffies(result.uint_32 * 1000); break; case Opt_osd_idle_ttl: /* 0 isn't well defined right now, reject it */ if (result.uint_32 < 1 || result.uint_32 > INT_MAX / 1000) goto out_of_range; opt->osd_idle_ttl = msecs_to_jiffies(result.uint_32 * 1000); break; case Opt_mount_timeout: /* 0 is "wait forever" (i.e. infinite timeout) */ if (result.uint_32 > INT_MAX / 1000) goto out_of_range; opt->mount_timeout = msecs_to_jiffies(result.uint_32 * 1000); break; case Opt_osd_request_timeout: /* 0 is "wait forever" (i.e. infinite timeout) */ if (result.uint_32 > INT_MAX / 1000) goto out_of_range; opt->osd_request_timeout = msecs_to_jiffies(result.uint_32 * 1000); break; case Opt_share: if (!result.negated) opt->flags &= ~CEPH_OPT_NOSHARE; else opt->flags |= CEPH_OPT_NOSHARE; break; case Opt_crc: if (!result.negated) opt->flags &= ~CEPH_OPT_NOCRC; else opt->flags |= CEPH_OPT_NOCRC; break; case Opt_cephx_require_signatures: if (!result.negated) warn_plog(&log, "Ignoring cephx_require_signatures"); else warn_plog(&log, "Ignoring nocephx_require_signatures, use nocephx_sign_messages"); break; case Opt_cephx_sign_messages: if (!result.negated) opt->flags &= ~CEPH_OPT_NOMSGSIGN; else opt->flags |= CEPH_OPT_NOMSGSIGN; break; case Opt_tcp_nodelay: if (!result.negated) opt->flags |= CEPH_OPT_TCP_NODELAY; else opt->flags &= ~CEPH_OPT_TCP_NODELAY; break; case Opt_abort_on_full: opt->flags |= CEPH_OPT_ABORT_ON_FULL; break; case Opt_rxbounce: opt->flags |= CEPH_OPT_RXBOUNCE; break; default: BUG(); } return 0; out_of_range: return inval_plog(&log, "%s out of range", param->key); } EXPORT_SYMBOL(ceph_parse_param); int ceph_print_client_options(struct seq_file *m, struct ceph_client *client, bool show_all) { struct ceph_options *opt = client->options; size_t pos = m->count; struct rb_node *n; if (opt->name) { seq_puts(m, "name="); seq_escape(m, opt->name, ", \t\n\\"); seq_putc(m, ','); } if (opt->key) seq_puts(m, "secret=<hidden>,"); if (!RB_EMPTY_ROOT(&opt->crush_locs)) { seq_puts(m, "crush_location="); for (n = rb_first(&opt->crush_locs); ; ) { struct crush_loc_node *loc = rb_entry(n, struct crush_loc_node, cl_node); seq_printf(m, "%s:%s", loc->cl_loc.cl_type_name, loc->cl_loc.cl_name); n = rb_next(n); if (!n) break; seq_putc(m, '|'); } seq_putc(m, ','); } if (opt->read_from_replica == CEPH_OSD_FLAG_BALANCE_READS) { seq_puts(m, "read_from_replica=balance,"); } else if (opt->read_from_replica == CEPH_OSD_FLAG_LOCALIZE_READS) { seq_puts(m, "read_from_replica=localize,"); } if (opt->con_modes[0] != CEPH_CON_MODE_UNKNOWN) { if (opt->con_modes[0] == CEPH_CON_MODE_CRC && opt->con_modes[1] == CEPH_CON_MODE_UNKNOWN) { seq_puts(m, "ms_mode=crc,"); } else if (opt->con_modes[0] == CEPH_CON_MODE_SECURE && opt->con_modes[1] == CEPH_CON_MODE_UNKNOWN) { seq_puts(m, "ms_mode=secure,"); } else if (opt->con_modes[0] == CEPH_CON_MODE_CRC && opt->con_modes[1] == CEPH_CON_MODE_SECURE) { seq_puts(m, "ms_mode=prefer-crc,"); } else if (opt->con_modes[0] == CEPH_CON_MODE_SECURE && opt->con_modes[1] == CEPH_CON_MODE_CRC) { seq_puts(m, "ms_mode=prefer-secure,"); } } if (opt->flags & CEPH_OPT_FSID) seq_printf(m, "fsid=%pU,", &opt->fsid); if (opt->flags & CEPH_OPT_NOSHARE) seq_puts(m, "noshare,"); if (opt->flags & CEPH_OPT_NOCRC) seq_puts(m, "nocrc,"); if (opt->flags & CEPH_OPT_NOMSGSIGN) seq_puts(m, "nocephx_sign_messages,"); if ((opt->flags & CEPH_OPT_TCP_NODELAY) == 0) seq_puts(m, "notcp_nodelay,"); if (show_all && (opt->flags & CEPH_OPT_ABORT_ON_FULL)) seq_puts(m, "abort_on_full,"); if (opt->flags & CEPH_OPT_RXBOUNCE) seq_puts(m, "rxbounce,"); if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) seq_printf(m, "mount_timeout=%d,", jiffies_to_msecs(opt->mount_timeout) / 1000); if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) seq_printf(m, "osd_idle_ttl=%d,", jiffies_to_msecs(opt->osd_idle_ttl) / 1000); if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) seq_printf(m, "osdkeepalivetimeout=%d,", jiffies_to_msecs(opt->osd_keepalive_timeout) / 1000); if (opt->osd_request_timeout != CEPH_OSD_REQUEST_TIMEOUT_DEFAULT) seq_printf(m, "osd_request_timeout=%d,", jiffies_to_msecs(opt->osd_request_timeout) / 1000); /* drop redundant comma */ if (m->count != pos) m->count--; return 0; } EXPORT_SYMBOL(ceph_print_client_options); struct ceph_entity_addr *ceph_client_addr(struct ceph_client *client) { return &client->msgr.inst.addr; } EXPORT_SYMBOL(ceph_client_addr); u64 ceph_client_gid(struct ceph_client *client) { return client->monc.auth->global_id; } EXPORT_SYMBOL(ceph_client_gid); /* * create a fresh client instance */ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private) { struct ceph_client *client; struct ceph_entity_addr *myaddr = NULL; int err; err = wait_for_random_bytes(); if (err < 0) return ERR_PTR(err); client = kzalloc(sizeof(*client), GFP_KERNEL); if (client == NULL) return ERR_PTR(-ENOMEM); client->private = private; client->options = opt; mutex_init(&client->mount_mutex); init_waitqueue_head(&client->auth_wq); client->auth_err = 0; client->extra_mon_dispatch = NULL; client->supported_features = CEPH_FEATURES_SUPPORTED_DEFAULT; client->required_features = CEPH_FEATURES_REQUIRED_DEFAULT; if (!ceph_test_opt(client, NOMSGSIGN)) client->required_features |= CEPH_FEATURE_MSG_AUTH; /* msgr */ if (ceph_test_opt(client, MYIP)) myaddr = &client->options->my_addr; ceph_messenger_init(&client->msgr, myaddr); /* subsystems */ err = ceph_monc_init(&client->monc, client); if (err < 0) goto fail; err = ceph_osdc_init(&client->osdc, client); if (err < 0) goto fail_monc; return client; fail_monc: ceph_monc_stop(&client->monc); fail: ceph_messenger_fini(&client->msgr); kfree(client); return ERR_PTR(err); } EXPORT_SYMBOL(ceph_create_client); void ceph_destroy_client(struct ceph_client *client) { dout("destroy_client %p\n", client); atomic_set(&client->msgr.stopping, 1); /* unmount */ ceph_osdc_stop(&client->osdc); ceph_monc_stop(&client->monc); ceph_messenger_fini(&client->msgr); ceph_debugfs_client_cleanup(client); ceph_destroy_options(client->options); kfree(client); dout("destroy_client %p done\n", client); } EXPORT_SYMBOL(ceph_destroy_client); void ceph_reset_client_addr(struct ceph_client *client) { ceph_messenger_reset_nonce(&client->msgr); ceph_monc_reopen_session(&client->monc); ceph_osdc_reopen_osds(&client->osdc); } EXPORT_SYMBOL(ceph_reset_client_addr); /* * true if we have the mon map (and have thus joined the cluster) */ static bool have_mon_and_osd_map(struct ceph_client *client) { return client->monc.monmap && client->monc.monmap->epoch && client->osdc.osdmap && client->osdc.osdmap->epoch; } /* * mount: join the ceph cluster, and open root directory. */ int __ceph_open_session(struct ceph_client *client, unsigned long started) { unsigned long timeout = client->options->mount_timeout; long err; /* open session, and wait for mon and osd maps */ err = ceph_monc_open_session(&client->monc); if (err < 0) return err; while (!have_mon_and_osd_map(client)) { if (timeout && time_after_eq(jiffies, started + timeout)) return -ETIMEDOUT; /* wait */ dout("mount waiting for mon_map\n"); err = wait_event_interruptible_timeout(client->auth_wq, have_mon_and_osd_map(client) || (client->auth_err < 0), ceph_timeout_jiffies(timeout)); if (err < 0) return err; if (client->auth_err < 0) return client->auth_err; } pr_info("client%llu fsid %pU\n", ceph_client_gid(client), &client->fsid); ceph_debugfs_client_init(client); return 0; } EXPORT_SYMBOL(__ceph_open_session); int ceph_open_session(struct ceph_client *client) { int ret; unsigned long started = jiffies; /* note the start time */ dout("open_session start\n"); mutex_lock(&client->mount_mutex); ret = __ceph_open_session(client, started); mutex_unlock(&client->mount_mutex); return ret; } EXPORT_SYMBOL(ceph_open_session); int ceph_wait_for_latest_osdmap(struct ceph_client *client, unsigned long timeout) { u64 newest_epoch; int ret; ret = ceph_monc_get_version(&client->monc, "osdmap", &newest_epoch); if (ret) return ret; if (client->osdc.osdmap->epoch >= newest_epoch) return 0; ceph_osdc_maybe_request_map(&client->osdc); return ceph_monc_wait_osdmap(&client->monc, newest_epoch, timeout); } EXPORT_SYMBOL(ceph_wait_for_latest_osdmap); static int __init init_ceph_lib(void) { int ret = 0; ceph_debugfs_init(); ret = ceph_crypto_init(); if (ret < 0) goto out_debugfs; ret = ceph_msgr_init(); if (ret < 0) goto out_crypto; ret = ceph_osdc_setup(); if (ret < 0) goto out_msgr; pr_info("loaded (mon/osd proto %d/%d)\n", CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL); return 0; out_msgr: ceph_msgr_exit(); out_crypto: ceph_crypto_shutdown(); out_debugfs: ceph_debugfs_cleanup(); return ret; } static void __exit exit_ceph_lib(void) { dout("exit_ceph_lib\n"); WARN_ON(!ceph_strings_empty()); ceph_osdc_cleanup(); ceph_msgr_exit(); ceph_crypto_shutdown(); ceph_debugfs_cleanup(); } module_init(init_ceph_lib); module_exit(exit_ceph_lib); MODULE_AUTHOR("Sage Weil <sage@newdream.net>"); MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>"); MODULE_AUTHOR("Patience Warnick <patience@newdream.net>"); MODULE_DESCRIPTION("Ceph core library"); MODULE_LICENSE("GPL"); |
| 33 289 289 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Checksumming functions for IPv6 * * Authors: Jorge Cwik, <jorge@laser.satlink.net> * Arnt Gulbrandsen, <agulbra@nvg.unit.no> * Borrows very liberally from tcp.c and ip.c, see those * files for more names. */ /* * Fixes: * * Ralf Baechle : generic ipv6 checksum * <ralf@waldorf-gmbh.de> */ #ifndef _CHECKSUM_IPV6_H #define _CHECKSUM_IPV6_H #include <asm/types.h> #include <asm/byteorder.h> #include <net/ip.h> #include <asm/checksum.h> #include <linux/in6.h> #include <linux/tcp.h> #include <linux/ipv6.h> #ifndef _HAVE_ARCH_IPV6_CSUM __sum16 csum_ipv6_magic(const struct in6_addr *saddr, const struct in6_addr *daddr, __u32 len, __u8 proto, __wsum csum); #endif static inline __wsum ip6_compute_pseudo(struct sk_buff *skb, int proto) { return ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->len, proto, 0)); } static __inline__ __sum16 tcp_v6_check(int len, const struct in6_addr *saddr, const struct in6_addr *daddr, __wsum base) { return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base); } static inline void __tcp_v6_send_check(struct sk_buff *skb, const struct in6_addr *saddr, const struct in6_addr *daddr) { struct tcphdr *th = tcp_hdr(skb); th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0); skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct tcphdr, check); } static inline void tcp_v6_gso_csum_prep(struct sk_buff *skb) { struct ipv6hdr *ipv6h = ipv6_hdr(skb); struct tcphdr *th = tcp_hdr(skb); ipv6h->payload_len = 0; th->check = ~tcp_v6_check(0, &ipv6h->saddr, &ipv6h->daddr, 0); } static inline __sum16 udp_v6_check(int len, const struct in6_addr *saddr, const struct in6_addr *daddr, __wsum base) { return csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, base); } void udp6_set_csum(bool nocheck, struct sk_buff *skb, const struct in6_addr *saddr, const struct in6_addr *daddr, int len); int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto); #endif |
| 14 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __KVM_IO_APIC_H #define __KVM_IO_APIC_H #include <linux/kvm_host.h> #include <kvm/iodev.h> #include "irq.h" struct kvm; struct kvm_vcpu; #define IOAPIC_NUM_PINS KVM_IOAPIC_NUM_PINS #define MAX_NR_RESERVED_IOAPIC_PINS KVM_MAX_IRQ_ROUTES #define IOAPIC_VERSION_ID 0x11 /* IOAPIC version */ #define IOAPIC_EDGE_TRIG 0 #define IOAPIC_LEVEL_TRIG 1 #define IOAPIC_DEFAULT_BASE_ADDRESS 0xfec00000 #define IOAPIC_MEM_LENGTH 0x100 /* Direct registers. */ #define IOAPIC_REG_SELECT 0x00 #define IOAPIC_REG_WINDOW 0x10 /* Indirect registers. */ #define IOAPIC_REG_APIC_ID 0x00 /* x86 IOAPIC only */ #define IOAPIC_REG_VERSION 0x01 #define IOAPIC_REG_ARB_ID 0x02 /* x86 IOAPIC only */ /*ioapic delivery mode*/ #define IOAPIC_FIXED 0x0 #define IOAPIC_LOWEST_PRIORITY 0x1 #define IOAPIC_PMI 0x2 #define IOAPIC_NMI 0x4 #define IOAPIC_INIT 0x5 #define IOAPIC_EXTINT 0x7 #define RTC_GSI 8 struct dest_map { /* vcpu bitmap where IRQ has been sent */ DECLARE_BITMAP(map, KVM_MAX_VCPU_IDS); /* * Vector sent to a given vcpu, only valid when * the vcpu's bit in map is set */ u8 vectors[KVM_MAX_VCPU_IDS]; }; struct rtc_status { int pending_eoi; struct dest_map dest_map; }; union kvm_ioapic_redirect_entry { u64 bits; struct { u8 vector; u8 delivery_mode:3; u8 dest_mode:1; u8 delivery_status:1; u8 polarity:1; u8 remote_irr:1; u8 trig_mode:1; u8 mask:1; u8 reserve:7; u8 reserved[4]; u8 dest_id; } fields; }; struct kvm_ioapic { u64 base_address; u32 ioregsel; u32 id; u32 irr; u32 pad; union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS]; unsigned long irq_states[IOAPIC_NUM_PINS]; struct kvm_io_device dev; struct kvm *kvm; spinlock_t lock; struct rtc_status rtc_status; struct delayed_work eoi_inject; u32 irq_eoi[IOAPIC_NUM_PINS]; u32 irr_delivered; }; #ifdef DEBUG #define ASSERT(x) \ do { \ if (!(x)) { \ printk(KERN_EMERG "assertion failed %s: %d: %s\n", \ __FILE__, __LINE__, #x); \ BUG(); \ } \ } while (0) #else #define ASSERT(x) do { } while (0) #endif static inline int ioapic_in_kernel(struct kvm *kvm) { return irqchip_kernel(kvm); } void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode); int kvm_ioapic_init(struct kvm *kvm); void kvm_ioapic_destroy(struct kvm *kvm); int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, int level, bool line_status); void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id); void kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); void kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors); void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors); #endif |
| 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 21 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 | // SPDX-License-Identifier: GPL-2.0-only /* * net/sunrpc/cache.c * * Generic code for various authentication-related caches * used by sunrpc clients and servers. * * Copyright (C) 2002 Neil Brown <neilb@cse.unsw.edu.au> */ #include <linux/types.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/slab.h> #include <linux/signal.h> #include <linux/sched.h> #include <linux/kmod.h> #include <linux/list.h> #include <linux/module.h> #include <linux/ctype.h> #include <linux/string_helpers.h> #include <linux/uaccess.h> #include <linux/poll.h> #include <linux/seq_file.h> #include <linux/proc_fs.h> #include <linux/net.h> #include <linux/workqueue.h> #include <linux/mutex.h> #include <linux/pagemap.h> #include <asm/ioctls.h> #include <linux/sunrpc/types.h> #include <linux/sunrpc/cache.h> #include <linux/sunrpc/stats.h> #include <linux/sunrpc/rpc_pipe_fs.h> #include <trace/events/sunrpc.h> #include "netns.h" #include "fail.h" #define RPCDBG_FACILITY RPCDBG_CACHE static bool cache_defer_req(struct cache_req *req, struct cache_head *item); static void cache_revisit_request(struct cache_head *item); static void cache_init(struct cache_head *h, struct cache_detail *detail) { time64_t now = seconds_since_boot(); INIT_HLIST_NODE(&h->cache_list); h->flags = 0; kref_init(&h->ref); h->expiry_time = now + CACHE_NEW_EXPIRY; if (now <= detail->flush_time) /* ensure it isn't already expired */ now = detail->flush_time + 1; h->last_refresh = now; } static void cache_fresh_unlocked(struct cache_head *head, struct cache_detail *detail); static struct cache_head *sunrpc_cache_find_rcu(struct cache_detail *detail, struct cache_head *key, int hash) { struct hlist_head *head = &detail->hash_table[hash]; struct cache_head *tmp; rcu_read_lock(); hlist_for_each_entry_rcu(tmp, head, cache_list) { if (!detail->match(tmp, key)) continue; if (test_bit(CACHE_VALID, &tmp->flags) && cache_is_expired(detail, tmp)) continue; tmp = cache_get_rcu(tmp); rcu_read_unlock(); return tmp; } rcu_read_unlock(); return NULL; } static void sunrpc_begin_cache_remove_entry(struct cache_head *ch, struct cache_detail *cd) { /* Must be called under cd->hash_lock */ hlist_del_init_rcu(&ch->cache_list); set_bit(CACHE_CLEANED, &ch->flags); cd->entries --; } static void sunrpc_end_cache_remove_entry(struct cache_head *ch, struct cache_detail *cd) { cache_fresh_unlocked(ch, cd); cache_put(ch, cd); } static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail, struct cache_head *key, int hash) { struct cache_head *new, *tmp, *freeme = NULL; struct hlist_head *head = &detail->hash_table[hash]; new = detail->alloc(); if (!new) return NULL; /* must fully initialise 'new', else * we might get lose if we need to * cache_put it soon. */ cache_init(new, detail); detail->init(new, key); spin_lock(&detail->hash_lock); /* check if entry appeared while we slept */ hlist_for_each_entry_rcu(tmp, head, cache_list, lockdep_is_held(&detail->hash_lock)) { if (!detail->match(tmp, key)) continue; if (test_bit(CACHE_VALID, &tmp->flags) && cache_is_expired(detail, tmp)) { sunrpc_begin_cache_remove_entry(tmp, detail); trace_cache_entry_expired(detail, tmp); freeme = tmp; break; } cache_get(tmp); spin_unlock(&detail->hash_lock); cache_put(new, detail); return tmp; } hlist_add_head_rcu(&new->cache_list, head); detail->entries++; cache_get(new); spin_unlock(&detail->hash_lock); if (freeme) sunrpc_end_cache_remove_entry(freeme, detail); return new; } struct cache_head *sunrpc_cache_lookup_rcu(struct cache_detail *detail, struct cache_head *key, int hash) { struct cache_head *ret; ret = sunrpc_cache_find_rcu(detail, key, hash); if (ret) return ret; /* Didn't find anything, insert an empty entry */ return sunrpc_cache_add_entry(detail, key, hash); } EXPORT_SYMBOL_GPL(sunrpc_cache_lookup_rcu); static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch); static void cache_fresh_locked(struct cache_head *head, time64_t expiry, struct cache_detail *detail) { time64_t now = seconds_since_boot(); if (now <= detail->flush_time) /* ensure it isn't immediately treated as expired */ now = detail->flush_time + 1; head->expiry_time = expiry; head->last_refresh = now; smp_wmb(); /* paired with smp_rmb() in cache_is_valid() */ set_bit(CACHE_VALID, &head->flags); } static void cache_fresh_unlocked(struct cache_head *head, struct cache_detail *detail) { if (test_and_clear_bit(CACHE_PENDING, &head->flags)) { cache_revisit_request(head); cache_dequeue(detail, head); } } static void cache_make_negative(struct cache_detail *detail, struct cache_head *h) { set_bit(CACHE_NEGATIVE, &h->flags); trace_cache_entry_make_negative(detail, h); } static void cache_entry_update(struct cache_detail *detail, struct cache_head *h, struct cache_head *new) { if (!test_bit(CACHE_NEGATIVE, &new->flags)) { detail->update(h, new); trace_cache_entry_update(detail, h); } else { cache_make_negative(detail, h); } } struct cache_head *sunrpc_cache_update(struct cache_detail *detail, struct cache_head *new, struct cache_head *old, int hash) { /* The 'old' entry is to be replaced by 'new'. * If 'old' is not VALID, we update it directly, * otherwise we need to replace it */ struct cache_head *tmp; if (!test_bit(CACHE_VALID, &old->flags)) { spin_lock(&detail->hash_lock); if (!test_bit(CACHE_VALID, &old->flags)) { cache_entry_update(detail, old, new); cache_fresh_locked(old, new->expiry_time, detail); spin_unlock(&detail->hash_lock); cache_fresh_unlocked(old, detail); return old; } spin_unlock(&detail->hash_lock); } /* We need to insert a new entry */ tmp = detail->alloc(); if (!tmp) { cache_put(old, detail); return NULL; } cache_init(tmp, detail); detail->init(tmp, old); spin_lock(&detail->hash_lock); cache_entry_update(detail, tmp, new); hlist_add_head(&tmp->cache_list, &detail->hash_table[hash]); detail->entries++; cache_get(tmp); cache_fresh_locked(tmp, new->expiry_time, detail); cache_fresh_locked(old, 0, detail); spin_unlock(&detail->hash_lock); cache_fresh_unlocked(tmp, detail); cache_fresh_unlocked(old, detail); cache_put(old, detail); return tmp; } EXPORT_SYMBOL_GPL(sunrpc_cache_update); static inline int cache_is_valid(struct cache_head *h) { if (!test_bit(CACHE_VALID, &h->flags)) return -EAGAIN; else { /* entry is valid */ if (test_bit(CACHE_NEGATIVE, &h->flags)) return -ENOENT; else { /* * In combination with write barrier in * sunrpc_cache_update, ensures that anyone * using the cache entry after this sees the * updated contents: */ smp_rmb(); return 0; } } } static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h) { int rv; spin_lock(&detail->hash_lock); rv = cache_is_valid(h); if (rv == -EAGAIN) { cache_make_negative(detail, h); cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY, detail); rv = -ENOENT; } spin_unlock(&detail->hash_lock); cache_fresh_unlocked(h, detail); return rv; } /* * This is the generic cache management routine for all * the authentication caches. * It checks the currency of a cache item and will (later) * initiate an upcall to fill it if needed. * * * Returns 0 if the cache_head can be used, or cache_puts it and returns * -EAGAIN if upcall is pending and request has been queued * -ETIMEDOUT if upcall failed or request could not be queue or * upcall completed but item is still invalid (implying that * the cache item has been replaced with a newer one). * -ENOENT if cache entry was negative */ int cache_check(struct cache_detail *detail, struct cache_head *h, struct cache_req *rqstp) { int rv; time64_t refresh_age, age; /* First decide return status as best we can */ rv = cache_is_valid(h); /* now see if we want to start an upcall */ refresh_age = (h->expiry_time - h->last_refresh); age = seconds_since_boot() - h->last_refresh; if (rqstp == NULL) { if (rv == -EAGAIN) rv = -ENOENT; } else if (rv == -EAGAIN || (h->expiry_time != 0 && age > refresh_age/2)) { dprintk("RPC: Want update, refage=%lld, age=%lld\n", refresh_age, age); switch (detail->cache_upcall(detail, h)) { case -EINVAL: rv = try_to_negate_entry(detail, h); break; case -EAGAIN: cache_fresh_unlocked(h, detail); break; } } if (rv == -EAGAIN) { if (!cache_defer_req(rqstp, h)) { /* * Request was not deferred; handle it as best * we can ourselves: */ rv = cache_is_valid(h); if (rv == -EAGAIN) rv = -ETIMEDOUT; } } if (rv) cache_put(h, detail); return rv; } EXPORT_SYMBOL_GPL(cache_check); /* * caches need to be periodically cleaned. * For this we maintain a list of cache_detail and * a current pointer into that list and into the table * for that entry. * * Each time cache_clean is called it finds the next non-empty entry * in the current table and walks the list in that entry * looking for entries that can be removed. * * An entry gets removed if: * - The expiry is before current time * - The last_refresh time is before the flush_time for that cache * * later we might drop old entries with non-NEVER expiry if that table * is getting 'full' for some definition of 'full' * * The question of "how often to scan a table" is an interesting one * and is answered in part by the use of the "nextcheck" field in the * cache_detail. * When a scan of a table begins, the nextcheck field is set to a time * that is well into the future. * While scanning, if an expiry time is found that is earlier than the * current nextcheck time, nextcheck is set to that expiry time. * If the flush_time is ever set to a time earlier than the nextcheck * time, the nextcheck time is then set to that flush_time. * * A table is then only scanned if the current time is at least * the nextcheck time. * */ static LIST_HEAD(cache_list); static DEFINE_SPINLOCK(cache_list_lock); static struct cache_detail *current_detail; static int current_index; static void do_cache_clean(struct work_struct *work); static struct delayed_work cache_cleaner; void sunrpc_init_cache_detail(struct cache_detail *cd) { spin_lock_init(&cd->hash_lock); INIT_LIST_HEAD(&cd->queue); spin_lock(&cache_list_lock); cd->nextcheck = 0; cd->entries = 0; atomic_set(&cd->writers, 0); cd->last_close = 0; cd->last_warn = -1; list_add(&cd->others, &cache_list); spin_unlock(&cache_list_lock); /* start the cleaning process */ queue_delayed_work(system_power_efficient_wq, &cache_cleaner, 0); } EXPORT_SYMBOL_GPL(sunrpc_init_cache_detail); void sunrpc_destroy_cache_detail(struct cache_detail *cd) { cache_purge(cd); spin_lock(&cache_list_lock); spin_lock(&cd->hash_lock); if (current_detail == cd) current_detail = NULL; list_del_init(&cd->others); spin_unlock(&cd->hash_lock); spin_unlock(&cache_list_lock); if (list_empty(&cache_list)) { /* module must be being unloaded so its safe to kill the worker */ cancel_delayed_work_sync(&cache_cleaner); } } EXPORT_SYMBOL_GPL(sunrpc_destroy_cache_detail); /* clean cache tries to find something to clean * and cleans it. * It returns 1 if it cleaned something, * 0 if it didn't find anything this time * -1 if it fell off the end of the list. */ static int cache_clean(void) { int rv = 0; struct list_head *next; spin_lock(&cache_list_lock); /* find a suitable table if we don't already have one */ while (current_detail == NULL || current_index >= current_detail->hash_size) { if (current_detail) next = current_detail->others.next; else next = cache_list.next; if (next == &cache_list) { current_detail = NULL; spin_unlock(&cache_list_lock); return -1; } current_detail = list_entry(next, struct cache_detail, others); if (current_detail->nextcheck > seconds_since_boot()) current_index = current_detail->hash_size; else { current_index = 0; current_detail->nextcheck = seconds_since_boot()+30*60; } } /* find a non-empty bucket in the table */ while (current_detail && current_index < current_detail->hash_size && hlist_empty(¤t_detail->hash_table[current_index])) current_index++; /* find a cleanable entry in the bucket and clean it, or set to next bucket */ if (current_detail && current_index < current_detail->hash_size) { struct cache_head *ch = NULL; struct cache_detail *d; struct hlist_head *head; struct hlist_node *tmp; spin_lock(¤t_detail->hash_lock); /* Ok, now to clean this strand */ head = ¤t_detail->hash_table[current_index]; hlist_for_each_entry_safe(ch, tmp, head, cache_list) { if (current_detail->nextcheck > ch->expiry_time) current_detail->nextcheck = ch->expiry_time+1; if (!cache_is_expired(current_detail, ch)) continue; sunrpc_begin_cache_remove_entry(ch, current_detail); trace_cache_entry_expired(current_detail, ch); rv = 1; break; } spin_unlock(¤t_detail->hash_lock); d = current_detail; if (!ch) current_index ++; spin_unlock(&cache_list_lock); if (ch) sunrpc_end_cache_remove_entry(ch, d); } else spin_unlock(&cache_list_lock); return rv; } /* * We want to regularly clean the cache, so we need to schedule some work ... */ static void do_cache_clean(struct work_struct *work) { int delay; if (list_empty(&cache_list)) return; if (cache_clean() == -1) delay = round_jiffies_relative(30*HZ); else delay = 5; queue_delayed_work(system_power_efficient_wq, &cache_cleaner, delay); } /* * Clean all caches promptly. This just calls cache_clean * repeatedly until we are sure that every cache has had a chance to * be fully cleaned */ void cache_flush(void) { while (cache_clean() != -1) cond_resched(); while (cache_clean() != -1) cond_resched(); } EXPORT_SYMBOL_GPL(cache_flush); void cache_purge(struct cache_detail *detail) { struct cache_head *ch = NULL; struct hlist_head *head = NULL; int i = 0; spin_lock(&detail->hash_lock); if (!detail->entries) { spin_unlock(&detail->hash_lock); return; } dprintk("RPC: %d entries in %s cache\n", detail->entries, detail->name); for (i = 0; i < detail->hash_size; i++) { head = &detail->hash_table[i]; while (!hlist_empty(head)) { ch = hlist_entry(head->first, struct cache_head, cache_list); sunrpc_begin_cache_remove_entry(ch, detail); spin_unlock(&detail->hash_lock); sunrpc_end_cache_remove_entry(ch, detail); spin_lock(&detail->hash_lock); } } spin_unlock(&detail->hash_lock); } EXPORT_SYMBOL_GPL(cache_purge); /* * Deferral and Revisiting of Requests. * * If a cache lookup finds a pending entry, we * need to defer the request and revisit it later. * All deferred requests are stored in a hash table, * indexed by "struct cache_head *". * As it may be wasteful to store a whole request * structure, we allow the request to provide a * deferred form, which must contain a * 'struct cache_deferred_req' * This cache_deferred_req contains a method to allow * it to be revisited when cache info is available */ #define DFR_HASHSIZE (PAGE_SIZE/sizeof(struct list_head)) #define DFR_HASH(item) ((((long)item)>>4 ^ (((long)item)>>13)) % DFR_HASHSIZE) #define DFR_MAX 300 /* ??? */ static DEFINE_SPINLOCK(cache_defer_lock); static LIST_HEAD(cache_defer_list); static struct hlist_head cache_defer_hash[DFR_HASHSIZE]; static int cache_defer_cnt; static void __unhash_deferred_req(struct cache_deferred_req *dreq) { hlist_del_init(&dreq->hash); if (!list_empty(&dreq->recent)) { list_del_init(&dreq->recent); cache_defer_cnt--; } } static void __hash_deferred_req(struct cache_deferred_req *dreq, struct cache_head *item) { int hash = DFR_HASH(item); INIT_LIST_HEAD(&dreq->recent); hlist_add_head(&dreq->hash, &cache_defer_hash[hash]); } static void setup_deferral(struct cache_deferred_req *dreq, struct cache_head *item, int count_me) { dreq->item = item; spin_lock(&cache_defer_lock); __hash_deferred_req(dreq, item); if (count_me) { cache_defer_cnt++; list_add(&dreq->recent, &cache_defer_list); } spin_unlock(&cache_defer_lock); } struct thread_deferred_req { struct cache_deferred_req handle; struct completion completion; }; static void cache_restart_thread(struct cache_deferred_req *dreq, int too_many) { struct thread_deferred_req *dr = container_of(dreq, struct thread_deferred_req, handle); complete(&dr->completion); } static void cache_wait_req(struct cache_req *req, struct cache_head *item) { struct thread_deferred_req sleeper; struct cache_deferred_req *dreq = &sleeper.handle; sleeper.completion = COMPLETION_INITIALIZER_ONSTACK(sleeper.completion); dreq->revisit = cache_restart_thread; setup_deferral(dreq, item, 0); if (!test_bit(CACHE_PENDING, &item->flags) || wait_for_completion_interruptible_timeout( &sleeper.completion, req->thread_wait) <= 0) { /* The completion wasn't completed, so we need * to clean up */ spin_lock(&cache_defer_lock); if (!hlist_unhashed(&sleeper.handle.hash)) { __unhash_deferred_req(&sleeper.handle); spin_unlock(&cache_defer_lock); } else { /* cache_revisit_request already removed * this from the hash table, but hasn't * called ->revisit yet. It will very soon * and we need to wait for it. */ spin_unlock(&cache_defer_lock); wait_for_completion(&sleeper.completion); } } } static void cache_limit_defers(void) { /* Make sure we haven't exceed the limit of allowed deferred * requests. */ struct cache_deferred_req *discard = NULL; if (cache_defer_cnt <= DFR_MAX) return; spin_lock(&cache_defer_lock); /* Consider removing either the first or the last */ if (cache_defer_cnt > DFR_MAX) { if (get_random_u32_below(2)) discard = list_entry(cache_defer_list.next, struct cache_deferred_req, recent); else discard = list_entry(cache_defer_list.prev, struct cache_deferred_req, recent); __unhash_deferred_req(discard); } spin_unlock(&cache_defer_lock); if (discard) discard->revisit(discard, 1); } #if IS_ENABLED(CONFIG_FAIL_SUNRPC) static inline bool cache_defer_immediately(void) { return !fail_sunrpc.ignore_cache_wait && should_fail(&fail_sunrpc.attr, 1); } #else static inline bool cache_defer_immediately(void) { return false; } #endif /* Return true if and only if a deferred request is queued. */ static bool cache_defer_req(struct cache_req *req, struct cache_head *item) { struct cache_deferred_req *dreq; if (!cache_defer_immediately()) { cache_wait_req(req, item); if (!test_bit(CACHE_PENDING, &item->flags)) return false; } dreq = req->defer(req); if (dreq == NULL) return false; setup_deferral(dreq, item, 1); if (!test_bit(CACHE_PENDING, &item->flags)) /* Bit could have been cleared before we managed to * set up the deferral, so need to revisit just in case */ cache_revisit_request(item); cache_limit_defers(); return true; } static void cache_revisit_request(struct cache_head *item) { struct cache_deferred_req *dreq; struct list_head pending; struct hlist_node *tmp; int hash = DFR_HASH(item); INIT_LIST_HEAD(&pending); spin_lock(&cache_defer_lock); hlist_for_each_entry_safe(dreq, tmp, &cache_defer_hash[hash], hash) if (dreq->item == item) { __unhash_deferred_req(dreq); list_add(&dreq->recent, &pending); } spin_unlock(&cache_defer_lock); while (!list_empty(&pending)) { dreq = list_entry(pending.next, struct cache_deferred_req, recent); list_del_init(&dreq->recent); dreq->revisit(dreq, 0); } } void cache_clean_deferred(void *owner) { struct cache_deferred_req *dreq, *tmp; struct list_head pending; INIT_LIST_HEAD(&pending); spin_lock(&cache_defer_lock); list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) { if (dreq->owner == owner) { __unhash_deferred_req(dreq); list_add(&dreq->recent, &pending); } } spin_unlock(&cache_defer_lock); while (!list_empty(&pending)) { dreq = list_entry(pending.next, struct cache_deferred_req, recent); list_del_init(&dreq->recent); dreq->revisit(dreq, 1); } } /* * communicate with user-space * * We have a magic /proc file - /proc/net/rpc/<cachename>/channel. * On read, you get a full request, or block. * On write, an update request is processed. * Poll works if anything to read, and always allows write. * * Implemented by linked list of requests. Each open file has * a ->private that also exists in this list. New requests are added * to the end and may wakeup and preceding readers. * New readers are added to the head. If, on read, an item is found with * CACHE_UPCALLING clear, we free it from the list. * */ static DEFINE_SPINLOCK(queue_lock); struct cache_queue { struct list_head list; int reader; /* if 0, then request */ }; struct cache_request { struct cache_queue q; struct cache_head *item; char * buf; int len; int readers; }; struct cache_reader { struct cache_queue q; int offset; /* if non-0, we have a refcnt on next request */ }; static int cache_request(struct cache_detail *detail, struct cache_request *crq) { char *bp = crq->buf; int len = PAGE_SIZE; detail->cache_request(detail, crq->item, &bp, &len); if (len < 0) return -E2BIG; return PAGE_SIZE - len; } static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos, struct cache_detail *cd) { struct cache_reader *rp = filp->private_data; struct cache_request *rq; struct inode *inode = file_inode(filp); int err; if (count == 0) return 0; inode_lock(inode); /* protect against multiple concurrent * readers on this file */ again: spin_lock(&queue_lock); /* need to find next request */ while (rp->q.list.next != &cd->queue && list_entry(rp->q.list.next, struct cache_queue, list) ->reader) { struct list_head *next = rp->q.list.next; list_move(&rp->q.list, next); } if (rp->q.list.next == &cd->queue) { spin_unlock(&queue_lock); inode_unlock(inode); WARN_ON_ONCE(rp->offset); return 0; } rq = container_of(rp->q.list.next, struct cache_request, q.list); WARN_ON_ONCE(rq->q.reader); if (rp->offset == 0) rq->readers++; spin_unlock(&queue_lock); if (rq->len == 0) { err = cache_request(cd, rq); if (err < 0) goto out; rq->len = err; } if (rp->offset == 0 && !test_bit(CACHE_PENDING, &rq->item->flags)) { err = -EAGAIN; spin_lock(&queue_lock); list_move(&rp->q.list, &rq->q.list); spin_unlock(&queue_lock); } else { if (rp->offset + count > rq->len) count = rq->len - rp->offset; err = -EFAULT; if (copy_to_user(buf, rq->buf + rp->offset, count)) goto out; rp->offset += count; if (rp->offset >= rq->len) { rp->offset = 0; spin_lock(&queue_lock); list_move(&rp->q.list, &rq->q.list); spin_unlock(&queue_lock); } err = 0; } out: if (rp->offset == 0) { /* need to release rq */ spin_lock(&queue_lock); rq->readers--; if (rq->readers == 0 && !test_bit(CACHE_PENDING, &rq->item->flags)) { list_del(&rq->q.list); spin_unlock(&queue_lock); cache_put(rq->item, cd); kfree(rq->buf); kfree(rq); } else spin_unlock(&queue_lock); } if (err == -EAGAIN) goto again; inode_unlock(inode); return err ? err : count; } static ssize_t cache_do_downcall(char *kaddr, const char __user *buf, size_t count, struct cache_detail *cd) { ssize_t ret; if (count == 0) return -EINVAL; if (copy_from_user(kaddr, buf, count)) return -EFAULT; kaddr[count] = '\0'; ret = cd->cache_parse(cd, kaddr, count); if (!ret) ret = count; return ret; } static ssize_t cache_downcall(struct address_space *mapping, const char __user *buf, size_t count, struct cache_detail *cd) { char *write_buf; ssize_t ret = -ENOMEM; if (count >= 32768) { /* 32k is max userland buffer, lets check anyway */ ret = -EINVAL; goto out; } write_buf = kvmalloc(count + 1, GFP_KERNEL); if (!write_buf) goto out; ret = cache_do_downcall(write_buf, buf, count, cd); kvfree(write_buf); out: return ret; } static ssize_t cache_write(struct file *filp, const char __user *buf, size_t count, loff_t *ppos, struct cache_detail *cd) { struct address_space *mapping = filp->f_mapping; struct inode *inode = file_inode(filp); ssize_t ret = -EINVAL; if (!cd->cache_parse) goto out; inode_lock(inode); ret = cache_downcall(mapping, buf, count, cd); inode_unlock(inode); out: return ret; } static DECLARE_WAIT_QUEUE_HEAD(queue_wait); static __poll_t cache_poll(struct file *filp, poll_table *wait, struct cache_detail *cd) { __poll_t mask; struct cache_reader *rp = filp->private_data; struct cache_queue *cq; poll_wait(filp, &queue_wait, wait); /* alway allow write */ mask = EPOLLOUT | EPOLLWRNORM; if (!rp) return mask; spin_lock(&queue_lock); for (cq= &rp->q; &cq->list != &cd->queue; cq = list_entry(cq->list.next, struct cache_queue, list)) if (!cq->reader) { mask |= EPOLLIN | EPOLLRDNORM; break; } spin_unlock(&queue_lock); return mask; } static int cache_ioctl(struct inode *ino, struct file *filp, unsigned int cmd, unsigned long arg, struct cache_detail *cd) { int len = 0; struct cache_reader *rp = filp->private_data; struct cache_queue *cq; if (cmd != FIONREAD || !rp) return -EINVAL; spin_lock(&queue_lock); /* only find the length remaining in current request, * or the length of the next request */ for (cq= &rp->q; &cq->list != &cd->queue; cq = list_entry(cq->list.next, struct cache_queue, list)) if (!cq->reader) { struct cache_request *cr = container_of(cq, struct cache_request, q); len = cr->len - rp->offset; break; } spin_unlock(&queue_lock); return put_user(len, (int __user *)arg); } static int cache_open(struct inode *inode, struct file *filp, struct cache_detail *cd) { struct cache_reader *rp = NULL; if (!cd || !try_module_get(cd->owner)) return -EACCES; nonseekable_open(inode, filp); if (filp->f_mode & FMODE_READ) { rp = kmalloc(sizeof(*rp), GFP_KERNEL); if (!rp) { module_put(cd->owner); return -ENOMEM; } rp->offset = 0; rp->q.reader = 1; spin_lock(&queue_lock); list_add(&rp->q.list, &cd->queue); spin_unlock(&queue_lock); } if (filp->f_mode & FMODE_WRITE) atomic_inc(&cd->writers); filp->private_data = rp; return 0; } static int cache_release(struct inode *inode, struct file *filp, struct cache_detail *cd) { struct cache_reader *rp = filp->private_data; if (rp) { spin_lock(&queue_lock); if (rp->offset) { struct cache_queue *cq; for (cq= &rp->q; &cq->list != &cd->queue; cq = list_entry(cq->list.next, struct cache_queue, list)) if (!cq->reader) { container_of(cq, struct cache_request, q) ->readers--; break; } rp->offset = 0; } list_del(&rp->q.list); spin_unlock(&queue_lock); filp->private_data = NULL; kfree(rp); } if (filp->f_mode & FMODE_WRITE) { atomic_dec(&cd->writers); cd->last_close = seconds_since_boot(); } module_put(cd->owner); return 0; } static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch) { struct cache_queue *cq, *tmp; struct cache_request *cr; struct list_head dequeued; INIT_LIST_HEAD(&dequeued); spin_lock(&queue_lock); list_for_each_entry_safe(cq, tmp, &detail->queue, list) if (!cq->reader) { cr = container_of(cq, struct cache_request, q); if (cr->item != ch) continue; if (test_bit(CACHE_PENDING, &ch->flags)) /* Lost a race and it is pending again */ break; if (cr->readers != 0) continue; list_move(&cr->q.list, &dequeued); } spin_unlock(&queue_lock); while (!list_empty(&dequeued)) { cr = list_entry(dequeued.next, struct cache_request, q.list); list_del(&cr->q.list); cache_put(cr->item, detail); kfree(cr->buf); kfree(cr); } } /* * Support routines for text-based upcalls. * Fields are separated by spaces. * Fields are either mangled to quote space tab newline slosh with slosh * or a hexified with a leading \x * Record is terminated with newline. * */ void qword_add(char **bpp, int *lp, char *str) { char *bp = *bpp; int len = *lp; int ret; if (len < 0) return; ret = string_escape_str(str, bp, len, ESCAPE_OCTAL, "\\ \n\t"); if (ret >= len) { bp += len; len = -1; } else { bp += ret; len -= ret; *bp++ = ' '; len--; } *bpp = bp; *lp = len; } EXPORT_SYMBOL_GPL(qword_add); void qword_addhex(char **bpp, int *lp, char *buf, int blen) { char *bp = *bpp; int len = *lp; if (len < 0) return; if (len > 2) { *bp++ = '\\'; *bp++ = 'x'; len -= 2; while (blen && len >= 2) { bp = hex_byte_pack(bp, *buf++); len -= 2; blen--; } } if (blen || len<1) len = -1; else { *bp++ = ' '; len--; } *bpp = bp; *lp = len; } EXPORT_SYMBOL_GPL(qword_addhex); static void warn_no_listener(struct cache_detail *detail) { if (detail->last_warn != detail->last_close) { detail->last_warn = detail->last_close; if (detail->warn_no_listener) detail->warn_no_listener(detail, detail->last_close != 0); } } static bool cache_listeners_exist(struct cache_detail *detail) { if (atomic_read(&detail->writers)) return true; if (detail->last_close == 0) /* This cache was never opened */ return false; if (detail->last_close < seconds_since_boot() - 30) /* * We allow for the possibility that someone might * restart a userspace daemon without restarting the * server; but after 30 seconds, we give up. */ return false; return true; } /* * register an upcall request to user-space and queue it up for read() by the * upcall daemon. * * Each request is at most one page long. */ static int cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h) { char *buf; struct cache_request *crq; int ret = 0; if (test_bit(CACHE_CLEANED, &h->flags)) /* Too late to make an upcall */ return -EAGAIN; buf = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!buf) return -EAGAIN; crq = kmalloc(sizeof (*crq), GFP_KERNEL); if (!crq) { kfree(buf); return -EAGAIN; } crq->q.reader = 0; crq->buf = buf; crq->len = 0; crq->readers = 0; spin_lock(&queue_lock); if (test_bit(CACHE_PENDING, &h->flags)) { crq->item = cache_get(h); list_add_tail(&crq->q.list, &detail->queue); trace_cache_entry_upcall(detail, h); } else /* Lost a race, no longer PENDING, so don't enqueue */ ret = -EAGAIN; spin_unlock(&queue_lock); wake_up(&queue_wait); if (ret == -EAGAIN) { kfree(buf); kfree(crq); } return ret; } int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h) { if (test_and_set_bit(CACHE_PENDING, &h->flags)) return 0; return cache_pipe_upcall(detail, h); } EXPORT_SYMBOL_GPL(sunrpc_cache_pipe_upcall); int sunrpc_cache_pipe_upcall_timeout(struct cache_detail *detail, struct cache_head *h) { if (!cache_listeners_exist(detail)) { warn_no_listener(detail); trace_cache_entry_no_listener(detail, h); return -EINVAL; } return sunrpc_cache_pipe_upcall(detail, h); } EXPORT_SYMBOL_GPL(sunrpc_cache_pipe_upcall_timeout); /* * parse a message from user-space and pass it * to an appropriate cache * Messages are, like requests, separated into fields by * spaces and dequotes as \xHEXSTRING or embedded \nnn octal * * Message is * reply cachename expiry key ... content.... * * key and content are both parsed by cache */ int qword_get(char **bpp, char *dest, int bufsize) { /* return bytes copied, or -1 on error */ char *bp = *bpp; int len = 0; while (*bp == ' ') bp++; if (bp[0] == '\\' && bp[1] == 'x') { /* HEX STRING */ bp += 2; while (len < bufsize - 1) { int h, l; h = hex_to_bin(bp[0]); if (h < 0) break; l = hex_to_bin(bp[1]); if (l < 0) break; *dest++ = (h << 4) | l; bp += 2; len++; } } else { /* text with \nnn octal quoting */ while (*bp != ' ' && *bp != '\n' && *bp && len < bufsize-1) { if (*bp == '\\' && isodigit(bp[1]) && (bp[1] <= '3') && isodigit(bp[2]) && isodigit(bp[3])) { int byte = (*++bp -'0'); bp++; byte = (byte << 3) | (*bp++ - '0'); byte = (byte << 3) | (*bp++ - '0'); *dest++ = byte; len++; } else { *dest++ = *bp++; len++; } } } if (*bp != ' ' && *bp != '\n' && *bp != '\0') return -1; while (*bp == ' ') bp++; *bpp = bp; *dest = '\0'; return len; } EXPORT_SYMBOL_GPL(qword_get); /* * support /proc/net/rpc/$CACHENAME/content * as a seqfile. * We call ->cache_show passing NULL for the item to * get a header, then pass each real item in the cache */ static void *__cache_seq_start(struct seq_file *m, loff_t *pos) { loff_t n = *pos; unsigned int hash, entry; struct cache_head *ch; struct cache_detail *cd = m->private; if (!n--) return SEQ_START_TOKEN; hash = n >> 32; entry = n & ((1LL<<32) - 1); hlist_for_each_entry_rcu(ch, &cd->hash_table[hash], cache_list) if (!entry--) return ch; n &= ~((1LL<<32) - 1); do { hash++; n += 1LL<<32; } while(hash < cd->hash_size && hlist_empty(&cd->hash_table[hash])); if (hash >= cd->hash_size) return NULL; *pos = n+1; return hlist_entry_safe(rcu_dereference_raw( hlist_first_rcu(&cd->hash_table[hash])), struct cache_head, cache_list); } static void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos) { struct cache_head *ch = p; int hash = (*pos >> 32); struct cache_detail *cd = m->private; if (p == SEQ_START_TOKEN) hash = 0; else if (ch->cache_list.next == NULL) { hash++; *pos += 1LL<<32; } else { ++*pos; return hlist_entry_safe(rcu_dereference_raw( hlist_next_rcu(&ch->cache_list)), struct cache_head, cache_list); } *pos &= ~((1LL<<32) - 1); while (hash < cd->hash_size && hlist_empty(&cd->hash_table[hash])) { hash++; *pos += 1LL<<32; } if (hash >= cd->hash_size) return NULL; ++*pos; return hlist_entry_safe(rcu_dereference_raw( hlist_first_rcu(&cd->hash_table[hash])), struct cache_head, cache_list); } void *cache_seq_start_rcu(struct seq_file *m, loff_t *pos) __acquires(RCU) { rcu_read_lock(); return __cache_seq_start(m, pos); } EXPORT_SYMBOL_GPL(cache_seq_start_rcu); void *cache_seq_next_rcu(struct seq_file *file, void *p, loff_t *pos) { return cache_seq_next(file, p, pos); } EXPORT_SYMBOL_GPL(cache_seq_next_rcu); void cache_seq_stop_rcu(struct seq_file *m, void *p) __releases(RCU) { rcu_read_unlock(); } EXPORT_SYMBOL_GPL(cache_seq_stop_rcu); static int c_show(struct seq_file *m, void *p) { struct cache_head *cp = p; struct cache_detail *cd = m->private; if (p == SEQ_START_TOKEN) return cd->cache_show(m, cd, NULL); ifdebug(CACHE) seq_printf(m, "# expiry=%lld refcnt=%d flags=%lx\n", convert_to_wallclock(cp->expiry_time), kref_read(&cp->ref), cp->flags); cache_get(cp); if (cache_check(cd, cp, NULL)) /* cache_check does a cache_put on failure */ seq_puts(m, "# "); else { if (cache_is_expired(cd, cp)) seq_puts(m, "# "); cache_put(cp, cd); } return cd->cache_show(m, cd, cp); } static const struct seq_operations cache_content_op = { .start = cache_seq_start_rcu, .next = cache_seq_next_rcu, .stop = cache_seq_stop_rcu, .show = c_show, }; static int content_open(struct inode *inode, struct file *file, struct cache_detail *cd) { struct seq_file *seq; int err; if (!cd || !try_module_get(cd->owner)) return -EACCES; err = seq_open(file, &cache_content_op); if (err) { module_put(cd->owner); return err; } seq = file->private_data; seq->private = cd; return 0; } static int content_release(struct inode *inode, struct file *file, struct cache_detail *cd) { int ret = seq_release(inode, file); module_put(cd->owner); return ret; } static int open_flush(struct inode *inode, struct file *file, struct cache_detail *cd) { if (!cd || !try_module_get(cd->owner)) return -EACCES; return nonseekable_open(inode, file); } static int release_flush(struct inode *inode, struct file *file, struct cache_detail *cd) { module_put(cd->owner); return 0; } static ssize_t read_flush(struct file *file, char __user *buf, size_t count, loff_t *ppos, struct cache_detail *cd) { char tbuf[22]; size_t len; len = snprintf(tbuf, sizeof(tbuf), "%llu\n", convert_to_wallclock(cd->flush_time)); return simple_read_from_buffer(buf, count, ppos, tbuf, len); } static ssize_t write_flush(struct file *file, const char __user *buf, size_t count, loff_t *ppos, struct cache_detail *cd) { char tbuf[20]; char *ep; time64_t now; if (*ppos || count > sizeof(tbuf)-1) return -EINVAL; if (copy_from_user(tbuf, buf, count)) return -EFAULT; tbuf[count] = 0; simple_strtoul(tbuf, &ep, 0); if (*ep && *ep != '\n') return -EINVAL; /* Note that while we check that 'buf' holds a valid number, * we always ignore the value and just flush everything. * Making use of the number leads to races. */ now = seconds_since_boot(); /* Always flush everything, so behave like cache_purge() * Do this by advancing flush_time to the current time, * or by one second if it has already reached the current time. * Newly added cache entries will always have ->last_refresh greater * that ->flush_time, so they don't get flushed prematurely. */ if (cd->flush_time >= now) now = cd->flush_time + 1; cd->flush_time = now; cd->nextcheck = now; cache_flush(); if (cd->flush) cd->flush(); *ppos += count; return count; } static ssize_t cache_read_procfs(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { struct cache_detail *cd = pde_data(file_inode(filp)); return cache_read(filp, buf, count, ppos, cd); } static ssize_t cache_write_procfs(struct file *filp, const char __user *buf, size_t count, loff_t *ppos) { struct cache_detail *cd = pde_data(file_inode(filp)); return cache_write(filp, buf, count, ppos, cd); } static __poll_t cache_poll_procfs(struct file *filp, poll_table *wait) { struct cache_detail *cd = pde_data(file_inode(filp)); return cache_poll(filp, wait, cd); } static long cache_ioctl_procfs(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(filp); struct cache_detail *cd = pde_data(inode); return cache_ioctl(inode, filp, cmd, arg, cd); } static int cache_open_procfs(struct inode *inode, struct file *filp) { struct cache_detail *cd = pde_data(inode); return cache_open(inode, filp, cd); } static int cache_release_procfs(struct inode *inode, struct file *filp) { struct cache_detail *cd = pde_data(inode); return cache_release(inode, filp, cd); } static const struct proc_ops cache_channel_proc_ops = { .proc_lseek = no_llseek, .proc_read = cache_read_procfs, .proc_write = cache_write_procfs, .proc_poll = cache_poll_procfs, .proc_ioctl = cache_ioctl_procfs, /* for FIONREAD */ .proc_open = cache_open_procfs, .proc_release = cache_release_procfs, }; static int content_open_procfs(struct inode *inode, struct file *filp) { struct cache_detail *cd = pde_data(inode); return content_open(inode, filp, cd); } static int content_release_procfs(struct inode *inode, struct file *filp) { struct cache_detail *cd = pde_data(inode); return content_release(inode, filp, cd); } static const struct proc_ops content_proc_ops = { .proc_open = content_open_procfs, .proc_read = seq_read, .proc_lseek = seq_lseek, .proc_release = content_release_procfs, }; static int open_flush_procfs(struct inode *inode, struct file *filp) { struct cache_detail *cd = pde_data(inode); return open_flush(inode, filp, cd); } static int release_flush_procfs(struct inode *inode, struct file *filp) { struct cache_detail *cd = pde_data(inode); return release_flush(inode, filp, cd); } static ssize_t read_flush_procfs(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { struct cache_detail *cd = pde_data(file_inode(filp)); return read_flush(filp, buf, count, ppos, cd); } static ssize_t write_flush_procfs(struct file *filp, const char __user *buf, size_t count, loff_t *ppos) { struct cache_detail *cd = pde_data(file_inode(filp)); return write_flush(filp, buf, count, ppos, cd); } static const struct proc_ops cache_flush_proc_ops = { .proc_open = open_flush_procfs, .proc_read = read_flush_procfs, .proc_write = write_flush_procfs, .proc_release = release_flush_procfs, .proc_lseek = no_llseek, }; static void remove_cache_proc_entries(struct cache_detail *cd) { if (cd->procfs) { proc_remove(cd->procfs); cd->procfs = NULL; } } #ifdef CONFIG_PROC_FS static int create_cache_proc_entries(struct cache_detail *cd, struct net *net) { struct proc_dir_entry *p; struct sunrpc_net *sn; sn = net_generic(net, sunrpc_net_id); cd->procfs = proc_mkdir(cd->name, sn->proc_net_rpc); if (cd->procfs == NULL) goto out_nomem; p = proc_create_data("flush", S_IFREG | 0600, cd->procfs, &cache_flush_proc_ops, cd); if (p == NULL) goto out_nomem; if (cd->cache_request || cd->cache_parse) { p = proc_create_data("channel", S_IFREG | 0600, cd->procfs, &cache_channel_proc_ops, cd); if (p == NULL) goto out_nomem; } if (cd->cache_show) { p = proc_create_data("content", S_IFREG | 0400, cd->procfs, &content_proc_ops, cd); if (p == NULL) goto out_nomem; } return 0; out_nomem: remove_cache_proc_entries(cd); return -ENOMEM; } #else /* CONFIG_PROC_FS */ static int create_cache_proc_entries(struct cache_detail *cd, struct net *net) { return 0; } #endif void __init cache_initialize(void) { INIT_DEFERRABLE_WORK(&cache_cleaner, do_cache_clean); } int cache_register_net(struct cache_detail *cd, struct net *net) { int ret; sunrpc_init_cache_detail(cd); ret = create_cache_proc_entries(cd, net); if (ret) sunrpc_destroy_cache_detail(cd); return ret; } EXPORT_SYMBOL_GPL(cache_register_net); void cache_unregister_net(struct cache_detail *cd, struct net *net) { remove_cache_proc_entries(cd); sunrpc_destroy_cache_detail(cd); } EXPORT_SYMBOL_GPL(cache_unregister_net); struct cache_detail *cache_create_net(const struct cache_detail *tmpl, struct net *net) { struct cache_detail *cd; int i; cd = kmemdup(tmpl, sizeof(struct cache_detail), GFP_KERNEL); if (cd == NULL) return ERR_PTR(-ENOMEM); cd->hash_table = kcalloc(cd->hash_size, sizeof(struct hlist_head), GFP_KERNEL); if (cd->hash_table == NULL) { kfree(cd); return ERR_PTR(-ENOMEM); } for (i = 0; i < cd->hash_size; i++) INIT_HLIST_HEAD(&cd->hash_table[i]); cd->net = net; return cd; } EXPORT_SYMBOL_GPL(cache_create_net); void cache_destroy_net(struct cache_detail *cd, struct net *net) { kfree(cd->hash_table); kfree(cd); } EXPORT_SYMBOL_GPL(cache_destroy_net); static ssize_t cache_read_pipefs(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { struct cache_detail *cd = RPC_I(file_inode(filp))->private; return cache_read(filp, buf, count, ppos, cd); } static ssize_t cache_write_pipefs(struct file *filp, const char __user *buf, size_t count, loff_t *ppos) { struct cache_detail *cd = RPC_I(file_inode(filp))->private; return cache_write(filp, buf, count, ppos, cd); } static __poll_t cache_poll_pipefs(struct file *filp, poll_table *wait) { struct cache_detail *cd = RPC_I(file_inode(filp))->private; return cache_poll(filp, wait, cd); } static long cache_ioctl_pipefs(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(filp); struct cache_detail *cd = RPC_I(inode)->private; return cache_ioctl(inode, filp, cmd, arg, cd); } static int cache_open_pipefs(struct inode *inode, struct file *filp) { struct cache_detail *cd = RPC_I(inode)->private; return cache_open(inode, filp, cd); } static int cache_release_pipefs(struct inode *inode, struct file *filp) { struct cache_detail *cd = RPC_I(inode)->private; return cache_release(inode, filp, cd); } const struct file_operations cache_file_operations_pipefs = { .owner = THIS_MODULE, .llseek = no_llseek, .read = cache_read_pipefs, .write = cache_write_pipefs, .poll = cache_poll_pipefs, .unlocked_ioctl = cache_ioctl_pipefs, /* for FIONREAD */ .open = cache_open_pipefs, .release = cache_release_pipefs, }; static int content_open_pipefs(struct inode *inode, struct file *filp) { struct cache_detail *cd = RPC_I(inode)->private; return content_open(inode, filp, cd); } static int content_release_pipefs(struct inode *inode, struct file *filp) { struct cache_detail *cd = RPC_I(inode)->private; return content_release(inode, filp, cd); } const struct file_operations content_file_operations_pipefs = { .open = content_open_pipefs, .read = seq_read, .llseek = seq_lseek, .release = content_release_pipefs, }; static int open_flush_pipefs(struct inode *inode, struct file *filp) { struct cache_detail *cd = RPC_I(inode)->private; return open_flush(inode, filp, cd); } static int release_flush_pipefs(struct inode *inode, struct file *filp) { struct cache_detail *cd = RPC_I(inode)->private; return release_flush(inode, filp, cd); } static ssize_t read_flush_pipefs(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { struct cache_detail *cd = RPC_I(file_inode(filp))->private; return read_flush(filp, buf, count, ppos, cd); } static ssize_t write_flush_pipefs(struct file *filp, const char __user *buf, size_t count, loff_t *ppos) { struct cache_detail *cd = RPC_I(file_inode(filp))->private; return write_flush(filp, buf, count, ppos, cd); } const struct file_operations cache_flush_operations_pipefs = { .open = open_flush_pipefs, .read = read_flush_pipefs, .write = write_flush_pipefs, .release = release_flush_pipefs, .llseek = no_llseek, }; int sunrpc_cache_register_pipefs(struct dentry *parent, const char *name, umode_t umode, struct cache_detail *cd) { struct dentry *dir = rpc_create_cache_dir(parent, name, umode, cd); if (IS_ERR(dir)) return PTR_ERR(dir); cd->pipefs = dir; return 0; } EXPORT_SYMBOL_GPL(sunrpc_cache_register_pipefs); void sunrpc_cache_unregister_pipefs(struct cache_detail *cd) { if (cd->pipefs) { rpc_remove_cache_dir(cd->pipefs); cd->pipefs = NULL; } } EXPORT_SYMBOL_GPL(sunrpc_cache_unregister_pipefs); void sunrpc_cache_unhash(struct cache_detail *cd, struct cache_head *h) { spin_lock(&cd->hash_lock); if (!hlist_unhashed(&h->cache_list)){ sunrpc_begin_cache_remove_entry(h, cd); spin_unlock(&cd->hash_lock); sunrpc_end_cache_remove_entry(h, cd); } else spin_unlock(&cd->hash_lock); } EXPORT_SYMBOL_GPL(sunrpc_cache_unhash); |
| 20 155 138 19 8 17 9 1 8 1 155 118 20 81 80 69 119 6 69 118 86 118 69 69 69 69 119 119 119 118 117 71 4 69 69 69 69 118 119 1 1 81 81 81 81 81 81 81 83 81 1 80 80 4 163 68 68 65 68 119 119 119 99 119 2 119 118 78 78 77 78 78 78 77 62 78 6 6 1 1 1 1 1 1 1 1 1 1 6 1 6 1 8 2 2 2 2 2 2 2 2 8 2 6 8 1 1 1 1 8 8 1 1 177 179 2 179 177 177 180 180 179 72 20 20 201 23 23 23 19 19 4 2 4 4 4 182 198 199 252 224 254 250 253 12 11 7 11 9 11 12 163 21 163 21 1 7 100 98 1 78 76 76 58 78 77 1 77 78 78 78 74 77 3 83 77 77 77 76 77 73 83 3 192 192 192 198 249 102 99 99 255 253 2 253 1 252 1 3 253 258 1 1 1 1 253 248 248 192 250 203 248 3 118 99 118 252 152 106 153 255 258 253 5 250 252 250 152 119 252 119 99 152 258 250 77 77 76 78 77 78 77 77 78 75 82 83 82 81 11 83 81 83 80 83 3 2 2 2 3 2 1 2 1 1 1 1 1 1 4 4 3 1 2 2 2 2 2 2 4 4 3 2 3 2 2 3 2 3 14 14 5 14 1 9 4 1 2 5 2 3 2 7 79 79 3 3 22 79 76 75 253 2 253 255 100 100 275 267 276 274 7 267 252 11 3 276 10 9 9 8 7 10 12 6 12 10 3 7 5 1 6 1 6 6 1 6 6 6 6 5 1 5 5 1 1 6 12 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 | // SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/act_api.c Packet action API. * * Author: Jamal Hadi Salim */ #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/skbuff.h> #include <linux/init.h> #include <linux/kmod.h> #include <linux/err.h> #include <linux/module.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/sch_generic.h> #include <net/pkt_cls.h> #include <net/tc_act/tc_pedit.h> #include <net/act_api.h> #include <net/netlink.h> #include <net/flow_offload.h> #include <net/tc_wrapper.h> #ifdef CONFIG_INET DEFINE_STATIC_KEY_FALSE(tcf_frag_xmit_count); EXPORT_SYMBOL_GPL(tcf_frag_xmit_count); #endif int tcf_dev_queue_xmit(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb)) { #ifdef CONFIG_INET if (static_branch_unlikely(&tcf_frag_xmit_count)) return sch_frag_xmit_hook(skb, xmit); #endif return xmit(skb); } EXPORT_SYMBOL_GPL(tcf_dev_queue_xmit); static void tcf_action_goto_chain_exec(const struct tc_action *a, struct tcf_result *res) { const struct tcf_chain *chain = rcu_dereference_bh(a->goto_chain); res->goto_tp = rcu_dereference_bh(chain->filter_chain); } static void tcf_free_cookie_rcu(struct rcu_head *p) { struct tc_cookie *cookie = container_of(p, struct tc_cookie, rcu); kfree(cookie->data); kfree(cookie); } static void tcf_set_action_cookie(struct tc_cookie __rcu **old_cookie, struct tc_cookie *new_cookie) { struct tc_cookie *old; old = unrcu_pointer(xchg(old_cookie, RCU_INITIALIZER(new_cookie))); if (old) call_rcu(&old->rcu, tcf_free_cookie_rcu); } int tcf_action_check_ctrlact(int action, struct tcf_proto *tp, struct tcf_chain **newchain, struct netlink_ext_ack *extack) { int opcode = TC_ACT_EXT_OPCODE(action), ret = -EINVAL; u32 chain_index; if (!opcode) ret = action > TC_ACT_VALUE_MAX ? -EINVAL : 0; else if (opcode <= TC_ACT_EXT_OPCODE_MAX || action == TC_ACT_UNSPEC) ret = 0; if (ret) { NL_SET_ERR_MSG(extack, "invalid control action"); goto end; } if (TC_ACT_EXT_CMP(action, TC_ACT_GOTO_CHAIN)) { chain_index = action & TC_ACT_EXT_VAL_MASK; if (!tp || !newchain) { ret = -EINVAL; NL_SET_ERR_MSG(extack, "can't goto NULL proto/chain"); goto end; } *newchain = tcf_chain_get_by_act(tp->chain->block, chain_index); if (!*newchain) { ret = -ENOMEM; NL_SET_ERR_MSG(extack, "can't allocate goto_chain"); } } end: return ret; } EXPORT_SYMBOL(tcf_action_check_ctrlact); struct tcf_chain *tcf_action_set_ctrlact(struct tc_action *a, int action, struct tcf_chain *goto_chain) { a->tcfa_action = action; goto_chain = rcu_replace_pointer(a->goto_chain, goto_chain, 1); return goto_chain; } EXPORT_SYMBOL(tcf_action_set_ctrlact); /* XXX: For standalone actions, we don't need a RCU grace period either, because * actions are always connected to filters and filters are already destroyed in * RCU callbacks, so after a RCU grace period actions are already disconnected * from filters. Readers later can not find us. */ static void free_tcf(struct tc_action *p) { struct tcf_chain *chain = rcu_dereference_protected(p->goto_chain, 1); free_percpu(p->cpu_bstats); free_percpu(p->cpu_bstats_hw); free_percpu(p->cpu_qstats); tcf_set_action_cookie(&p->user_cookie, NULL); if (chain) tcf_chain_put_by_act(chain); kfree(p); } static void offload_action_hw_count_set(struct tc_action *act, u32 hw_count) { act->in_hw_count = hw_count; } static void offload_action_hw_count_inc(struct tc_action *act, u32 hw_count) { act->in_hw_count += hw_count; } static void offload_action_hw_count_dec(struct tc_action *act, u32 hw_count) { act->in_hw_count = act->in_hw_count > hw_count ? act->in_hw_count - hw_count : 0; } static unsigned int tcf_offload_act_num_actions_single(struct tc_action *act) { if (is_tcf_pedit(act)) return tcf_pedit_nkeys(act); else return 1; } static bool tc_act_skip_hw(u32 flags) { return (flags & TCA_ACT_FLAGS_SKIP_HW) ? true : false; } static bool tc_act_skip_sw(u32 flags) { return (flags & TCA_ACT_FLAGS_SKIP_SW) ? true : false; } /* SKIP_HW and SKIP_SW are mutually exclusive flags. */ static bool tc_act_flags_valid(u32 flags) { flags &= TCA_ACT_FLAGS_SKIP_HW | TCA_ACT_FLAGS_SKIP_SW; return flags ^ (TCA_ACT_FLAGS_SKIP_HW | TCA_ACT_FLAGS_SKIP_SW); } static int offload_action_init(struct flow_offload_action *fl_action, struct tc_action *act, enum offload_act_command cmd, struct netlink_ext_ack *extack) { int err; fl_action->extack = extack; fl_action->command = cmd; fl_action->index = act->tcfa_index; fl_action->cookie = (unsigned long)act; if (act->ops->offload_act_setup) { spin_lock_bh(&act->tcfa_lock); err = act->ops->offload_act_setup(act, fl_action, NULL, false, extack); spin_unlock_bh(&act->tcfa_lock); return err; } return -EOPNOTSUPP; } static int tcf_action_offload_cmd_ex(struct flow_offload_action *fl_act, u32 *hw_count) { int err; err = flow_indr_dev_setup_offload(NULL, NULL, TC_SETUP_ACT, fl_act, NULL, NULL); if (err < 0) return err; if (hw_count) *hw_count = err; return 0; } static int tcf_action_offload_cmd_cb_ex(struct flow_offload_action *fl_act, u32 *hw_count, flow_indr_block_bind_cb_t *cb, void *cb_priv) { int err; err = cb(NULL, NULL, cb_priv, TC_SETUP_ACT, NULL, fl_act, NULL); if (err < 0) return err; if (hw_count) *hw_count = 1; return 0; } static int tcf_action_offload_cmd(struct flow_offload_action *fl_act, u32 *hw_count, flow_indr_block_bind_cb_t *cb, void *cb_priv) { return cb ? tcf_action_offload_cmd_cb_ex(fl_act, hw_count, cb, cb_priv) : tcf_action_offload_cmd_ex(fl_act, hw_count); } static int tcf_action_offload_add_ex(struct tc_action *action, struct netlink_ext_ack *extack, flow_indr_block_bind_cb_t *cb, void *cb_priv) { bool skip_sw = tc_act_skip_sw(action->tcfa_flags); struct tc_action *actions[TCA_ACT_MAX_PRIO] = { [0] = action, }; struct flow_offload_action *fl_action; u32 in_hw_count = 0; int num, err = 0; if (tc_act_skip_hw(action->tcfa_flags)) return 0; num = tcf_offload_act_num_actions_single(action); fl_action = offload_action_alloc(num); if (!fl_action) return -ENOMEM; err = offload_action_init(fl_action, action, FLOW_ACT_REPLACE, extack); if (err) goto fl_err; err = tc_setup_action(&fl_action->action, actions, 0, extack); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed to setup tc actions for offload"); goto fl_err; } err = tcf_action_offload_cmd(fl_action, &in_hw_count, cb, cb_priv); if (!err) cb ? offload_action_hw_count_inc(action, in_hw_count) : offload_action_hw_count_set(action, in_hw_count); if (skip_sw && !tc_act_in_hw(action)) err = -EINVAL; tc_cleanup_offload_action(&fl_action->action); fl_err: kfree(fl_action); return err; } /* offload the tc action after it is inserted */ static int tcf_action_offload_add(struct tc_action *action, struct netlink_ext_ack *extack) { return tcf_action_offload_add_ex(action, extack, NULL, NULL); } int tcf_action_update_hw_stats(struct tc_action *action) { struct flow_offload_action fl_act = {}; int err; err = offload_action_init(&fl_act, action, FLOW_ACT_STATS, NULL); if (err) return err; err = tcf_action_offload_cmd(&fl_act, NULL, NULL, NULL); if (!err) { preempt_disable(); tcf_action_stats_update(action, fl_act.stats.bytes, fl_act.stats.pkts, fl_act.stats.drops, fl_act.stats.lastused, true); preempt_enable(); action->used_hw_stats = fl_act.stats.used_hw_stats; action->used_hw_stats_valid = true; } else { return -EOPNOTSUPP; } return 0; } EXPORT_SYMBOL(tcf_action_update_hw_stats); static int tcf_action_offload_del_ex(struct tc_action *action, flow_indr_block_bind_cb_t *cb, void *cb_priv) { struct flow_offload_action fl_act = {}; u32 in_hw_count = 0; int err = 0; if (!tc_act_in_hw(action)) return 0; err = offload_action_init(&fl_act, action, FLOW_ACT_DESTROY, NULL); if (err) return err; err = tcf_action_offload_cmd(&fl_act, &in_hw_count, cb, cb_priv); if (err < 0) return err; if (!cb && action->in_hw_count != in_hw_count) return -EINVAL; /* do not need to update hw state when deleting action */ if (cb && in_hw_count) offload_action_hw_count_dec(action, in_hw_count); return 0; } static int tcf_action_offload_del(struct tc_action *action) { return tcf_action_offload_del_ex(action, NULL, NULL); } static void tcf_action_cleanup(struct tc_action *p) { tcf_action_offload_del(p); if (p->ops->cleanup) p->ops->cleanup(p); gen_kill_estimator(&p->tcfa_rate_est); free_tcf(p); } static int __tcf_action_put(struct tc_action *p, bool bind) { struct tcf_idrinfo *idrinfo = p->idrinfo; if (refcount_dec_and_mutex_lock(&p->tcfa_refcnt, &idrinfo->lock)) { if (bind) atomic_dec(&p->tcfa_bindcnt); idr_remove(&idrinfo->action_idr, p->tcfa_index); mutex_unlock(&idrinfo->lock); tcf_action_cleanup(p); return 1; } if (bind) atomic_dec(&p->tcfa_bindcnt); return 0; } static int __tcf_idr_release(struct tc_action *p, bool bind, bool strict) { int ret = 0; /* Release with strict==1 and bind==0 is only called through act API * interface (classifiers always bind). Only case when action with * positive reference count and zero bind count can exist is when it was * also created with act API (unbinding last classifier will destroy the * action if it was created by classifier). So only case when bind count * can be changed after initial check is when unbound action is * destroyed by act API while classifier binds to action with same id * concurrently. This result either creation of new action(same behavior * as before), or reusing existing action if concurrent process * increments reference count before action is deleted. Both scenarios * are acceptable. */ if (p) { if (!bind && strict && atomic_read(&p->tcfa_bindcnt) > 0) return -EPERM; if (__tcf_action_put(p, bind)) ret = ACT_P_DELETED; } return ret; } int tcf_idr_release(struct tc_action *a, bool bind) { const struct tc_action_ops *ops = a->ops; int ret; ret = __tcf_idr_release(a, bind, false); if (ret == ACT_P_DELETED) module_put(ops->owner); return ret; } EXPORT_SYMBOL(tcf_idr_release); static size_t tcf_action_shared_attrs_size(const struct tc_action *act) { struct tc_cookie *user_cookie; u32 cookie_len = 0; rcu_read_lock(); user_cookie = rcu_dereference(act->user_cookie); if (user_cookie) cookie_len = nla_total_size(user_cookie->len); rcu_read_unlock(); return nla_total_size(0) /* action number nested */ + nla_total_size(IFNAMSIZ) /* TCA_ACT_KIND */ + cookie_len /* TCA_ACT_COOKIE */ + nla_total_size(sizeof(struct nla_bitfield32)) /* TCA_ACT_HW_STATS */ + nla_total_size(0) /* TCA_ACT_STATS nested */ + nla_total_size(sizeof(struct nla_bitfield32)) /* TCA_ACT_FLAGS */ /* TCA_STATS_BASIC */ + nla_total_size_64bit(sizeof(struct gnet_stats_basic)) /* TCA_STATS_PKT64 */ + nla_total_size_64bit(sizeof(u64)) /* TCA_STATS_QUEUE */ + nla_total_size_64bit(sizeof(struct gnet_stats_queue)) + nla_total_size(0) /* TCA_ACT_OPTIONS nested */ + nla_total_size(sizeof(struct tcf_t)); /* TCA_GACT_TM */ } static size_t tcf_action_full_attrs_size(size_t sz) { return NLMSG_HDRLEN /* struct nlmsghdr */ + sizeof(struct tcamsg) + nla_total_size(0) /* TCA_ACT_TAB nested */ + sz; } static size_t tcf_action_fill_size(const struct tc_action *act) { size_t sz = tcf_action_shared_attrs_size(act); if (act->ops->get_fill_size) return act->ops->get_fill_size(act) + sz; return sz; } static int tcf_action_dump_terse(struct sk_buff *skb, struct tc_action *a, bool from_act) { unsigned char *b = skb_tail_pointer(skb); struct tc_cookie *cookie; if (nla_put_string(skb, TCA_ACT_KIND, a->ops->kind)) goto nla_put_failure; if (tcf_action_copy_stats(skb, a, 0)) goto nla_put_failure; if (from_act && nla_put_u32(skb, TCA_ACT_INDEX, a->tcfa_index)) goto nla_put_failure; rcu_read_lock(); cookie = rcu_dereference(a->user_cookie); if (cookie) { if (nla_put(skb, TCA_ACT_COOKIE, cookie->len, cookie->data)) { rcu_read_unlock(); goto nla_put_failure; } } rcu_read_unlock(); return 0; nla_put_failure: nlmsg_trim(skb, b); return -1; } static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, struct netlink_callback *cb) { int err = 0, index = -1, s_i = 0, n_i = 0; u32 act_flags = cb->args[2]; unsigned long jiffy_since = cb->args[3]; struct nlattr *nest; struct idr *idr = &idrinfo->action_idr; struct tc_action *p; unsigned long id = 1; unsigned long tmp; mutex_lock(&idrinfo->lock); s_i = cb->args[0]; idr_for_each_entry_ul(idr, p, tmp, id) { index++; if (index < s_i) continue; if (IS_ERR(p)) continue; if (jiffy_since && time_after(jiffy_since, (unsigned long)p->tcfa_tm.lastuse)) continue; tcf_action_update_hw_stats(p); nest = nla_nest_start_noflag(skb, n_i); if (!nest) { index--; goto nla_put_failure; } err = (act_flags & TCA_ACT_FLAG_TERSE_DUMP) ? tcf_action_dump_terse(skb, p, true) : tcf_action_dump_1(skb, p, 0, 0); if (err < 0) { index--; nlmsg_trim(skb, nest); goto done; } nla_nest_end(skb, nest); n_i++; if (!(act_flags & TCA_ACT_FLAG_LARGE_DUMP_ON) && n_i >= TCA_ACT_MAX_PRIO) goto done; } done: if (index >= 0) cb->args[0] = index + 1; mutex_unlock(&idrinfo->lock); if (n_i) { if (act_flags & TCA_ACT_FLAG_LARGE_DUMP_ON) cb->args[1] = n_i; } return n_i; nla_put_failure: nla_nest_cancel(skb, nest); goto done; } static int tcf_idr_release_unsafe(struct tc_action *p) { if (atomic_read(&p->tcfa_bindcnt) > 0) return -EPERM; if (refcount_dec_and_test(&p->tcfa_refcnt)) { idr_remove(&p->idrinfo->action_idr, p->tcfa_index); tcf_action_cleanup(p); return ACT_P_DELETED; } return 0; } static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, const struct tc_action_ops *ops, struct netlink_ext_ack *extack) { struct nlattr *nest; int n_i = 0; int ret = -EINVAL; struct idr *idr = &idrinfo->action_idr; struct tc_action *p; unsigned long id = 1; unsigned long tmp; nest = nla_nest_start_noflag(skb, 0); if (nest == NULL) goto nla_put_failure; if (nla_put_string(skb, TCA_ACT_KIND, ops->kind)) goto nla_put_failure; ret = 0; mutex_lock(&idrinfo->lock); idr_for_each_entry_ul(idr, p, tmp, id) { if (IS_ERR(p)) continue; ret = tcf_idr_release_unsafe(p); if (ret == ACT_P_DELETED) module_put(ops->owner); else if (ret < 0) break; n_i++; } mutex_unlock(&idrinfo->lock); if (ret < 0) { if (n_i) NL_SET_ERR_MSG(extack, "Unable to flush all TC actions"); else goto nla_put_failure; } ret = nla_put_u32(skb, TCA_FCNT, n_i); if (ret) goto nla_put_failure; nla_nest_end(skb, nest); return n_i; nla_put_failure: nla_nest_cancel(skb, nest); return ret; } int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, struct netlink_callback *cb, int type, const struct tc_action_ops *ops, struct netlink_ext_ack *extack) { struct tcf_idrinfo *idrinfo = tn->idrinfo; if (type == RTM_DELACTION) { return tcf_del_walker(idrinfo, skb, ops, extack); } else if (type == RTM_GETACTION) { return tcf_dump_walker(idrinfo, skb, cb); } else { WARN(1, "tcf_generic_walker: unknown command %d\n", type); NL_SET_ERR_MSG(extack, "tcf_generic_walker: unknown command"); return -EINVAL; } } EXPORT_SYMBOL(tcf_generic_walker); int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index) { struct tcf_idrinfo *idrinfo = tn->idrinfo; struct tc_action *p; mutex_lock(&idrinfo->lock); p = idr_find(&idrinfo->action_idr, index); if (IS_ERR(p)) p = NULL; else if (p) refcount_inc(&p->tcfa_refcnt); mutex_unlock(&idrinfo->lock); if (p) { *a = p; return true; } return false; } EXPORT_SYMBOL(tcf_idr_search); static int __tcf_generic_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, const struct tc_action_ops *ops, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, ops->net_id); if (unlikely(ops->walk)) return ops->walk(net, skb, cb, type, ops, extack); return tcf_generic_walker(tn, skb, cb, type, ops, extack); } static int __tcf_idr_search(struct net *net, const struct tc_action_ops *ops, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, ops->net_id); if (unlikely(ops->lookup)) return ops->lookup(net, a, index); return tcf_idr_search(tn, a, index); } static int tcf_idr_delete_index(struct tcf_idrinfo *idrinfo, u32 index) { struct tc_action *p; int ret = 0; mutex_lock(&idrinfo->lock); p = idr_find(&idrinfo->action_idr, index); if (!p) { mutex_unlock(&idrinfo->lock); return -ENOENT; } if (!atomic_read(&p->tcfa_bindcnt)) { if (refcount_dec_and_test(&p->tcfa_refcnt)) { struct module *owner = p->ops->owner; WARN_ON(p != idr_remove(&idrinfo->action_idr, p->tcfa_index)); mutex_unlock(&idrinfo->lock); tcf_action_cleanup(p); module_put(owner); return 0; } ret = 0; } else { ret = -EPERM; } mutex_unlock(&idrinfo->lock); return ret; } int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, struct tc_action **a, const struct tc_action_ops *ops, int bind, bool cpustats, u32 flags) { struct tc_action *p = kzalloc(ops->size, GFP_KERNEL); struct tcf_idrinfo *idrinfo = tn->idrinfo; int err = -ENOMEM; if (unlikely(!p)) return -ENOMEM; refcount_set(&p->tcfa_refcnt, 1); if (bind) atomic_set(&p->tcfa_bindcnt, 1); if (cpustats) { p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync); if (!p->cpu_bstats) goto err1; p->cpu_bstats_hw = netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync); if (!p->cpu_bstats_hw) goto err2; p->cpu_qstats = alloc_percpu(struct gnet_stats_queue); if (!p->cpu_qstats) goto err3; } gnet_stats_basic_sync_init(&p->tcfa_bstats); gnet_stats_basic_sync_init(&p->tcfa_bstats_hw); spin_lock_init(&p->tcfa_lock); p->tcfa_index = index; p->tcfa_tm.install = jiffies; p->tcfa_tm.lastuse = jiffies; p->tcfa_tm.firstuse = 0; p->tcfa_flags = flags; if (est) { err = gen_new_estimator(&p->tcfa_bstats, p->cpu_bstats, &p->tcfa_rate_est, &p->tcfa_lock, false, est); if (err) goto err4; } p->idrinfo = idrinfo; __module_get(ops->owner); p->ops = ops; *a = p; return 0; err4: free_percpu(p->cpu_qstats); err3: free_percpu(p->cpu_bstats_hw); err2: free_percpu(p->cpu_bstats); err1: kfree(p); return err; } EXPORT_SYMBOL(tcf_idr_create); int tcf_idr_create_from_flags(struct tc_action_net *tn, u32 index, struct nlattr *est, struct tc_action **a, const struct tc_action_ops *ops, int bind, u32 flags) { /* Set cpustats according to actions flags. */ return tcf_idr_create(tn, index, est, a, ops, bind, !(flags & TCA_ACT_FLAGS_NO_PERCPU_STATS), flags); } EXPORT_SYMBOL(tcf_idr_create_from_flags); /* Cleanup idr index that was allocated but not initialized. */ void tcf_idr_cleanup(struct tc_action_net *tn, u32 index) { struct tcf_idrinfo *idrinfo = tn->idrinfo; mutex_lock(&idrinfo->lock); /* Remove ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */ WARN_ON(!IS_ERR(idr_remove(&idrinfo->action_idr, index))); mutex_unlock(&idrinfo->lock); } EXPORT_SYMBOL(tcf_idr_cleanup); /* Check if action with specified index exists. If actions is found, increments * its reference and bind counters, and return 1. Otherwise insert temporary * error pointer (to prevent concurrent users from inserting actions with same * index) and return 0. * * May return -EAGAIN for binding actions in case of a parallel add/delete on * the requested index. */ int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index, struct tc_action **a, int bind) { struct tcf_idrinfo *idrinfo = tn->idrinfo; struct tc_action *p; int ret; u32 max; if (*index) { rcu_read_lock(); p = idr_find(&idrinfo->action_idr, *index); if (IS_ERR(p)) { /* This means that another process allocated * index but did not assign the pointer yet. */ rcu_read_unlock(); return -EAGAIN; } if (!p) { /* Empty slot, try to allocate it */ max = *index; rcu_read_unlock(); goto new; } if (!refcount_inc_not_zero(&p->tcfa_refcnt)) { /* Action was deleted in parallel */ rcu_read_unlock(); return -EAGAIN; } if (bind) atomic_inc(&p->tcfa_bindcnt); *a = p; rcu_read_unlock(); return 1; } else { /* Find a slot */ *index = 1; max = UINT_MAX; } new: *a = NULL; mutex_lock(&idrinfo->lock); ret = idr_alloc_u32(&idrinfo->action_idr, ERR_PTR(-EBUSY), index, max, GFP_KERNEL); mutex_unlock(&idrinfo->lock); /* N binds raced for action allocation, * retry for all the ones that failed. */ if (ret == -ENOSPC && *index == max) ret = -EAGAIN; return ret; } EXPORT_SYMBOL(tcf_idr_check_alloc); void tcf_idrinfo_destroy(const struct tc_action_ops *ops, struct tcf_idrinfo *idrinfo) { struct idr *idr = &idrinfo->action_idr; struct tc_action *p; int ret; unsigned long id = 1; unsigned long tmp; idr_for_each_entry_ul(idr, p, tmp, id) { ret = __tcf_idr_release(p, false, true); if (ret == ACT_P_DELETED) module_put(ops->owner); else if (ret < 0) return; } idr_destroy(&idrinfo->action_idr); } EXPORT_SYMBOL(tcf_idrinfo_destroy); static LIST_HEAD(act_base); static DEFINE_RWLOCK(act_mod_lock); /* since act ops id is stored in pernet subsystem list, * then there is no way to walk through only all the action * subsystem, so we keep tc action pernet ops id for * reoffload to walk through. */ static LIST_HEAD(act_pernet_id_list); static DEFINE_MUTEX(act_id_mutex); struct tc_act_pernet_id { struct list_head list; unsigned int id; }; static int tcf_pernet_add_id_list(unsigned int id) { struct tc_act_pernet_id *id_ptr; int ret = 0; mutex_lock(&act_id_mutex); list_for_each_entry(id_ptr, &act_pernet_id_list, list) { if (id_ptr->id == id) { ret = -EEXIST; goto err_out; } } id_ptr = kzalloc(sizeof(*id_ptr), GFP_KERNEL); if (!id_ptr) { ret = -ENOMEM; goto err_out; } id_ptr->id = id; list_add_tail(&id_ptr->list, &act_pernet_id_list); err_out: mutex_unlock(&act_id_mutex); return ret; } static void tcf_pernet_del_id_list(unsigned int id) { struct tc_act_pernet_id *id_ptr; mutex_lock(&act_id_mutex); list_for_each_entry(id_ptr, &act_pernet_id_list, list) { if (id_ptr->id == id) { list_del(&id_ptr->list); kfree(id_ptr); break; } } mutex_unlock(&act_id_mutex); } int tcf_register_action(struct tc_action_ops *act, struct pernet_operations *ops) { struct tc_action_ops *a; int ret; if (!act->act || !act->dump || !act->init) return -EINVAL; /* We have to register pernet ops before making the action ops visible, * otherwise tcf_action_init_1() could get a partially initialized * netns. */ ret = register_pernet_subsys(ops); if (ret) return ret; if (ops->id) { ret = tcf_pernet_add_id_list(*ops->id); if (ret) goto err_id; } write_lock(&act_mod_lock); list_for_each_entry(a, &act_base, head) { if (act->id == a->id || (strcmp(act->kind, a->kind) == 0)) { ret = -EEXIST; goto err_out; } } list_add_tail(&act->head, &act_base); write_unlock(&act_mod_lock); return 0; err_out: write_unlock(&act_mod_lock); if (ops->id) tcf_pernet_del_id_list(*ops->id); err_id: unregister_pernet_subsys(ops); return ret; } EXPORT_SYMBOL(tcf_register_action); int tcf_unregister_action(struct tc_action_ops *act, struct pernet_operations *ops) { struct tc_action_ops *a; int err = -ENOENT; write_lock(&act_mod_lock); list_for_each_entry(a, &act_base, head) { if (a == act) { list_del(&act->head); err = 0; break; } } write_unlock(&act_mod_lock); if (!err) { unregister_pernet_subsys(ops); if (ops->id) tcf_pernet_del_id_list(*ops->id); } return err; } EXPORT_SYMBOL(tcf_unregister_action); /* lookup by name */ static struct tc_action_ops *tc_lookup_action_n(char *kind) { struct tc_action_ops *a, *res = NULL; if (kind) { read_lock(&act_mod_lock); list_for_each_entry(a, &act_base, head) { if (strcmp(kind, a->kind) == 0) { if (try_module_get(a->owner)) res = a; break; } } read_unlock(&act_mod_lock); } return res; } /* lookup by nlattr */ static struct tc_action_ops *tc_lookup_action(struct nlattr *kind) { struct tc_action_ops *a, *res = NULL; if (kind) { read_lock(&act_mod_lock); list_for_each_entry(a, &act_base, head) { if (nla_strcmp(kind, a->kind) == 0) { if (try_module_get(a->owner)) res = a; break; } } read_unlock(&act_mod_lock); } return res; } /*TCA_ACT_MAX_PRIO is 32, there count up to 32 */ #define TCA_ACT_MAX_PRIO_MASK 0x1FF int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, int nr_actions, struct tcf_result *res) { u32 jmp_prgcnt = 0; u32 jmp_ttl = TCA_ACT_MAX_PRIO; /*matches actions per filter */ int i; int ret = TC_ACT_OK; if (skb_skip_tc_classify(skb)) return TC_ACT_OK; restart_act_graph: for (i = 0; i < nr_actions; i++) { const struct tc_action *a = actions[i]; int repeat_ttl; if (jmp_prgcnt > 0) { jmp_prgcnt -= 1; continue; } if (tc_act_skip_sw(a->tcfa_flags)) continue; repeat_ttl = 32; repeat: ret = tc_act(skb, a, res); if (unlikely(ret == TC_ACT_REPEAT)) { if (--repeat_ttl != 0) goto repeat; /* suspicious opcode, stop pipeline */ net_warn_ratelimited("TC_ACT_REPEAT abuse ?\n"); return TC_ACT_OK; } if (TC_ACT_EXT_CMP(ret, TC_ACT_JUMP)) { jmp_prgcnt = ret & TCA_ACT_MAX_PRIO_MASK; if (!jmp_prgcnt || (jmp_prgcnt > nr_actions)) { /* faulty opcode, stop pipeline */ return TC_ACT_OK; } else { jmp_ttl -= 1; if (jmp_ttl > 0) goto restart_act_graph; else /* faulty graph, stop pipeline */ return TC_ACT_OK; } } else if (TC_ACT_EXT_CMP(ret, TC_ACT_GOTO_CHAIN)) { if (unlikely(!rcu_access_pointer(a->goto_chain))) { tcf_set_drop_reason(skb, SKB_DROP_REASON_TC_CHAIN_NOTFOUND); return TC_ACT_SHOT; } tcf_action_goto_chain_exec(a, res); } if (ret != TC_ACT_PIPE) break; } return ret; } EXPORT_SYMBOL(tcf_action_exec); int tcf_action_destroy(struct tc_action *actions[], int bind) { const struct tc_action_ops *ops; struct tc_action *a; int ret = 0, i; tcf_act_for_each_action(i, a, actions) { actions[i] = NULL; ops = a->ops; ret = __tcf_idr_release(a, bind, true); if (ret == ACT_P_DELETED) module_put(ops->owner); else if (ret < 0) return ret; } return ret; } static int tcf_action_put(struct tc_action *p) { return __tcf_action_put(p, false); } static void tcf_action_put_many(struct tc_action *actions[]) { struct tc_action *a; int i; tcf_act_for_each_action(i, a, actions) { const struct tc_action_ops *ops = a->ops; if (tcf_action_put(a)) module_put(ops->owner); } } static void tca_put_bound_many(struct tc_action *actions[], int init_res[]) { struct tc_action *a; int i; tcf_act_for_each_action(i, a, actions) { const struct tc_action_ops *ops = a->ops; if (init_res[i] == ACT_P_CREATED) continue; if (tcf_action_put(a)) module_put(ops->owner); } } int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { return a->ops->dump(skb, a, bind, ref); } int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { int err = -EINVAL; unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; u32 flags; if (tcf_action_dump_terse(skb, a, false)) goto nla_put_failure; if (a->hw_stats != TCA_ACT_HW_STATS_ANY && nla_put_bitfield32(skb, TCA_ACT_HW_STATS, a->hw_stats, TCA_ACT_HW_STATS_ANY)) goto nla_put_failure; if (a->used_hw_stats_valid && nla_put_bitfield32(skb, TCA_ACT_USED_HW_STATS, a->used_hw_stats, TCA_ACT_HW_STATS_ANY)) goto nla_put_failure; flags = a->tcfa_flags & TCA_ACT_FLAGS_USER_MASK; if (flags && nla_put_bitfield32(skb, TCA_ACT_FLAGS, flags, flags)) goto nla_put_failure; if (nla_put_u32(skb, TCA_ACT_IN_HW_COUNT, a->in_hw_count)) goto nla_put_failure; nest = nla_nest_start_noflag(skb, TCA_ACT_OPTIONS); if (nest == NULL) goto nla_put_failure; err = tcf_action_dump_old(skb, a, bind, ref); if (err > 0) { nla_nest_end(skb, nest); return err; } nla_put_failure: nlmsg_trim(skb, b); return -1; } EXPORT_SYMBOL(tcf_action_dump_1); int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], int bind, int ref, bool terse) { struct tc_action *a; int err = -EINVAL, i; struct nlattr *nest; tcf_act_for_each_action(i, a, actions) { nest = nla_nest_start_noflag(skb, i + 1); if (nest == NULL) goto nla_put_failure; err = terse ? tcf_action_dump_terse(skb, a, false) : tcf_action_dump_1(skb, a, bind, ref); if (err < 0) goto errout; nla_nest_end(skb, nest); } return 0; nla_put_failure: err = -EINVAL; errout: nla_nest_cancel(skb, nest); return err; } static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb) { struct tc_cookie *c = kzalloc(sizeof(*c), GFP_KERNEL); if (!c) return NULL; c->data = nla_memdup(tb[TCA_ACT_COOKIE], GFP_KERNEL); if (!c->data) { kfree(c); return NULL; } c->len = nla_len(tb[TCA_ACT_COOKIE]); return c; } static u8 tcf_action_hw_stats_get(struct nlattr *hw_stats_attr) { struct nla_bitfield32 hw_stats_bf; /* If the user did not pass the attr, that means he does * not care about the type. Return "any" in that case * which is setting on all supported types. */ if (!hw_stats_attr) return TCA_ACT_HW_STATS_ANY; hw_stats_bf = nla_get_bitfield32(hw_stats_attr); return hw_stats_bf.value; } static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = { [TCA_ACT_KIND] = { .type = NLA_STRING }, [TCA_ACT_INDEX] = { .type = NLA_U32 }, [TCA_ACT_COOKIE] = { .type = NLA_BINARY, .len = TC_COOKIE_MAX_SIZE }, [TCA_ACT_OPTIONS] = { .type = NLA_NESTED }, [TCA_ACT_FLAGS] = NLA_POLICY_BITFIELD32(TCA_ACT_FLAGS_NO_PERCPU_STATS | TCA_ACT_FLAGS_SKIP_HW | TCA_ACT_FLAGS_SKIP_SW), [TCA_ACT_HW_STATS] = NLA_POLICY_BITFIELD32(TCA_ACT_HW_STATS_ANY), }; void tcf_idr_insert_many(struct tc_action *actions[], int init_res[]) { struct tc_action *a; int i; tcf_act_for_each_action(i, a, actions) { struct tcf_idrinfo *idrinfo; if (init_res[i] == ACT_P_BOUND) continue; idrinfo = a->idrinfo; mutex_lock(&idrinfo->lock); /* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */ idr_replace(&idrinfo->action_idr, a, a->tcfa_index); mutex_unlock(&idrinfo->lock); } } struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, u32 flags, struct netlink_ext_ack *extack) { bool police = flags & TCA_ACT_FLAGS_POLICE; struct nlattr *tb[TCA_ACT_MAX + 1]; struct tc_action_ops *a_o; char act_name[IFNAMSIZ]; struct nlattr *kind; int err; if (!police) { err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla, tcf_action_policy, extack); if (err < 0) return ERR_PTR(err); err = -EINVAL; kind = tb[TCA_ACT_KIND]; if (!kind) { NL_SET_ERR_MSG(extack, "TC action kind must be specified"); return ERR_PTR(err); } if (nla_strscpy(act_name, kind, IFNAMSIZ) < 0) { NL_SET_ERR_MSG(extack, "TC action name too long"); return ERR_PTR(err); } } else { if (strscpy(act_name, "police", IFNAMSIZ) < 0) { NL_SET_ERR_MSG(extack, "TC action name too long"); return ERR_PTR(-EINVAL); } } a_o = tc_lookup_action_n(act_name); if (a_o == NULL) { #ifdef CONFIG_MODULES bool rtnl_held = !(flags & TCA_ACT_FLAGS_NO_RTNL); if (rtnl_held) rtnl_unlock(); request_module(NET_ACT_ALIAS_PREFIX "%s", act_name); if (rtnl_held) rtnl_lock(); a_o = tc_lookup_action_n(act_name); /* We dropped the RTNL semaphore in order to * perform the module load. So, even if we * succeeded in loading the module we have to * tell the caller to replay the request. We * indicate this using -EAGAIN. */ if (a_o != NULL) { module_put(a_o->owner); return ERR_PTR(-EAGAIN); } #endif NL_SET_ERR_MSG(extack, "Failed to load TC action module"); return ERR_PTR(-ENOENT); } return a_o; } struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, struct tc_action_ops *a_o, int *init_res, u32 flags, struct netlink_ext_ack *extack) { bool police = flags & TCA_ACT_FLAGS_POLICE; struct nla_bitfield32 userflags = { 0, 0 }; struct tc_cookie *user_cookie = NULL; u8 hw_stats = TCA_ACT_HW_STATS_ANY; struct nlattr *tb[TCA_ACT_MAX + 1]; struct tc_action *a; int err; /* backward compatibility for policer */ if (!police) { err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla, tcf_action_policy, extack); if (err < 0) return ERR_PTR(err); if (tb[TCA_ACT_COOKIE]) { user_cookie = nla_memdup_cookie(tb); if (!user_cookie) { NL_SET_ERR_MSG(extack, "No memory to generate TC cookie"); err = -ENOMEM; goto err_out; } } hw_stats = tcf_action_hw_stats_get(tb[TCA_ACT_HW_STATS]); if (tb[TCA_ACT_FLAGS]) { userflags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]); if (!tc_act_flags_valid(userflags.value)) { err = -EINVAL; goto err_out; } } err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, tp, userflags.value | flags, extack); } else { err = a_o->init(net, nla, est, &a, tp, userflags.value | flags, extack); } if (err < 0) goto err_out; *init_res = err; if (!police && tb[TCA_ACT_COOKIE]) tcf_set_action_cookie(&a->user_cookie, user_cookie); if (!police) a->hw_stats = hw_stats; return a; err_out: if (user_cookie) { kfree(user_cookie->data); kfree(user_cookie); } return ERR_PTR(err); } static bool tc_act_bind(u32 flags) { return !!(flags & TCA_ACT_FLAGS_BIND); } /* Returns numbers of initialized actions or negative error. */ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, struct tc_action *actions[], int init_res[], size_t *attr_size, u32 flags, u32 fl_flags, struct netlink_ext_ack *extack) { struct tc_action_ops *ops[TCA_ACT_MAX_PRIO] = {}; struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; struct tc_action *act; size_t sz = 0; int err; int i; err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack); if (err < 0) return err; for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { struct tc_action_ops *a_o; a_o = tc_action_load_ops(tb[i], flags, extack); if (IS_ERR(a_o)) { err = PTR_ERR(a_o); goto err_mod; } ops[i - 1] = a_o; } for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { act = tcf_action_init_1(net, tp, tb[i], est, ops[i - 1], &init_res[i - 1], flags, extack); if (IS_ERR(act)) { err = PTR_ERR(act); goto err; } sz += tcf_action_fill_size(act); /* Start from index 0 */ actions[i - 1] = act; if (tc_act_bind(flags)) { bool skip_sw = tc_skip_sw(fl_flags); bool skip_hw = tc_skip_hw(fl_flags); if (tc_act_bind(act->tcfa_flags)) continue; if (skip_sw != tc_act_skip_sw(act->tcfa_flags) || skip_hw != tc_act_skip_hw(act->tcfa_flags)) { NL_SET_ERR_MSG(extack, "Mismatch between action and filter offload flags"); err = -EINVAL; goto err; } } else { err = tcf_action_offload_add(act, extack); if (tc_act_skip_sw(act->tcfa_flags) && err) goto err; } } /* We have to commit them all together, because if any error happened in * between, we could not handle the failure gracefully. */ tcf_idr_insert_many(actions, init_res); *attr_size = tcf_action_full_attrs_size(sz); err = i - 1; goto err_mod; err: tcf_action_destroy(actions, flags & TCA_ACT_FLAGS_BIND); err_mod: for (i = 0; i < TCA_ACT_MAX_PRIO && ops[i]; i++) module_put(ops[i]->owner); return err; } void tcf_action_update_stats(struct tc_action *a, u64 bytes, u64 packets, u64 drops, bool hw) { if (a->cpu_bstats) { _bstats_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); this_cpu_ptr(a->cpu_qstats)->drops += drops; if (hw) _bstats_update(this_cpu_ptr(a->cpu_bstats_hw), bytes, packets); return; } _bstats_update(&a->tcfa_bstats, bytes, packets); a->tcfa_qstats.drops += drops; if (hw) _bstats_update(&a->tcfa_bstats_hw, bytes, packets); } EXPORT_SYMBOL(tcf_action_update_stats); int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p, int compat_mode) { int err = 0; struct gnet_dump d; if (p == NULL) goto errout; /* compat_mode being true specifies a call that is supposed * to add additional backward compatibility statistic TLVs. */ if (compat_mode) { if (p->type == TCA_OLD_COMPAT) err = gnet_stats_start_copy_compat(skb, 0, TCA_STATS, TCA_XSTATS, &p->tcfa_lock, &d, TCA_PAD); else return 0; } else err = gnet_stats_start_copy(skb, TCA_ACT_STATS, &p->tcfa_lock, &d, TCA_ACT_PAD); if (err < 0) goto errout; if (gnet_stats_copy_basic(&d, p->cpu_bstats, &p->tcfa_bstats, false) < 0 || gnet_stats_copy_basic_hw(&d, p->cpu_bstats_hw, &p->tcfa_bstats_hw, false) < 0 || gnet_stats_copy_rate_est(&d, &p->tcfa_rate_est) < 0 || gnet_stats_copy_queue(&d, p->cpu_qstats, &p->tcfa_qstats, p->tcfa_qstats.qlen) < 0) goto errout; if (gnet_stats_finish_copy(&d) < 0) goto errout; return 0; errout: return -1; } static int tca_get_fill(struct sk_buff *skb, struct tc_action *actions[], u32 portid, u32 seq, u16 flags, int event, int bind, int ref, struct netlink_ext_ack *extack) { struct tcamsg *t; struct nlmsghdr *nlh; unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*t), flags); if (!nlh) goto out_nlmsg_trim; t = nlmsg_data(nlh); t->tca_family = AF_UNSPEC; t->tca__pad1 = 0; t->tca__pad2 = 0; if (extack && extack->_msg && nla_put_string(skb, TCA_ROOT_EXT_WARN_MSG, extack->_msg)) goto out_nlmsg_trim; nest = nla_nest_start_noflag(skb, TCA_ACT_TAB); if (!nest) goto out_nlmsg_trim; if (tcf_action_dump(skb, actions, bind, ref, false) < 0) goto out_nlmsg_trim; nla_nest_end(skb, nest); nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; out_nlmsg_trim: nlmsg_trim(skb, b); return -1; } static int tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n, struct tc_action *actions[], int event, struct netlink_ext_ack *extack) { struct sk_buff *skb; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, event, 0, 1, NULL) <= 0) { NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action"); kfree_skb(skb); return -EINVAL; } return rtnl_unicast(skb, net, portid); } static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla, struct nlmsghdr *n, u32 portid, struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_ACT_MAX + 1]; const struct tc_action_ops *ops; struct tc_action *a; int index; int err; err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla, tcf_action_policy, extack); if (err < 0) goto err_out; err = -EINVAL; if (tb[TCA_ACT_INDEX] == NULL || nla_len(tb[TCA_ACT_INDEX]) < sizeof(index)) { NL_SET_ERR_MSG(extack, "Invalid TC action index value"); goto err_out; } index = nla_get_u32(tb[TCA_ACT_INDEX]); err = -EINVAL; ops = tc_lookup_action(tb[TCA_ACT_KIND]); if (!ops) { /* could happen in batch of actions */ NL_SET_ERR_MSG(extack, "Specified TC action kind not found"); goto err_out; } err = -ENOENT; if (__tcf_idr_search(net, ops, &a, index) == 0) { NL_SET_ERR_MSG(extack, "TC action with specified index not found"); goto err_mod; } module_put(ops->owner); return a; err_mod: module_put(ops->owner); err_out: return ERR_PTR(err); } static int tca_action_flush(struct net *net, struct nlattr *nla, struct nlmsghdr *n, u32 portid, struct netlink_ext_ack *extack) { struct sk_buff *skb; unsigned char *b; struct nlmsghdr *nlh; struct tcamsg *t; struct netlink_callback dcb; struct nlattr *nest; struct nlattr *tb[TCA_ACT_MAX + 1]; const struct tc_action_ops *ops; struct nlattr *kind; int err = -ENOMEM; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return err; b = skb_tail_pointer(skb); err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla, tcf_action_policy, extack); if (err < 0) goto err_out; err = -EINVAL; kind = tb[TCA_ACT_KIND]; ops = tc_lookup_action(kind); if (!ops) { /*some idjot trying to flush unknown action */ NL_SET_ERR_MSG(extack, "Cannot flush unknown TC action"); goto err_out; } nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t), 0); if (!nlh) { NL_SET_ERR_MSG(extack, "Failed to create TC action flush notification"); goto out_module_put; } t = nlmsg_data(nlh); t->tca_family = AF_UNSPEC; t->tca__pad1 = 0; t->tca__pad2 = 0; nest = nla_nest_start_noflag(skb, TCA_ACT_TAB); if (!nest) { NL_SET_ERR_MSG(extack, "Failed to add new netlink message"); goto out_module_put; } err = __tcf_generic_walker(net, skb, &dcb, RTM_DELACTION, ops, extack); if (err <= 0) { nla_nest_cancel(skb, nest); goto out_module_put; } nla_nest_end(skb, nest); nlh->nlmsg_len = skb_tail_pointer(skb) - b; nlh->nlmsg_flags |= NLM_F_ROOT; module_put(ops->owner); err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); if (err < 0) NL_SET_ERR_MSG(extack, "Failed to send TC action flush notification"); return err; out_module_put: module_put(ops->owner); err_out: kfree_skb(skb); return err; } static int tcf_action_delete(struct net *net, struct tc_action *actions[]) { struct tc_action *a; int i; tcf_act_for_each_action(i, a, actions) { const struct tc_action_ops *ops = a->ops; /* Actions can be deleted concurrently so we must save their * type and id to search again after reference is released. */ struct tcf_idrinfo *idrinfo = a->idrinfo; u32 act_index = a->tcfa_index; actions[i] = NULL; if (tcf_action_put(a)) { /* last reference, action was deleted concurrently */ module_put(ops->owner); } else { int ret; /* now do the delete */ ret = tcf_idr_delete_index(idrinfo, act_index); if (ret < 0) return ret; } } return 0; } static struct sk_buff *tcf_reoffload_del_notify_msg(struct net *net, struct tc_action *action) { size_t attr_size = tcf_action_fill_size(action); struct tc_action *actions[TCA_ACT_MAX_PRIO] = { [0] = action, }; struct sk_buff *skb; skb = alloc_skb(max(attr_size, NLMSG_GOODSIZE), GFP_KERNEL); if (!skb) return ERR_PTR(-ENOBUFS); if (tca_get_fill(skb, actions, 0, 0, 0, RTM_DELACTION, 0, 1, NULL) <= 0) { kfree_skb(skb); return ERR_PTR(-EINVAL); } return skb; } static int tcf_reoffload_del_notify(struct net *net, struct tc_action *action) { const struct tc_action_ops *ops = action->ops; struct sk_buff *skb; int ret; if (!rtnl_notify_needed(net, 0, RTNLGRP_TC)) { skb = NULL; } else { skb = tcf_reoffload_del_notify_msg(net, action); if (IS_ERR(skb)) return PTR_ERR(skb); } ret = tcf_idr_release_unsafe(action); if (ret == ACT_P_DELETED) { module_put(ops->owner); ret = rtnetlink_maybe_send(skb, net, 0, RTNLGRP_TC, 0); } else { kfree_skb(skb); } return ret; } int tcf_action_reoffload_cb(flow_indr_block_bind_cb_t *cb, void *cb_priv, bool add) { struct tc_act_pernet_id *id_ptr; struct tcf_idrinfo *idrinfo; struct tc_action_net *tn; struct tc_action *p; unsigned int act_id; unsigned long tmp; unsigned long id; struct idr *idr; struct net *net; int ret; if (!cb) return -EINVAL; down_read(&net_rwsem); mutex_lock(&act_id_mutex); for_each_net(net) { list_for_each_entry(id_ptr, &act_pernet_id_list, list) { act_id = id_ptr->id; tn = net_generic(net, act_id); if (!tn) continue; idrinfo = tn->idrinfo; if (!idrinfo) continue; mutex_lock(&idrinfo->lock); idr = &idrinfo->action_idr; idr_for_each_entry_ul(idr, p, tmp, id) { if (IS_ERR(p) || tc_act_bind(p->tcfa_flags)) continue; if (add) { tcf_action_offload_add_ex(p, NULL, cb, cb_priv); continue; } /* cb unregister to update hw count */ ret = tcf_action_offload_del_ex(p, cb, cb_priv); if (ret < 0) continue; if (tc_act_skip_sw(p->tcfa_flags) && !tc_act_in_hw(p)) tcf_reoffload_del_notify(net, p); } mutex_unlock(&idrinfo->lock); } } mutex_unlock(&act_id_mutex); up_read(&net_rwsem); return 0; } static struct sk_buff *tcf_del_notify_msg(struct net *net, struct nlmsghdr *n, struct tc_action *actions[], u32 portid, size_t attr_size, struct netlink_ext_ack *extack) { struct sk_buff *skb; skb = alloc_skb(max(attr_size, NLMSG_GOODSIZE), GFP_KERNEL); if (!skb) return ERR_PTR(-ENOBUFS); if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, RTM_DELACTION, 0, 2, extack) <= 0) { NL_SET_ERR_MSG(extack, "Failed to fill netlink TC action attributes"); kfree_skb(skb); return ERR_PTR(-EINVAL); } return skb; } static int tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[], u32 portid, size_t attr_size, struct netlink_ext_ack *extack) { struct sk_buff *skb; int ret; if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) { skb = NULL; } else { skb = tcf_del_notify_msg(net, n, actions, portid, attr_size, extack); if (IS_ERR(skb)) return PTR_ERR(skb); } /* now do the delete */ ret = tcf_action_delete(net, actions); if (ret < 0) { NL_SET_ERR_MSG(extack, "Failed to delete TC action"); kfree_skb(skb); return ret; } return rtnetlink_maybe_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); } static int tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, u32 portid, int event, struct netlink_ext_ack *extack) { int i, ret; struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; struct tc_action *act; size_t attr_size = 0; struct tc_action *actions[TCA_ACT_MAX_PRIO] = {}; ret = nla_parse_nested_deprecated(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack); if (ret < 0) return ret; if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) { if (tb[1]) return tca_action_flush(net, tb[1], n, portid, extack); NL_SET_ERR_MSG(extack, "Invalid netlink attributes while flushing TC action"); return -EINVAL; } for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { act = tcf_action_get_1(net, tb[i], n, portid, extack); if (IS_ERR(act)) { ret = PTR_ERR(act); goto err; } attr_size += tcf_action_fill_size(act); actions[i - 1] = act; } attr_size = tcf_action_full_attrs_size(attr_size); if (event == RTM_GETACTION) ret = tcf_get_notify(net, portid, n, actions, event, extack); else { /* delete */ ret = tcf_del_notify(net, n, actions, portid, attr_size, extack); if (ret) goto err; return 0; } err: tcf_action_put_many(actions); return ret; } static struct sk_buff *tcf_add_notify_msg(struct net *net, struct nlmsghdr *n, struct tc_action *actions[], u32 portid, size_t attr_size, struct netlink_ext_ack *extack) { struct sk_buff *skb; skb = alloc_skb(max(attr_size, NLMSG_GOODSIZE), GFP_KERNEL); if (!skb) return ERR_PTR(-ENOBUFS); if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, n->nlmsg_flags, RTM_NEWACTION, 0, 0, extack) <= 0) { NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action"); kfree_skb(skb); return ERR_PTR(-EINVAL); } return skb; } static int tcf_add_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[], u32 portid, size_t attr_size, struct netlink_ext_ack *extack) { struct sk_buff *skb; if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) { skb = NULL; } else { skb = tcf_add_notify_msg(net, n, actions, portid, attr_size, extack); if (IS_ERR(skb)) return PTR_ERR(skb); } return rtnetlink_maybe_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); } static int tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, u32 portid, u32 flags, struct netlink_ext_ack *extack) { size_t attr_size = 0; int loop, ret; struct tc_action *actions[TCA_ACT_MAX_PRIO] = {}; int init_res[TCA_ACT_MAX_PRIO] = {}; for (loop = 0; loop < 10; loop++) { ret = tcf_action_init(net, NULL, nla, NULL, actions, init_res, &attr_size, flags, 0, extack); if (ret != -EAGAIN) break; } if (ret < 0) return ret; ret = tcf_add_notify(net, n, actions, portid, attr_size, extack); /* only put bound actions */ tca_put_bound_many(actions, init_res); return ret; } static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = { [TCA_ROOT_FLAGS] = NLA_POLICY_BITFIELD32(TCA_ACT_FLAG_LARGE_DUMP_ON | TCA_ACT_FLAG_TERSE_DUMP), [TCA_ROOT_TIME_DELTA] = { .type = NLA_U32 }, }; static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_ROOT_MAX + 1]; u32 portid = NETLINK_CB(skb).portid; u32 flags = 0; int ret = 0; if ((n->nlmsg_type != RTM_GETACTION) && !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; ret = nlmsg_parse_deprecated(n, sizeof(struct tcamsg), tca, TCA_ROOT_MAX, NULL, extack); if (ret < 0) return ret; if (tca[TCA_ACT_TAB] == NULL) { NL_SET_ERR_MSG(extack, "Netlink action attributes missing"); return -EINVAL; } /* n->nlmsg_flags & NLM_F_CREATE */ switch (n->nlmsg_type) { case RTM_NEWACTION: /* we are going to assume all other flags * imply create only if it doesn't exist * Note that CREATE | EXCL implies that * but since we want avoid ambiguity (eg when flags * is zero) then just set this */ if (n->nlmsg_flags & NLM_F_REPLACE) flags = TCA_ACT_FLAGS_REPLACE; ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, flags, extack); break; case RTM_DELACTION: ret = tca_action_gd(net, tca[TCA_ACT_TAB], n, portid, RTM_DELACTION, extack); break; case RTM_GETACTION: ret = tca_action_gd(net, tca[TCA_ACT_TAB], n, portid, RTM_GETACTION, extack); break; default: BUG(); } return ret; } static struct nlattr *find_dump_kind(struct nlattr **nla) { struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1]; struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; struct nlattr *kind; tb1 = nla[TCA_ACT_TAB]; if (tb1 == NULL) return NULL; if (nla_parse_deprecated(tb, TCA_ACT_MAX_PRIO, nla_data(tb1), NLMSG_ALIGN(nla_len(tb1)), NULL, NULL) < 0) return NULL; if (tb[1] == NULL) return NULL; if (nla_parse_nested_deprecated(tb2, TCA_ACT_MAX, tb[1], tcf_action_policy, NULL) < 0) return NULL; kind = tb2[TCA_ACT_KIND]; return kind; } static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); struct nlmsghdr *nlh; unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; struct tc_action_ops *a_o; int ret = 0; struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh); struct nlattr *tb[TCA_ROOT_MAX + 1]; struct nlattr *count_attr = NULL; unsigned long jiffy_since = 0; struct nlattr *kind = NULL; struct nla_bitfield32 bf; u32 msecs_since = 0; u32 act_count = 0; ret = nlmsg_parse_deprecated(cb->nlh, sizeof(struct tcamsg), tb, TCA_ROOT_MAX, tcaa_policy, cb->extack); if (ret < 0) return ret; kind = find_dump_kind(tb); if (kind == NULL) { pr_info("tc_dump_action: action bad kind\n"); return 0; } a_o = tc_lookup_action(kind); if (a_o == NULL) return 0; cb->args[2] = 0; if (tb[TCA_ROOT_FLAGS]) { bf = nla_get_bitfield32(tb[TCA_ROOT_FLAGS]); cb->args[2] = bf.value; } if (tb[TCA_ROOT_TIME_DELTA]) { msecs_since = nla_get_u32(tb[TCA_ROOT_TIME_DELTA]); } nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type, sizeof(*t), 0); if (!nlh) goto out_module_put; if (msecs_since) jiffy_since = jiffies - msecs_to_jiffies(msecs_since); t = nlmsg_data(nlh); t->tca_family = AF_UNSPEC; t->tca__pad1 = 0; t->tca__pad2 = 0; cb->args[3] = jiffy_since; count_attr = nla_reserve(skb, TCA_ROOT_COUNT, sizeof(u32)); if (!count_attr) goto out_module_put; nest = nla_nest_start_noflag(skb, TCA_ACT_TAB); if (nest == NULL) goto out_module_put; ret = __tcf_generic_walker(net, skb, cb, RTM_GETACTION, a_o, NULL); if (ret < 0) goto out_module_put; if (ret > 0) { nla_nest_end(skb, nest); ret = skb->len; act_count = cb->args[1]; memcpy(nla_data(count_attr), &act_count, sizeof(u32)); cb->args[1] = 0; } else nlmsg_trim(skb, b); nlh->nlmsg_len = skb_tail_pointer(skb) - b; if (NETLINK_CB(cb->skb).portid && ret) nlh->nlmsg_flags |= NLM_F_MULTI; module_put(a_o->owner); return skb->len; out_module_put: module_put(a_o->owner); nlmsg_trim(skb, b); return skb->len; } static int __init tc_action_init(void) { rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, 0); rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, 0); rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action, 0); return 0; } subsys_initcall(tc_action_init); |
| 70 66 65 66 66 66 66 66 66 66 66 66 66 66 66 233 71 71 71 67 68 8 222 221 222 168 221 97 187 166 167 167 166 167 63 63 1 1 1 1 1 66 66 66 66 66 66 66 66 66 66 66 66 66 66 66 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 5 3 3 3 3 3 3 3 3 3 3 3 67 68 68 3 3 3 68 3 3 3 3 3 3 3 2 2 3 3 3 3 3 2 2 2 2 2 3 3 3 3 1 70 70 70 69 70 70 70 70 70 70 70 70 70 70 74 74 73 70 72 73 3 73 71 71 70 70 72 3 70 70 4 4 73 3 3 3 71 71 71 3 3 3 73 5 70 70 70 70 70 70 70 70 70 70 70 70 5 5 4 4 4 1 1 1 37 35 37 37 9 76 76 76 1 76 66 66 66 66 66 76 66 66 66 62 62 72 82 83 73 73 5 5 5 5 73 5 71 71 71 71 71 70 71 71 71 71 71 71 71 71 71 71 73 73 1 73 73 1 73 73 73 73 73 73 72 73 73 73 73 72 73 73 73 73 73 73 73 73 73 73 73 73 73 73 73 72 73 1 1 73 73 73 73 73 73 73 73 72 73 4 73 4 73 5 73 71 73 73 73 73 3 73 73 73 73 73 3 73 3 73 73 73 73 73 73 73 89 89 89 89 4 89 73 21 21 21 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015 - 2017 Intel Deutschland GmbH * Copyright (C) 2018-2023 Intel Corporation */ #include <linux/module.h> #include <linux/init.h> #include <linux/etherdevice.h> #include <linux/netdevice.h> #include <linux/types.h> #include <linux/slab.h> #include <linux/skbuff.h> #include <linux/if_arp.h> #include <linux/timer.h> #include <linux/rtnetlink.h> #include <net/codel.h> #include <net/mac80211.h> #include "ieee80211_i.h" #include "driver-ops.h" #include "rate.h" #include "sta_info.h" #include "debugfs_sta.h" #include "mesh.h" #include "wme.h" /** * DOC: STA information lifetime rules * * STA info structures (&struct sta_info) are managed in a hash table * for faster lookup and a list for iteration. They are managed using * RCU, i.e. access to the list and hash table is protected by RCU. * * Upon allocating a STA info structure with sta_info_alloc(), the caller * owns that structure. It must then insert it into the hash table using * either sta_info_insert() or sta_info_insert_rcu(); only in the latter * case (which acquires an rcu read section but must not be called from * within one) will the pointer still be valid after the call. Note that * the caller may not do much with the STA info before inserting it; in * particular, it may not start any mesh peer link management or add * encryption keys. * * When the insertion fails (sta_info_insert()) returns non-zero), the * structure will have been freed by sta_info_insert()! * * Station entries are added by mac80211 when you establish a link with a * peer. This means different things for the different type of interfaces * we support. For a regular station this mean we add the AP sta when we * receive an association response from the AP. For IBSS this occurs when * get to know about a peer on the same IBSS. For WDS we add the sta for * the peer immediately upon device open. When using AP mode we add stations * for each respective station upon request from userspace through nl80211. * * In order to remove a STA info structure, various sta_info_destroy_*() * calls are available. * * There is no concept of ownership on a STA entry; each structure is * owned by the global hash table/list until it is removed. All users of * the structure need to be RCU protected so that the structure won't be * freed before they are done using it. */ struct sta_link_alloc { struct link_sta_info info; struct ieee80211_link_sta sta; struct rcu_head rcu_head; }; static const struct rhashtable_params sta_rht_params = { .nelem_hint = 3, /* start small */ .automatic_shrinking = true, .head_offset = offsetof(struct sta_info, hash_node), .key_offset = offsetof(struct sta_info, addr), .key_len = ETH_ALEN, .max_size = CONFIG_MAC80211_STA_HASH_MAX_SIZE, }; static const struct rhashtable_params link_sta_rht_params = { .nelem_hint = 3, /* start small */ .automatic_shrinking = true, .head_offset = offsetof(struct link_sta_info, link_hash_node), .key_offset = offsetof(struct link_sta_info, addr), .key_len = ETH_ALEN, .max_size = CONFIG_MAC80211_STA_HASH_MAX_SIZE, }; static int sta_info_hash_del(struct ieee80211_local *local, struct sta_info *sta) { return rhltable_remove(&local->sta_hash, &sta->hash_node, sta_rht_params); } static int link_sta_info_hash_add(struct ieee80211_local *local, struct link_sta_info *link_sta) { lockdep_assert_wiphy(local->hw.wiphy); return rhltable_insert(&local->link_sta_hash, &link_sta->link_hash_node, link_sta_rht_params); } static int link_sta_info_hash_del(struct ieee80211_local *local, struct link_sta_info *link_sta) { lockdep_assert_wiphy(local->hw.wiphy); return rhltable_remove(&local->link_sta_hash, &link_sta->link_hash_node, link_sta_rht_params); } void ieee80211_purge_sta_txqs(struct sta_info *sta) { struct ieee80211_local *local = sta->sdata->local; int i; for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) { struct txq_info *txqi; if (!sta->sta.txq[i]) continue; txqi = to_txq_info(sta->sta.txq[i]); ieee80211_txq_purge(local, txqi); } } static void __cleanup_single_sta(struct sta_info *sta) { int ac, i; struct tid_ampdu_tx *tid_tx; struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; struct ps_data *ps; if (test_sta_flag(sta, WLAN_STA_PS_STA) || test_sta_flag(sta, WLAN_STA_PS_DRIVER) || test_sta_flag(sta, WLAN_STA_PS_DELIVER)) { if (sta->sdata->vif.type == NL80211_IFTYPE_AP || sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) ps = &sdata->bss->ps; else if (ieee80211_vif_is_mesh(&sdata->vif)) ps = &sdata->u.mesh.ps; else return; clear_sta_flag(sta, WLAN_STA_PS_STA); clear_sta_flag(sta, WLAN_STA_PS_DRIVER); clear_sta_flag(sta, WLAN_STA_PS_DELIVER); atomic_dec(&ps->num_sta_ps); } ieee80211_purge_sta_txqs(sta); for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { local->total_ps_buffered -= skb_queue_len(&sta->ps_tx_buf[ac]); ieee80211_purge_tx_queue(&local->hw, &sta->ps_tx_buf[ac]); ieee80211_purge_tx_queue(&local->hw, &sta->tx_filtered[ac]); } if (ieee80211_vif_is_mesh(&sdata->vif)) mesh_sta_cleanup(sta); cancel_work_sync(&sta->drv_deliver_wk); /* * Destroy aggregation state here. It would be nice to wait for the * driver to finish aggregation stop and then clean up, but for now * drivers have to handle aggregation stop being requested, followed * directly by station destruction. */ for (i = 0; i < IEEE80211_NUM_TIDS; i++) { kfree(sta->ampdu_mlme.tid_start_tx[i]); tid_tx = rcu_dereference_raw(sta->ampdu_mlme.tid_tx[i]); if (!tid_tx) continue; ieee80211_purge_tx_queue(&local->hw, &tid_tx->pending); kfree(tid_tx); } } static void cleanup_single_sta(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; __cleanup_single_sta(sta); sta_info_free(local, sta); } struct rhlist_head *sta_info_hash_lookup(struct ieee80211_local *local, const u8 *addr) { return rhltable_lookup(&local->sta_hash, addr, sta_rht_params); } /* protected by RCU */ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct ieee80211_local *local = sdata->local; struct rhlist_head *tmp; struct sta_info *sta; rcu_read_lock(); for_each_sta_info(local, addr, sta, tmp) { if (sta->sdata == sdata) { rcu_read_unlock(); /* this is safe as the caller must already hold * another rcu read section or the mutex */ return sta; } } rcu_read_unlock(); return NULL; } /* * Get sta info either from the specified interface * or from one of its vlans */ struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct ieee80211_local *local = sdata->local; struct rhlist_head *tmp; struct sta_info *sta; rcu_read_lock(); for_each_sta_info(local, addr, sta, tmp) { if (sta->sdata == sdata || (sta->sdata->bss && sta->sdata->bss == sdata->bss)) { rcu_read_unlock(); /* this is safe as the caller must already hold * another rcu read section or the mutex */ return sta; } } rcu_read_unlock(); return NULL; } struct rhlist_head *link_sta_info_hash_lookup(struct ieee80211_local *local, const u8 *addr) { return rhltable_lookup(&local->link_sta_hash, addr, link_sta_rht_params); } struct link_sta_info * link_sta_info_get_bss(struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct ieee80211_local *local = sdata->local; struct rhlist_head *tmp; struct link_sta_info *link_sta; rcu_read_lock(); for_each_link_sta_info(local, addr, link_sta, tmp) { struct sta_info *sta = link_sta->sta; if (sta->sdata == sdata || (sta->sdata->bss && sta->sdata->bss == sdata->bss)) { rcu_read_unlock(); /* this is safe as the caller must already hold * another rcu read section or the mutex */ return link_sta; } } rcu_read_unlock(); return NULL; } struct ieee80211_sta * ieee80211_find_sta_by_link_addrs(struct ieee80211_hw *hw, const u8 *addr, const u8 *localaddr, unsigned int *link_id) { struct ieee80211_local *local = hw_to_local(hw); struct link_sta_info *link_sta; struct rhlist_head *tmp; for_each_link_sta_info(local, addr, link_sta, tmp) { struct sta_info *sta = link_sta->sta; struct ieee80211_link_data *link; u8 _link_id = link_sta->link_id; if (!localaddr) { if (link_id) *link_id = _link_id; return &sta->sta; } link = rcu_dereference(sta->sdata->link[_link_id]); if (!link) continue; if (memcmp(link->conf->addr, localaddr, ETH_ALEN)) continue; if (link_id) *link_id = _link_id; return &sta->sta; } return NULL; } EXPORT_SYMBOL_GPL(ieee80211_find_sta_by_link_addrs); struct sta_info *sta_info_get_by_addrs(struct ieee80211_local *local, const u8 *sta_addr, const u8 *vif_addr) { struct rhlist_head *tmp; struct sta_info *sta; for_each_sta_info(local, sta_addr, sta, tmp) { if (ether_addr_equal(vif_addr, sta->sdata->vif.addr)) return sta; } return NULL; } struct sta_info *sta_info_get_by_idx(struct ieee80211_sub_if_data *sdata, int idx) { struct ieee80211_local *local = sdata->local; struct sta_info *sta; int i = 0; list_for_each_entry_rcu(sta, &local->sta_list, list, lockdep_is_held(&local->hw.wiphy->mtx)) { if (sdata != sta->sdata) continue; if (i < idx) { ++i; continue; } return sta; } return NULL; } static void sta_info_free_link(struct link_sta_info *link_sta) { free_percpu(link_sta->pcpu_rx_stats); } static void sta_remove_link(struct sta_info *sta, unsigned int link_id, bool unhash) { struct sta_link_alloc *alloc = NULL; struct link_sta_info *link_sta; lockdep_assert_wiphy(sta->local->hw.wiphy); link_sta = rcu_access_pointer(sta->link[link_id]); if (WARN_ON(!link_sta)) return; if (unhash) link_sta_info_hash_del(sta->local, link_sta); if (test_sta_flag(sta, WLAN_STA_INSERTED)) ieee80211_link_sta_debugfs_remove(link_sta); if (link_sta != &sta->deflink) alloc = container_of(link_sta, typeof(*alloc), info); sta->sta.valid_links &= ~BIT(link_id); RCU_INIT_POINTER(sta->link[link_id], NULL); RCU_INIT_POINTER(sta->sta.link[link_id], NULL); if (alloc) { sta_info_free_link(&alloc->info); kfree_rcu(alloc, rcu_head); } ieee80211_sta_recalc_aggregates(&sta->sta); } /** * sta_info_free - free STA * * @local: pointer to the global information * @sta: STA info to free * * This function must undo everything done by sta_info_alloc() * that may happen before sta_info_insert(). It may only be * called when sta_info_insert() has not been attempted (and * if that fails, the station is freed anyway.) */ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) { int i; for (i = 0; i < ARRAY_SIZE(sta->link); i++) { struct link_sta_info *link_sta; link_sta = rcu_access_pointer(sta->link[i]); if (!link_sta) continue; sta_remove_link(sta, i, false); } /* * If we had used sta_info_pre_move_state() then we might not * have gone through the state transitions down again, so do * it here now (and warn if it's inserted). * * This will clear state such as fast TX/RX that may have been * allocated during state transitions. */ while (sta->sta_state > IEEE80211_STA_NONE) { int ret; WARN_ON_ONCE(test_sta_flag(sta, WLAN_STA_INSERTED)); ret = sta_info_move_state(sta, sta->sta_state - 1); if (WARN_ONCE(ret, "sta_info_move_state() returned %d\n", ret)) break; } if (sta->rate_ctrl) rate_control_free_sta(sta); sta_dbg(sta->sdata, "Destroyed STA %pM\n", sta->sta.addr); kfree(to_txq_info(sta->sta.txq[0])); kfree(rcu_dereference_raw(sta->sta.rates)); #ifdef CONFIG_MAC80211_MESH kfree(sta->mesh); #endif sta_info_free_link(&sta->deflink); kfree(sta); } static int sta_info_hash_add(struct ieee80211_local *local, struct sta_info *sta) { return rhltable_insert(&local->sta_hash, &sta->hash_node, sta_rht_params); } static void sta_deliver_ps_frames(struct work_struct *wk) { struct sta_info *sta; sta = container_of(wk, struct sta_info, drv_deliver_wk); if (sta->dead) return; local_bh_disable(); if (!test_sta_flag(sta, WLAN_STA_PS_STA)) ieee80211_sta_ps_deliver_wakeup(sta); else if (test_and_clear_sta_flag(sta, WLAN_STA_PSPOLL)) ieee80211_sta_ps_deliver_poll_response(sta); else if (test_and_clear_sta_flag(sta, WLAN_STA_UAPSD)) ieee80211_sta_ps_deliver_uapsd(sta); local_bh_enable(); } static int sta_prepare_rate_control(struct ieee80211_local *local, struct sta_info *sta, gfp_t gfp) { if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) return 0; sta->rate_ctrl = local->rate_ctrl; sta->rate_ctrl_priv = rate_control_alloc_sta(sta->rate_ctrl, sta, gfp); if (!sta->rate_ctrl_priv) return -ENOMEM; return 0; } static int sta_info_alloc_link(struct ieee80211_local *local, struct link_sta_info *link_info, gfp_t gfp) { struct ieee80211_hw *hw = &local->hw; int i; if (ieee80211_hw_check(hw, USES_RSS)) { link_info->pcpu_rx_stats = alloc_percpu_gfp(struct ieee80211_sta_rx_stats, gfp); if (!link_info->pcpu_rx_stats) return -ENOMEM; } link_info->rx_stats.last_rx = jiffies; u64_stats_init(&link_info->rx_stats.syncp); ewma_signal_init(&link_info->rx_stats_avg.signal); ewma_avg_signal_init(&link_info->status_stats.avg_ack_signal); for (i = 0; i < ARRAY_SIZE(link_info->rx_stats_avg.chain_signal); i++) ewma_signal_init(&link_info->rx_stats_avg.chain_signal[i]); return 0; } static void sta_info_add_link(struct sta_info *sta, unsigned int link_id, struct link_sta_info *link_info, struct ieee80211_link_sta *link_sta) { link_info->sta = sta; link_info->link_id = link_id; link_info->pub = link_sta; link_info->pub->sta = &sta->sta; link_sta->link_id = link_id; rcu_assign_pointer(sta->link[link_id], link_info); rcu_assign_pointer(sta->sta.link[link_id], link_sta); link_sta->smps_mode = IEEE80211_SMPS_OFF; link_sta->agg.max_rc_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_BA; } static struct sta_info * __sta_info_alloc(struct ieee80211_sub_if_data *sdata, const u8 *addr, int link_id, const u8 *link_addr, gfp_t gfp) { struct ieee80211_local *local = sdata->local; struct ieee80211_hw *hw = &local->hw; struct sta_info *sta; void *txq_data; int size; int i; sta = kzalloc(sizeof(*sta) + hw->sta_data_size, gfp); if (!sta) return NULL; sta->local = local; sta->sdata = sdata; if (sta_info_alloc_link(local, &sta->deflink, gfp)) goto free; if (link_id >= 0) { sta_info_add_link(sta, link_id, &sta->deflink, &sta->sta.deflink); sta->sta.valid_links = BIT(link_id); } else { sta_info_add_link(sta, 0, &sta->deflink, &sta->sta.deflink); } sta->sta.cur = &sta->sta.deflink.agg; spin_lock_init(&sta->lock); spin_lock_init(&sta->ps_lock); INIT_WORK(&sta->drv_deliver_wk, sta_deliver_ps_frames); wiphy_work_init(&sta->ampdu_mlme.work, ieee80211_ba_session_work); #ifdef CONFIG_MAC80211_MESH if (ieee80211_vif_is_mesh(&sdata->vif)) { sta->mesh = kzalloc(sizeof(*sta->mesh), gfp); if (!sta->mesh) goto free; sta->mesh->plink_sta = sta; spin_lock_init(&sta->mesh->plink_lock); if (!sdata->u.mesh.user_mpm) timer_setup(&sta->mesh->plink_timer, mesh_plink_timer, 0); sta->mesh->nonpeer_pm = NL80211_MESH_POWER_ACTIVE; } #endif memcpy(sta->addr, addr, ETH_ALEN); memcpy(sta->sta.addr, addr, ETH_ALEN); memcpy(sta->deflink.addr, link_addr, ETH_ALEN); memcpy(sta->sta.deflink.addr, link_addr, ETH_ALEN); sta->sta.max_rx_aggregation_subframes = local->hw.max_rx_aggregation_subframes; /* TODO link specific alloc and assignments for MLO Link STA */ /* Extended Key ID needs to install keys for keyid 0 and 1 Rx-only. * The Tx path starts to use a key as soon as the key slot ptk_idx * references to is not NULL. To not use the initial Rx-only key * prematurely for Tx initialize ptk_idx to an impossible PTK keyid * which always will refer to a NULL key. */ BUILD_BUG_ON(ARRAY_SIZE(sta->ptk) <= INVALID_PTK_KEYIDX); sta->ptk_idx = INVALID_PTK_KEYIDX; ieee80211_init_frag_cache(&sta->frags); sta->sta_state = IEEE80211_STA_NONE; if (sdata->vif.type == NL80211_IFTYPE_MESH_POINT) sta->amsdu_mesh_control = -1; /* Mark TID as unreserved */ sta->reserved_tid = IEEE80211_TID_UNRESERVED; sta->last_connected = ktime_get_seconds(); size = sizeof(struct txq_info) + ALIGN(hw->txq_data_size, sizeof(void *)); txq_data = kcalloc(ARRAY_SIZE(sta->sta.txq), size, gfp); if (!txq_data) goto free; for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) { struct txq_info *txq = txq_data + i * size; /* might not do anything for the (bufferable) MMPDU TXQ */ ieee80211_txq_init(sdata, sta, txq, i); } if (sta_prepare_rate_control(local, sta, gfp)) goto free_txq; sta->airtime_weight = IEEE80211_DEFAULT_AIRTIME_WEIGHT; for (i = 0; i < IEEE80211_NUM_ACS; i++) { skb_queue_head_init(&sta->ps_tx_buf[i]); skb_queue_head_init(&sta->tx_filtered[i]); sta->airtime[i].deficit = sta->airtime_weight; atomic_set(&sta->airtime[i].aql_tx_pending, 0); sta->airtime[i].aql_limit_low = local->aql_txq_limit_low[i]; sta->airtime[i].aql_limit_high = local->aql_txq_limit_high[i]; } for (i = 0; i < IEEE80211_NUM_TIDS; i++) sta->last_seq_ctrl[i] = cpu_to_le16(USHRT_MAX); for (i = 0; i < NUM_NL80211_BANDS; i++) { u32 mandatory = 0; int r; if (!hw->wiphy->bands[i]) continue; switch (i) { case NL80211_BAND_2GHZ: case NL80211_BAND_LC: /* * We use both here, even if we cannot really know for * sure the station will support both, but the only use * for this is when we don't know anything yet and send * management frames, and then we'll pick the lowest * possible rate anyway. * If we don't include _G here, we cannot find a rate * in P2P, and thus trigger the WARN_ONCE() in rate.c */ mandatory = IEEE80211_RATE_MANDATORY_B | IEEE80211_RATE_MANDATORY_G; break; case NL80211_BAND_5GHZ: mandatory = IEEE80211_RATE_MANDATORY_A; break; case NL80211_BAND_60GHZ: WARN_ON(1); mandatory = 0; break; } for (r = 0; r < hw->wiphy->bands[i]->n_bitrates; r++) { struct ieee80211_rate *rate; rate = &hw->wiphy->bands[i]->bitrates[r]; if (!(rate->flags & mandatory)) continue; sta->sta.deflink.supp_rates[i] |= BIT(r); } } sta->cparams.ce_threshold = CODEL_DISABLED_THRESHOLD; sta->cparams.target = MS2TIME(20); sta->cparams.interval = MS2TIME(100); sta->cparams.ecn = true; sta->cparams.ce_threshold_selector = 0; sta->cparams.ce_threshold_mask = 0; sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr); return sta; free_txq: kfree(to_txq_info(sta->sta.txq[0])); free: sta_info_free_link(&sta->deflink); #ifdef CONFIG_MAC80211_MESH kfree(sta->mesh); #endif kfree(sta); return NULL; } struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, const u8 *addr, gfp_t gfp) { return __sta_info_alloc(sdata, addr, -1, addr, gfp); } struct sta_info *sta_info_alloc_with_link(struct ieee80211_sub_if_data *sdata, const u8 *mld_addr, unsigned int link_id, const u8 *link_addr, gfp_t gfp) { return __sta_info_alloc(sdata, mld_addr, link_id, link_addr, gfp); } static int sta_info_insert_check(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; lockdep_assert_wiphy(sdata->local->hw.wiphy); /* * Can't be a WARN_ON because it can be triggered through a race: * something inserts a STA (on one CPU) without holding the RTNL * and another CPU turns off the net device. */ if (unlikely(!ieee80211_sdata_running(sdata))) return -ENETDOWN; if (WARN_ON(ether_addr_equal(sta->sta.addr, sdata->vif.addr) || !is_valid_ether_addr(sta->sta.addr))) return -EINVAL; /* The RCU read lock is required by rhashtable due to * asynchronous resize/rehash. We also require the mutex * for correctness. */ rcu_read_lock(); if (ieee80211_hw_check(&sdata->local->hw, NEEDS_UNIQUE_STA_ADDR) && ieee80211_find_sta_by_ifaddr(&sdata->local->hw, sta->addr, NULL)) { rcu_read_unlock(); return -ENOTUNIQ; } rcu_read_unlock(); return 0; } static int sta_info_insert_drv_state(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct sta_info *sta) { enum ieee80211_sta_state state; int err = 0; for (state = IEEE80211_STA_NOTEXIST; state < sta->sta_state; state++) { err = drv_sta_state(local, sdata, sta, state, state + 1); if (err) break; } if (!err) { /* * Drivers using legacy sta_add/sta_remove callbacks only * get uploaded set to true after sta_add is called. */ if (!local->ops->sta_add) sta->uploaded = true; return 0; } if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { sdata_info(sdata, "failed to move IBSS STA %pM to state %d (%d) - keeping it anyway\n", sta->sta.addr, state + 1, err); err = 0; } /* unwind on error */ for (; state > IEEE80211_STA_NOTEXIST; state--) WARN_ON(drv_sta_state(local, sdata, sta, state, state - 1)); return err; } static void ieee80211_recalc_p2p_go_ps_allowed(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; bool allow_p2p_go_ps = sdata->vif.p2p; struct sta_info *sta; rcu_read_lock(); list_for_each_entry_rcu(sta, &local->sta_list, list) { if (sdata != sta->sdata || !test_sta_flag(sta, WLAN_STA_ASSOC)) continue; if (!sta->sta.support_p2p_ps) { allow_p2p_go_ps = false; break; } } rcu_read_unlock(); if (allow_p2p_go_ps != sdata->vif.bss_conf.allow_p2p_go_ps) { sdata->vif.bss_conf.allow_p2p_go_ps = allow_p2p_go_ps; ieee80211_link_info_change_notify(sdata, &sdata->deflink, BSS_CHANGED_P2P_PS); } } static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) { struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata = sta->sdata; struct station_info *sinfo = NULL; int err = 0; lockdep_assert_wiphy(local->hw.wiphy); /* check if STA exists already */ if (sta_info_get_bss(sdata, sta->sta.addr)) { err = -EEXIST; goto out_cleanup; } sinfo = kzalloc(sizeof(struct station_info), GFP_KERNEL); if (!sinfo) { err = -ENOMEM; goto out_cleanup; } local->num_sta++; local->sta_generation++; smp_mb(); /* simplify things and don't accept BA sessions yet */ set_sta_flag(sta, WLAN_STA_BLOCK_BA); /* make the station visible */ err = sta_info_hash_add(local, sta); if (err) goto out_drop_sta; if (sta->sta.valid_links) { err = link_sta_info_hash_add(local, &sta->deflink); if (err) { sta_info_hash_del(local, sta); goto out_drop_sta; } } list_add_tail_rcu(&sta->list, &local->sta_list); /* update channel context before notifying the driver about state * change, this enables driver using the updated channel context right away. */ if (sta->sta_state >= IEEE80211_STA_ASSOC) { ieee80211_recalc_min_chandef(sta->sdata, -1); if (!sta->sta.support_p2p_ps) ieee80211_recalc_p2p_go_ps_allowed(sta->sdata); } /* notify driver */ err = sta_info_insert_drv_state(local, sdata, sta); if (err) goto out_remove; set_sta_flag(sta, WLAN_STA_INSERTED); /* accept BA sessions now */ clear_sta_flag(sta, WLAN_STA_BLOCK_BA); ieee80211_sta_debugfs_add(sta); rate_control_add_sta_debugfs(sta); if (sta->sta.valid_links) { int i; for (i = 0; i < ARRAY_SIZE(sta->link); i++) { struct link_sta_info *link_sta; link_sta = rcu_dereference_protected(sta->link[i], lockdep_is_held(&local->hw.wiphy->mtx)); if (!link_sta) continue; ieee80211_link_sta_debugfs_add(link_sta); if (sdata->vif.active_links & BIT(i)) ieee80211_link_sta_debugfs_drv_add(link_sta); } } else { ieee80211_link_sta_debugfs_add(&sta->deflink); ieee80211_link_sta_debugfs_drv_add(&sta->deflink); } sinfo->generation = local->sta_generation; cfg80211_new_sta(sdata->dev, sta->sta.addr, sinfo, GFP_KERNEL); kfree(sinfo); sta_dbg(sdata, "Inserted STA %pM\n", sta->sta.addr); /* move reference to rcu-protected */ rcu_read_lock(); if (ieee80211_vif_is_mesh(&sdata->vif)) mesh_accept_plinks_update(sdata); ieee80211_check_fast_xmit(sta); return 0; out_remove: if (sta->sta.valid_links) link_sta_info_hash_del(local, &sta->deflink); sta_info_hash_del(local, sta); list_del_rcu(&sta->list); out_drop_sta: local->num_sta--; synchronize_net(); out_cleanup: cleanup_single_sta(sta); kfree(sinfo); rcu_read_lock(); return err; } int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU) { struct ieee80211_local *local = sta->local; int err; might_sleep(); lockdep_assert_wiphy(local->hw.wiphy); err = sta_info_insert_check(sta); if (err) { sta_info_free(local, sta); rcu_read_lock(); return err; } return sta_info_insert_finish(sta); } int sta_info_insert(struct sta_info *sta) { int err = sta_info_insert_rcu(sta); rcu_read_unlock(); return err; } static inline void __bss_tim_set(u8 *tim, u16 id) { /* * This format has been mandated by the IEEE specifications, * so this line may not be changed to use the __set_bit() format. */ tim[id / 8] |= (1 << (id % 8)); } static inline void __bss_tim_clear(u8 *tim, u16 id) { /* * This format has been mandated by the IEEE specifications, * so this line may not be changed to use the __clear_bit() format. */ tim[id / 8] &= ~(1 << (id % 8)); } static inline bool __bss_tim_get(u8 *tim, u16 id) { /* * This format has been mandated by the IEEE specifications, * so this line may not be changed to use the test_bit() format. */ return tim[id / 8] & (1 << (id % 8)); } static unsigned long ieee80211_tids_for_ac(int ac) { /* If we ever support TIDs > 7, this obviously needs to be adjusted */ switch (ac) { case IEEE80211_AC_VO: return BIT(6) | BIT(7); case IEEE80211_AC_VI: return BIT(4) | BIT(5); case IEEE80211_AC_BE: return BIT(0) | BIT(3); case IEEE80211_AC_BK: return BIT(1) | BIT(2); default: WARN_ON(1); return 0; } } static void __sta_info_recalc_tim(struct sta_info *sta, bool ignore_pending) { struct ieee80211_local *local = sta->local; struct ps_data *ps; bool indicate_tim = false; u8 ignore_for_tim = sta->sta.uapsd_queues; int ac; u16 id = sta->sta.aid; if (sta->sdata->vif.type == NL80211_IFTYPE_AP || sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { if (WARN_ON_ONCE(!sta->sdata->bss)) return; ps = &sta->sdata->bss->ps; #ifdef CONFIG_MAC80211_MESH } else if (ieee80211_vif_is_mesh(&sta->sdata->vif)) { ps = &sta->sdata->u.mesh.ps; #endif } else { return; } /* No need to do anything if the driver does all */ if (ieee80211_hw_check(&local->hw, AP_LINK_PS) && !local->ops->set_tim) return; if (sta->dead) goto done; /* * If all ACs are delivery-enabled then we should build * the TIM bit for all ACs anyway; if only some are then * we ignore those and build the TIM bit using only the * non-enabled ones. */ if (ignore_for_tim == BIT(IEEE80211_NUM_ACS) - 1) ignore_for_tim = 0; if (ignore_pending) ignore_for_tim = BIT(IEEE80211_NUM_ACS) - 1; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { unsigned long tids; if (ignore_for_tim & ieee80211_ac_to_qos_mask[ac]) continue; indicate_tim |= !skb_queue_empty(&sta->tx_filtered[ac]) || !skb_queue_empty(&sta->ps_tx_buf[ac]); if (indicate_tim) break; tids = ieee80211_tids_for_ac(ac); indicate_tim |= sta->driver_buffered_tids & tids; indicate_tim |= sta->txq_buffered_tids & tids; } done: spin_lock_bh(&local->tim_lock); if (indicate_tim == __bss_tim_get(ps->tim, id)) goto out_unlock; if (indicate_tim) __bss_tim_set(ps->tim, id); else __bss_tim_clear(ps->tim, id); if (local->ops->set_tim && !WARN_ON(sta->dead)) { local->tim_in_locked_section = true; drv_set_tim(local, &sta->sta, indicate_tim); local->tim_in_locked_section = false; } out_unlock: spin_unlock_bh(&local->tim_lock); } void sta_info_recalc_tim(struct sta_info *sta) { __sta_info_recalc_tim(sta, false); } static bool sta_info_buffer_expired(struct sta_info *sta, struct sk_buff *skb) { struct ieee80211_tx_info *info; int timeout; if (!skb) return false; info = IEEE80211_SKB_CB(skb); /* Timeout: (2 * listen_interval * beacon_int * 1024 / 1000000) sec */ timeout = (sta->listen_interval * sta->sdata->vif.bss_conf.beacon_int * 32 / 15625) * HZ; if (timeout < STA_TX_BUFFER_EXPIRE) timeout = STA_TX_BUFFER_EXPIRE; return time_after(jiffies, info->control.jiffies + timeout); } static bool sta_info_cleanup_expire_buffered_ac(struct ieee80211_local *local, struct sta_info *sta, int ac) { unsigned long flags; struct sk_buff *skb; /* * First check for frames that should expire on the filtered * queue. Frames here were rejected by the driver and are on * a separate queue to avoid reordering with normal PS-buffered * frames. They also aren't accounted for right now in the * total_ps_buffered counter. */ for (;;) { spin_lock_irqsave(&sta->tx_filtered[ac].lock, flags); skb = skb_peek(&sta->tx_filtered[ac]); if (sta_info_buffer_expired(sta, skb)) skb = __skb_dequeue(&sta->tx_filtered[ac]); else skb = NULL; spin_unlock_irqrestore(&sta->tx_filtered[ac].lock, flags); /* * Frames are queued in order, so if this one * hasn't expired yet we can stop testing. If * we actually reached the end of the queue we * also need to stop, of course. */ if (!skb) break; ieee80211_free_txskb(&local->hw, skb); } /* * Now also check the normal PS-buffered queue, this will * only find something if the filtered queue was emptied * since the filtered frames are all before the normal PS * buffered frames. */ for (;;) { spin_lock_irqsave(&sta->ps_tx_buf[ac].lock, flags); skb = skb_peek(&sta->ps_tx_buf[ac]); if (sta_info_buffer_expired(sta, skb)) skb = __skb_dequeue(&sta->ps_tx_buf[ac]); else skb = NULL; spin_unlock_irqrestore(&sta->ps_tx_buf[ac].lock, flags); /* * frames are queued in order, so if this one * hasn't expired yet (or we reached the end of * the queue) we can stop testing */ if (!skb) break; local->total_ps_buffered--; ps_dbg(sta->sdata, "Buffered frame expired (STA %pM)\n", sta->sta.addr); ieee80211_free_txskb(&local->hw, skb); } /* * Finally, recalculate the TIM bit for this station -- it might * now be clear because the station was too slow to retrieve its * frames. */ sta_info_recalc_tim(sta); /* * Return whether there are any frames still buffered, this is * used to check whether the cleanup timer still needs to run, * if there are no frames we don't need to rearm the timer. */ return !(skb_queue_empty(&sta->ps_tx_buf[ac]) && skb_queue_empty(&sta->tx_filtered[ac])); } static bool sta_info_cleanup_expire_buffered(struct ieee80211_local *local, struct sta_info *sta) { bool have_buffered = false; int ac; /* This is only necessary for stations on BSS/MBSS interfaces */ if (!sta->sdata->bss && !ieee80211_vif_is_mesh(&sta->sdata->vif)) return false; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) have_buffered |= sta_info_cleanup_expire_buffered_ac(local, sta, ac); return have_buffered; } static int __must_check __sta_info_destroy_part1(struct sta_info *sta) { struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; int ret, i; might_sleep(); if (!sta) return -ENOENT; local = sta->local; sdata = sta->sdata; lockdep_assert_wiphy(local->hw.wiphy); /* * Before removing the station from the driver and * rate control, it might still start new aggregation * sessions -- block that to make sure the tear-down * will be sufficient. */ set_sta_flag(sta, WLAN_STA_BLOCK_BA); ieee80211_sta_tear_down_BA_sessions(sta, AGG_STOP_DESTROY_STA); /* * Before removing the station from the driver there might be pending * rx frames on RSS queues sent prior to the disassociation - wait for * all such frames to be processed. */ drv_sync_rx_queues(local, sta); for (i = 0; i < ARRAY_SIZE(sta->link); i++) { struct link_sta_info *link_sta; if (!(sta->sta.valid_links & BIT(i))) continue; link_sta = rcu_dereference_protected(sta->link[i], lockdep_is_held(&local->hw.wiphy->mtx)); link_sta_info_hash_del(local, link_sta); } ret = sta_info_hash_del(local, sta); if (WARN_ON(ret)) return ret; /* * for TDLS peers, make sure to return to the base channel before * removal. */ if (test_sta_flag(sta, WLAN_STA_TDLS_OFF_CHANNEL)) { drv_tdls_cancel_channel_switch(local, sdata, &sta->sta); clear_sta_flag(sta, WLAN_STA_TDLS_OFF_CHANNEL); } list_del_rcu(&sta->list); sta->removed = true; if (sta->uploaded) drv_sta_pre_rcu_remove(local, sta->sdata, sta); if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN && rcu_access_pointer(sdata->u.vlan.sta) == sta) RCU_INIT_POINTER(sdata->u.vlan.sta, NULL); return 0; } static int _sta_info_move_state(struct sta_info *sta, enum ieee80211_sta_state new_state, bool recalc) { struct ieee80211_local *local = sta->local; might_sleep(); if (sta->sta_state == new_state) return 0; /* check allowed transitions first */ switch (new_state) { case IEEE80211_STA_NONE: if (sta->sta_state != IEEE80211_STA_AUTH) return -EINVAL; break; case IEEE80211_STA_AUTH: if (sta->sta_state != IEEE80211_STA_NONE && sta->sta_state != IEEE80211_STA_ASSOC) return -EINVAL; break; case IEEE80211_STA_ASSOC: if (sta->sta_state != IEEE80211_STA_AUTH && sta->sta_state != IEEE80211_STA_AUTHORIZED) return -EINVAL; break; case IEEE80211_STA_AUTHORIZED: if (sta->sta_state != IEEE80211_STA_ASSOC) return -EINVAL; break; default: WARN(1, "invalid state %d", new_state); return -EINVAL; } sta_dbg(sta->sdata, "moving STA %pM to state %d\n", sta->sta.addr, new_state); /* notify the driver before the actual changes so it can * fail the transition */ if (test_sta_flag(sta, WLAN_STA_INSERTED)) { int err = drv_sta_state(sta->local, sta->sdata, sta, sta->sta_state, new_state); if (err) return err; } /* reflect the change in all state variables */ switch (new_state) { case IEEE80211_STA_NONE: if (sta->sta_state == IEEE80211_STA_AUTH) clear_bit(WLAN_STA_AUTH, &sta->_flags); break; case IEEE80211_STA_AUTH: if (sta->sta_state == IEEE80211_STA_NONE) { set_bit(WLAN_STA_AUTH, &sta->_flags); } else if (sta->sta_state == IEEE80211_STA_ASSOC) { clear_bit(WLAN_STA_ASSOC, &sta->_flags); if (recalc) { ieee80211_recalc_min_chandef(sta->sdata, -1); if (!sta->sta.support_p2p_ps) ieee80211_recalc_p2p_go_ps_allowed(sta->sdata); } } break; case IEEE80211_STA_ASSOC: if (sta->sta_state == IEEE80211_STA_AUTH) { set_bit(WLAN_STA_ASSOC, &sta->_flags); sta->assoc_at = ktime_get_boottime_ns(); if (recalc) { ieee80211_recalc_min_chandef(sta->sdata, -1); if (!sta->sta.support_p2p_ps) ieee80211_recalc_p2p_go_ps_allowed(sta->sdata); } } else if (sta->sta_state == IEEE80211_STA_AUTHORIZED) { ieee80211_vif_dec_num_mcast(sta->sdata); clear_bit(WLAN_STA_AUTHORIZED, &sta->_flags); /* * If we have encryption offload, flush (station) queues * (after ensuring concurrent TX completed) so we won't * transmit anything later unencrypted if/when keys are * also removed, which might otherwise happen depending * on how the hardware offload works. */ if (local->ops->set_key) { synchronize_net(); if (local->ops->flush_sta) drv_flush_sta(local, sta->sdata, sta); else ieee80211_flush_queues(local, sta->sdata, false); } ieee80211_clear_fast_xmit(sta); ieee80211_clear_fast_rx(sta); } break; case IEEE80211_STA_AUTHORIZED: if (sta->sta_state == IEEE80211_STA_ASSOC) { ieee80211_vif_inc_num_mcast(sta->sdata); set_bit(WLAN_STA_AUTHORIZED, &sta->_flags); ieee80211_check_fast_xmit(sta); ieee80211_check_fast_rx(sta); } if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN || sta->sdata->vif.type == NL80211_IFTYPE_AP) cfg80211_send_layer2_update(sta->sdata->dev, sta->sta.addr); break; default: break; } sta->sta_state = new_state; return 0; } int sta_info_move_state(struct sta_info *sta, enum ieee80211_sta_state new_state) { return _sta_info_move_state(sta, new_state, true); } static void __sta_info_destroy_part2(struct sta_info *sta, bool recalc) { struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata = sta->sdata; struct station_info *sinfo; int ret; /* * NOTE: This assumes at least synchronize_net() was done * after _part1 and before _part2! */ /* * There's a potential race in _part1 where we set WLAN_STA_BLOCK_BA * but someone might have just gotten past a check, and not yet into * queuing the work/creating the data/etc. * * Do another round of destruction so that the worker is certainly * canceled before we later free the station. * * Since this is after synchronize_rcu()/synchronize_net() we're now * certain that nobody can actually hold a reference to the STA and * be calling e.g. ieee80211_start_tx_ba_session(). */ ieee80211_sta_tear_down_BA_sessions(sta, AGG_STOP_DESTROY_STA); might_sleep(); lockdep_assert_wiphy(local->hw.wiphy); if (sta->sta_state == IEEE80211_STA_AUTHORIZED) { ret = _sta_info_move_state(sta, IEEE80211_STA_ASSOC, recalc); WARN_ON_ONCE(ret); } /* now keys can no longer be reached */ ieee80211_free_sta_keys(local, sta); /* disable TIM bit - last chance to tell driver */ __sta_info_recalc_tim(sta, true); sta->dead = true; local->num_sta--; local->sta_generation++; while (sta->sta_state > IEEE80211_STA_NONE) { ret = _sta_info_move_state(sta, sta->sta_state - 1, recalc); if (ret) { WARN_ON_ONCE(1); break; } } if (sta->uploaded) { ret = drv_sta_state(local, sdata, sta, IEEE80211_STA_NONE, IEEE80211_STA_NOTEXIST); WARN_ON_ONCE(ret != 0); } sta_dbg(sdata, "Removed STA %pM\n", sta->sta.addr); sinfo = kzalloc(sizeof(*sinfo), GFP_KERNEL); if (sinfo) sta_set_sinfo(sta, sinfo, true); cfg80211_del_sta_sinfo(sdata->dev, sta->sta.addr, sinfo, GFP_KERNEL); kfree(sinfo); ieee80211_sta_debugfs_remove(sta); ieee80211_destroy_frag_cache(&sta->frags); cleanup_single_sta(sta); } int __must_check __sta_info_destroy(struct sta_info *sta) { int err = __sta_info_destroy_part1(sta); if (err) return err; synchronize_net(); __sta_info_destroy_part2(sta, true); return 0; } int sta_info_destroy_addr(struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct sta_info *sta; lockdep_assert_wiphy(sdata->local->hw.wiphy); sta = sta_info_get(sdata, addr); return __sta_info_destroy(sta); } int sta_info_destroy_addr_bss(struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct sta_info *sta; lockdep_assert_wiphy(sdata->local->hw.wiphy); sta = sta_info_get_bss(sdata, addr); return __sta_info_destroy(sta); } static void sta_info_cleanup(struct timer_list *t) { struct ieee80211_local *local = from_timer(local, t, sta_cleanup); struct sta_info *sta; bool timer_needed = false; rcu_read_lock(); list_for_each_entry_rcu(sta, &local->sta_list, list) if (sta_info_cleanup_expire_buffered(local, sta)) timer_needed = true; rcu_read_unlock(); if (local->quiescing) return; if (!timer_needed) return; mod_timer(&local->sta_cleanup, round_jiffies(jiffies + STA_INFO_CLEANUP_INTERVAL)); } int sta_info_init(struct ieee80211_local *local) { int err; err = rhltable_init(&local->sta_hash, &sta_rht_params); if (err) return err; err = rhltable_init(&local->link_sta_hash, &link_sta_rht_params); if (err) { rhltable_destroy(&local->sta_hash); return err; } spin_lock_init(&local->tim_lock); INIT_LIST_HEAD(&local->sta_list); timer_setup(&local->sta_cleanup, sta_info_cleanup, 0); return 0; } void sta_info_stop(struct ieee80211_local *local) { del_timer_sync(&local->sta_cleanup); rhltable_destroy(&local->sta_hash); rhltable_destroy(&local->link_sta_hash); } int __sta_info_flush(struct ieee80211_sub_if_data *sdata, bool vlans, int link_id) { struct ieee80211_local *local = sdata->local; struct sta_info *sta, *tmp; LIST_HEAD(free_list); int ret = 0; might_sleep(); lockdep_assert_wiphy(local->hw.wiphy); WARN_ON(vlans && sdata->vif.type != NL80211_IFTYPE_AP); WARN_ON(vlans && !sdata->bss); list_for_each_entry_safe(sta, tmp, &local->sta_list, list) { if (sdata != sta->sdata && (!vlans || sdata->bss != sta->sdata->bss)) continue; if (link_id >= 0 && sta->sta.valid_links && !(sta->sta.valid_links & BIT(link_id))) continue; if (!WARN_ON(__sta_info_destroy_part1(sta))) list_add(&sta->free_list, &free_list); ret++; } if (!list_empty(&free_list)) { bool support_p2p_ps = true; synchronize_net(); list_for_each_entry_safe(sta, tmp, &free_list, free_list) { if (!sta->sta.support_p2p_ps) support_p2p_ps = false; __sta_info_destroy_part2(sta, false); } ieee80211_recalc_min_chandef(sdata, -1); if (!support_p2p_ps) ieee80211_recalc_p2p_go_ps_allowed(sdata); } return ret; } void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, unsigned long exp_time) { struct ieee80211_local *local = sdata->local; struct sta_info *sta, *tmp; lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry_safe(sta, tmp, &local->sta_list, list) { unsigned long last_active = ieee80211_sta_last_active(sta); if (sdata != sta->sdata) continue; if (time_is_before_jiffies(last_active + exp_time)) { sta_dbg(sta->sdata, "expiring inactive STA %pM\n", sta->sta.addr); if (ieee80211_vif_is_mesh(&sdata->vif) && test_sta_flag(sta, WLAN_STA_PS_STA)) atomic_dec(&sdata->u.mesh.ps.num_sta_ps); WARN_ON(__sta_info_destroy(sta)); } } } struct ieee80211_sta *ieee80211_find_sta_by_ifaddr(struct ieee80211_hw *hw, const u8 *addr, const u8 *localaddr) { struct ieee80211_local *local = hw_to_local(hw); struct rhlist_head *tmp; struct sta_info *sta; /* * Just return a random station if localaddr is NULL * ... first in list. */ for_each_sta_info(local, addr, sta, tmp) { if (localaddr && !ether_addr_equal(sta->sdata->vif.addr, localaddr)) continue; if (!sta->uploaded) return NULL; return &sta->sta; } return NULL; } EXPORT_SYMBOL_GPL(ieee80211_find_sta_by_ifaddr); struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_vif *vif, const u8 *addr) { struct sta_info *sta; if (!vif) return NULL; sta = sta_info_get_bss(vif_to_sdata(vif), addr); if (!sta) return NULL; if (!sta->uploaded) return NULL; return &sta->sta; } EXPORT_SYMBOL(ieee80211_find_sta); /* powersave support code */ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; struct sk_buff_head pending; int filtered = 0, buffered = 0, ac, i; unsigned long flags; struct ps_data *ps; if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) sdata = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap); if (sdata->vif.type == NL80211_IFTYPE_AP) ps = &sdata->bss->ps; else if (ieee80211_vif_is_mesh(&sdata->vif)) ps = &sdata->u.mesh.ps; else return; clear_sta_flag(sta, WLAN_STA_SP); BUILD_BUG_ON(BITS_TO_LONGS(IEEE80211_NUM_TIDS) > 1); sta->driver_buffered_tids = 0; sta->txq_buffered_tids = 0; if (!ieee80211_hw_check(&local->hw, AP_LINK_PS)) drv_sta_notify(local, sdata, STA_NOTIFY_AWAKE, &sta->sta); for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) { if (!sta->sta.txq[i] || !txq_has_queue(sta->sta.txq[i])) continue; schedule_and_wake_txq(local, to_txq_info(sta->sta.txq[i])); } skb_queue_head_init(&pending); /* sync with ieee80211_tx_h_unicast_ps_buf */ spin_lock_bh(&sta->ps_lock); /* Send all buffered frames to the station */ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { int count = skb_queue_len(&pending), tmp; spin_lock_irqsave(&sta->tx_filtered[ac].lock, flags); skb_queue_splice_tail_init(&sta->tx_filtered[ac], &pending); spin_unlock_irqrestore(&sta->tx_filtered[ac].lock, flags); tmp = skb_queue_len(&pending); filtered += tmp - count; count = tmp; spin_lock_irqsave(&sta->ps_tx_buf[ac].lock, flags); skb_queue_splice_tail_init(&sta->ps_tx_buf[ac], &pending); spin_unlock_irqrestore(&sta->ps_tx_buf[ac].lock, flags); tmp = skb_queue_len(&pending); buffered += tmp - count; } ieee80211_add_pending_skbs(local, &pending); /* now we're no longer in the deliver code */ clear_sta_flag(sta, WLAN_STA_PS_DELIVER); /* The station might have polled and then woken up before we responded, * so clear these flags now to avoid them sticking around. */ clear_sta_flag(sta, WLAN_STA_PSPOLL); clear_sta_flag(sta, WLAN_STA_UAPSD); spin_unlock_bh(&sta->ps_lock); atomic_dec(&ps->num_sta_ps); local->total_ps_buffered -= buffered; sta_info_recalc_tim(sta); ps_dbg(sdata, "STA %pM aid %d sending %d filtered/%d PS frames since STA woke up\n", sta->sta.addr, sta->sta.aid, filtered, buffered); ieee80211_check_fast_xmit(sta); } static void ieee80211_send_null_response(struct sta_info *sta, int tid, enum ieee80211_frame_release_type reason, bool call_driver, bool more_data) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; struct ieee80211_qos_hdr *nullfunc; struct sk_buff *skb; int size = sizeof(*nullfunc); __le16 fc; bool qos = sta->sta.wme; struct ieee80211_tx_info *info; struct ieee80211_chanctx_conf *chanctx_conf; if (qos) { fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_NULLFUNC | IEEE80211_FCTL_FROMDS); } else { size -= 2; fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC | IEEE80211_FCTL_FROMDS); } skb = dev_alloc_skb(local->hw.extra_tx_headroom + size); if (!skb) return; skb_reserve(skb, local->hw.extra_tx_headroom); nullfunc = skb_put(skb, size); nullfunc->frame_control = fc; nullfunc->duration_id = 0; memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN); memcpy(nullfunc->addr2, sdata->vif.addr, ETH_ALEN); memcpy(nullfunc->addr3, sdata->vif.addr, ETH_ALEN); nullfunc->seq_ctrl = 0; skb->priority = tid; skb_set_queue_mapping(skb, ieee802_1d_to_ac[tid]); if (qos) { nullfunc->qos_ctrl = cpu_to_le16(tid); if (reason == IEEE80211_FRAME_RELEASE_UAPSD) { nullfunc->qos_ctrl |= cpu_to_le16(IEEE80211_QOS_CTL_EOSP); if (more_data) nullfunc->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREDATA); } } info = IEEE80211_SKB_CB(skb); /* * Tell TX path to send this frame even though the * STA may still remain is PS mode after this frame * exchange. Also set EOSP to indicate this packet * ends the poll/service period. */ info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER | IEEE80211_TX_STATUS_EOSP | IEEE80211_TX_CTL_REQ_TX_STATUS; info->control.flags |= IEEE80211_TX_CTRL_PS_RESPONSE; if (call_driver) drv_allow_buffered_frames(local, sta, BIT(tid), 1, reason, false); skb->dev = sdata->dev; rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (WARN_ON(!chanctx_conf)) { rcu_read_unlock(); kfree_skb(skb); return; } info->band = chanctx_conf->def.chan->band; ieee80211_xmit(sdata, sta, skb); rcu_read_unlock(); } static int find_highest_prio_tid(unsigned long tids) { /* lower 3 TIDs aren't ordered perfectly */ if (tids & 0xF8) return fls(tids) - 1; /* TID 0 is BE just like TID 3 */ if (tids & BIT(0)) return 0; return fls(tids) - 1; } /* Indicates if the MORE_DATA bit should be set in the last * frame obtained by ieee80211_sta_ps_get_frames. * Note that driver_release_tids is relevant only if * reason = IEEE80211_FRAME_RELEASE_PSPOLL */ static bool ieee80211_sta_ps_more_data(struct sta_info *sta, u8 ignored_acs, enum ieee80211_frame_release_type reason, unsigned long driver_release_tids) { int ac; /* If the driver has data on more than one TID then * certainly there's more data if we release just a * single frame now (from a single TID). This will * only happen for PS-Poll. */ if (reason == IEEE80211_FRAME_RELEASE_PSPOLL && hweight16(driver_release_tids) > 1) return true; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { if (ignored_acs & ieee80211_ac_to_qos_mask[ac]) continue; if (!skb_queue_empty(&sta->tx_filtered[ac]) || !skb_queue_empty(&sta->ps_tx_buf[ac])) return true; } return false; } static void ieee80211_sta_ps_get_frames(struct sta_info *sta, int n_frames, u8 ignored_acs, enum ieee80211_frame_release_type reason, struct sk_buff_head *frames, unsigned long *driver_release_tids) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; int ac; /* Get response frame(s) and more data bit for the last one. */ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { unsigned long tids; if (ignored_acs & ieee80211_ac_to_qos_mask[ac]) continue; tids = ieee80211_tids_for_ac(ac); /* if we already have frames from software, then we can't also * release from hardware queues */ if (skb_queue_empty(frames)) { *driver_release_tids |= sta->driver_buffered_tids & tids; *driver_release_tids |= sta->txq_buffered_tids & tids; } if (!*driver_release_tids) { struct sk_buff *skb; while (n_frames > 0) { skb = skb_dequeue(&sta->tx_filtered[ac]); if (!skb) { skb = skb_dequeue( &sta->ps_tx_buf[ac]); if (skb) local->total_ps_buffered--; } if (!skb) break; n_frames--; __skb_queue_tail(frames, skb); } } /* If we have more frames buffered on this AC, then abort the * loop since we can't send more data from other ACs before * the buffered frames from this. */ if (!skb_queue_empty(&sta->tx_filtered[ac]) || !skb_queue_empty(&sta->ps_tx_buf[ac])) break; } } static void ieee80211_sta_ps_deliver_response(struct sta_info *sta, int n_frames, u8 ignored_acs, enum ieee80211_frame_release_type reason) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; unsigned long driver_release_tids = 0; struct sk_buff_head frames; bool more_data; /* Service or PS-Poll period starts */ set_sta_flag(sta, WLAN_STA_SP); __skb_queue_head_init(&frames); ieee80211_sta_ps_get_frames(sta, n_frames, ignored_acs, reason, &frames, &driver_release_tids); more_data = ieee80211_sta_ps_more_data(sta, ignored_acs, reason, driver_release_tids); if (driver_release_tids && reason == IEEE80211_FRAME_RELEASE_PSPOLL) driver_release_tids = BIT(find_highest_prio_tid(driver_release_tids)); if (skb_queue_empty(&frames) && !driver_release_tids) { int tid, ac; /* * For PS-Poll, this can only happen due to a race condition * when we set the TIM bit and the station notices it, but * before it can poll for the frame we expire it. * * For uAPSD, this is said in the standard (11.2.1.5 h): * At each unscheduled SP for a non-AP STA, the AP shall * attempt to transmit at least one MSDU or MMPDU, but no * more than the value specified in the Max SP Length field * in the QoS Capability element from delivery-enabled ACs, * that are destined for the non-AP STA. * * Since we have no other MSDU/MMPDU, transmit a QoS null frame. */ /* This will evaluate to 1, 3, 5 or 7. */ for (ac = IEEE80211_AC_VO; ac < IEEE80211_NUM_ACS; ac++) if (!(ignored_acs & ieee80211_ac_to_qos_mask[ac])) break; tid = 7 - 2 * ac; ieee80211_send_null_response(sta, tid, reason, true, false); } else if (!driver_release_tids) { struct sk_buff_head pending; struct sk_buff *skb; int num = 0; u16 tids = 0; bool need_null = false; skb_queue_head_init(&pending); while ((skb = __skb_dequeue(&frames))) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_hdr *hdr = (void *) skb->data; u8 *qoshdr = NULL; num++; /* * Tell TX path to send this frame even though the * STA may still remain is PS mode after this frame * exchange. */ info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER; info->control.flags |= IEEE80211_TX_CTRL_PS_RESPONSE; /* * Use MoreData flag to indicate whether there are * more buffered frames for this STA */ if (more_data || !skb_queue_empty(&frames)) hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREDATA); else hdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_MOREDATA); if (ieee80211_is_data_qos(hdr->frame_control) || ieee80211_is_qos_nullfunc(hdr->frame_control)) qoshdr = ieee80211_get_qos_ctl(hdr); tids |= BIT(skb->priority); __skb_queue_tail(&pending, skb); /* end service period after last frame or add one */ if (!skb_queue_empty(&frames)) continue; if (reason != IEEE80211_FRAME_RELEASE_UAPSD) { /* for PS-Poll, there's only one frame */ info->flags |= IEEE80211_TX_STATUS_EOSP | IEEE80211_TX_CTL_REQ_TX_STATUS; break; } /* For uAPSD, things are a bit more complicated. If the * last frame has a QoS header (i.e. is a QoS-data or * QoS-nulldata frame) then just set the EOSP bit there * and be done. * If the frame doesn't have a QoS header (which means * it should be a bufferable MMPDU) then we can't set * the EOSP bit in the QoS header; add a QoS-nulldata * frame to the list to send it after the MMPDU. * * Note that this code is only in the mac80211-release * code path, we assume that the driver will not buffer * anything but QoS-data frames, or if it does, will * create the QoS-nulldata frame by itself if needed. * * Cf. 802.11-2012 10.2.1.10 (c). */ if (qoshdr) { *qoshdr |= IEEE80211_QOS_CTL_EOSP; info->flags |= IEEE80211_TX_STATUS_EOSP | IEEE80211_TX_CTL_REQ_TX_STATUS; } else { /* The standard isn't completely clear on this * as it says the more-data bit should be set * if there are more BUs. The QoS-Null frame * we're about to send isn't buffered yet, we * only create it below, but let's pretend it * was buffered just in case some clients only * expect more-data=0 when eosp=1. */ hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREDATA); need_null = true; num++; } break; } drv_allow_buffered_frames(local, sta, tids, num, reason, more_data); ieee80211_add_pending_skbs(local, &pending); if (need_null) ieee80211_send_null_response( sta, find_highest_prio_tid(tids), reason, false, false); sta_info_recalc_tim(sta); } else { int tid; /* * We need to release a frame that is buffered somewhere in the * driver ... it'll have to handle that. * Note that the driver also has to check the number of frames * on the TIDs we're releasing from - if there are more than * n_frames it has to set the more-data bit (if we didn't ask * it to set it anyway due to other buffered frames); if there * are fewer than n_frames it has to make sure to adjust that * to allow the service period to end properly. */ drv_release_buffered_frames(local, sta, driver_release_tids, n_frames, reason, more_data); /* * Note that we don't recalculate the TIM bit here as it would * most likely have no effect at all unless the driver told us * that the TID(s) became empty before returning here from the * release function. * Either way, however, when the driver tells us that the TID(s) * became empty or we find that a txq became empty, we'll do the * TIM recalculation. */ for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) { if (!sta->sta.txq[tid] || !(driver_release_tids & BIT(tid)) || txq_has_queue(sta->sta.txq[tid])) continue; sta_info_recalc_tim(sta); break; } } } void ieee80211_sta_ps_deliver_poll_response(struct sta_info *sta) { u8 ignore_for_response = sta->sta.uapsd_queues; /* * If all ACs are delivery-enabled then we should reply * from any of them, if only some are enabled we reply * only from the non-enabled ones. */ if (ignore_for_response == BIT(IEEE80211_NUM_ACS) - 1) ignore_for_response = 0; ieee80211_sta_ps_deliver_response(sta, 1, ignore_for_response, IEEE80211_FRAME_RELEASE_PSPOLL); } void ieee80211_sta_ps_deliver_uapsd(struct sta_info *sta) { int n_frames = sta->sta.max_sp; u8 delivery_enabled = sta->sta.uapsd_queues; /* * If we ever grow support for TSPEC this might happen if * the TSPEC update from hostapd comes in between a trigger * frame setting WLAN_STA_UAPSD in the RX path and this * actually getting called. */ if (!delivery_enabled) return; switch (sta->sta.max_sp) { case 1: n_frames = 2; break; case 2: n_frames = 4; break; case 3: n_frames = 6; break; case 0: /* XXX: what is a good value? */ n_frames = 128; break; } ieee80211_sta_ps_deliver_response(sta, n_frames, ~delivery_enabled, IEEE80211_FRAME_RELEASE_UAPSD); } void ieee80211_sta_block_awake(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, bool block) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); trace_api_sta_block_awake(sta->local, pubsta, block); if (block) { set_sta_flag(sta, WLAN_STA_PS_DRIVER); ieee80211_clear_fast_xmit(sta); return; } if (!test_sta_flag(sta, WLAN_STA_PS_DRIVER)) return; if (!test_sta_flag(sta, WLAN_STA_PS_STA)) { set_sta_flag(sta, WLAN_STA_PS_DELIVER); clear_sta_flag(sta, WLAN_STA_PS_DRIVER); ieee80211_queue_work(hw, &sta->drv_deliver_wk); } else if (test_sta_flag(sta, WLAN_STA_PSPOLL) || test_sta_flag(sta, WLAN_STA_UAPSD)) { /* must be asleep in this case */ clear_sta_flag(sta, WLAN_STA_PS_DRIVER); ieee80211_queue_work(hw, &sta->drv_deliver_wk); } else { clear_sta_flag(sta, WLAN_STA_PS_DRIVER); ieee80211_check_fast_xmit(sta); } } EXPORT_SYMBOL(ieee80211_sta_block_awake); void ieee80211_sta_eosp(struct ieee80211_sta *pubsta) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); struct ieee80211_local *local = sta->local; trace_api_eosp(local, pubsta); clear_sta_flag(sta, WLAN_STA_SP); } EXPORT_SYMBOL(ieee80211_sta_eosp); void ieee80211_send_eosp_nullfunc(struct ieee80211_sta *pubsta, int tid) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); enum ieee80211_frame_release_type reason; bool more_data; trace_api_send_eosp_nullfunc(sta->local, pubsta, tid); reason = IEEE80211_FRAME_RELEASE_UAPSD; more_data = ieee80211_sta_ps_more_data(sta, ~sta->sta.uapsd_queues, reason, 0); ieee80211_send_null_response(sta, tid, reason, false, more_data); } EXPORT_SYMBOL(ieee80211_send_eosp_nullfunc); void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta, u8 tid, bool buffered) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); if (WARN_ON(tid >= IEEE80211_NUM_TIDS)) return; trace_api_sta_set_buffered(sta->local, pubsta, tid, buffered); if (buffered) set_bit(tid, &sta->driver_buffered_tids); else clear_bit(tid, &sta->driver_buffered_tids); sta_info_recalc_tim(sta); } EXPORT_SYMBOL(ieee80211_sta_set_buffered); void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid, u32 tx_airtime, u32 rx_airtime) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); struct ieee80211_local *local = sta->sdata->local; u8 ac = ieee80211_ac_from_tid(tid); u32 airtime = 0; if (sta->local->airtime_flags & AIRTIME_USE_TX) airtime += tx_airtime; if (sta->local->airtime_flags & AIRTIME_USE_RX) airtime += rx_airtime; spin_lock_bh(&local->active_txq_lock[ac]); sta->airtime[ac].tx_airtime += tx_airtime; sta->airtime[ac].rx_airtime += rx_airtime; if (ieee80211_sta_keep_active(sta, ac)) sta->airtime[ac].deficit -= airtime; spin_unlock_bh(&local->active_txq_lock[ac]); } EXPORT_SYMBOL(ieee80211_sta_register_airtime); void __ieee80211_sta_recalc_aggregates(struct sta_info *sta, u16 active_links) { bool first = true; int link_id; if (!sta->sta.valid_links || !sta->sta.mlo) { sta->sta.cur = &sta->sta.deflink.agg; return; } rcu_read_lock(); for (link_id = 0; link_id < ARRAY_SIZE((sta)->link); link_id++) { struct ieee80211_link_sta *link_sta; int i; if (!(active_links & BIT(link_id))) continue; link_sta = rcu_dereference(sta->sta.link[link_id]); if (!link_sta) continue; if (first) { sta->cur = sta->sta.deflink.agg; first = false; continue; } sta->cur.max_amsdu_len = min(sta->cur.max_amsdu_len, link_sta->agg.max_amsdu_len); sta->cur.max_rc_amsdu_len = min(sta->cur.max_rc_amsdu_len, link_sta->agg.max_rc_amsdu_len); for (i = 0; i < ARRAY_SIZE(sta->cur.max_tid_amsdu_len); i++) sta->cur.max_tid_amsdu_len[i] = min(sta->cur.max_tid_amsdu_len[i], link_sta->agg.max_tid_amsdu_len[i]); } rcu_read_unlock(); sta->sta.cur = &sta->cur; } void ieee80211_sta_recalc_aggregates(struct ieee80211_sta *pubsta) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); __ieee80211_sta_recalc_aggregates(sta, sta->sdata->vif.active_links); } EXPORT_SYMBOL(ieee80211_sta_recalc_aggregates); void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local, struct sta_info *sta, u8 ac, u16 tx_airtime, bool tx_completed) { int tx_pending; if (!wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) return; if (!tx_completed) { if (sta) atomic_add(tx_airtime, &sta->airtime[ac].aql_tx_pending); atomic_add(tx_airtime, &local->aql_total_pending_airtime); atomic_add(tx_airtime, &local->aql_ac_pending_airtime[ac]); return; } if (sta) { tx_pending = atomic_sub_return(tx_airtime, &sta->airtime[ac].aql_tx_pending); if (tx_pending < 0) atomic_cmpxchg(&sta->airtime[ac].aql_tx_pending, tx_pending, 0); } atomic_sub(tx_airtime, &local->aql_total_pending_airtime); tx_pending = atomic_sub_return(tx_airtime, &local->aql_ac_pending_airtime[ac]); if (WARN_ONCE(tx_pending < 0, "Device %s AC %d pending airtime underflow: %u, %u", wiphy_name(local->hw.wiphy), ac, tx_pending, tx_airtime)) { atomic_cmpxchg(&local->aql_ac_pending_airtime[ac], tx_pending, 0); atomic_sub(tx_pending, &local->aql_total_pending_airtime); } } static struct ieee80211_sta_rx_stats * sta_get_last_rx_stats(struct sta_info *sta) { struct ieee80211_sta_rx_stats *stats = &sta->deflink.rx_stats; int cpu; if (!sta->deflink.pcpu_rx_stats) return stats; for_each_possible_cpu(cpu) { struct ieee80211_sta_rx_stats *cpustats; cpustats = per_cpu_ptr(sta->deflink.pcpu_rx_stats, cpu); if (time_after(cpustats->last_rx, stats->last_rx)) stats = cpustats; } return stats; } static void sta_stats_decode_rate(struct ieee80211_local *local, u32 rate, struct rate_info *rinfo) { rinfo->bw = STA_STATS_GET(BW, rate); switch (STA_STATS_GET(TYPE, rate)) { case STA_STATS_RATE_TYPE_VHT: rinfo->flags = RATE_INFO_FLAGS_VHT_MCS; rinfo->mcs = STA_STATS_GET(VHT_MCS, rate); rinfo->nss = STA_STATS_GET(VHT_NSS, rate); if (STA_STATS_GET(SGI, rate)) rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI; break; case STA_STATS_RATE_TYPE_HT: rinfo->flags = RATE_INFO_FLAGS_MCS; rinfo->mcs = STA_STATS_GET(HT_MCS, rate); if (STA_STATS_GET(SGI, rate)) rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI; break; case STA_STATS_RATE_TYPE_LEGACY: { struct ieee80211_supported_band *sband; u16 brate; unsigned int shift; int band = STA_STATS_GET(LEGACY_BAND, rate); int rate_idx = STA_STATS_GET(LEGACY_IDX, rate); sband = local->hw.wiphy->bands[band]; if (WARN_ON_ONCE(!sband->bitrates)) break; brate = sband->bitrates[rate_idx].bitrate; if (rinfo->bw == RATE_INFO_BW_5) shift = 2; else if (rinfo->bw == RATE_INFO_BW_10) shift = 1; else shift = 0; rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift); break; } case STA_STATS_RATE_TYPE_HE: rinfo->flags = RATE_INFO_FLAGS_HE_MCS; rinfo->mcs = STA_STATS_GET(HE_MCS, rate); rinfo->nss = STA_STATS_GET(HE_NSS, rate); rinfo->he_gi = STA_STATS_GET(HE_GI, rate); rinfo->he_ru_alloc = STA_STATS_GET(HE_RU, rate); rinfo->he_dcm = STA_STATS_GET(HE_DCM, rate); break; case STA_STATS_RATE_TYPE_EHT: rinfo->flags = RATE_INFO_FLAGS_EHT_MCS; rinfo->mcs = STA_STATS_GET(EHT_MCS, rate); rinfo->nss = STA_STATS_GET(EHT_NSS, rate); rinfo->eht_gi = STA_STATS_GET(EHT_GI, rate); rinfo->eht_ru_alloc = STA_STATS_GET(EHT_RU, rate); break; } } static int sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo) { u32 rate = READ_ONCE(sta_get_last_rx_stats(sta)->last_rate); if (rate == STA_STATS_RATE_INVALID) return -EINVAL; sta_stats_decode_rate(sta->local, rate, rinfo); return 0; } static inline u64 sta_get_tidstats_msdu(struct ieee80211_sta_rx_stats *rxstats, int tid) { unsigned int start; u64 value; do { start = u64_stats_fetch_begin(&rxstats->syncp); value = rxstats->msdu[tid]; } while (u64_stats_fetch_retry(&rxstats->syncp, start)); return value; } static void sta_set_tidstats(struct sta_info *sta, struct cfg80211_tid_stats *tidstats, int tid) { struct ieee80211_local *local = sta->local; int cpu; if (!(tidstats->filled & BIT(NL80211_TID_STATS_RX_MSDU))) { tidstats->rx_msdu += sta_get_tidstats_msdu(&sta->deflink.rx_stats, tid); if (sta->deflink.pcpu_rx_stats) { for_each_possible_cpu(cpu) { struct ieee80211_sta_rx_stats *cpurxs; cpurxs = per_cpu_ptr(sta->deflink.pcpu_rx_stats, cpu); tidstats->rx_msdu += sta_get_tidstats_msdu(cpurxs, tid); } } tidstats->filled |= BIT(NL80211_TID_STATS_RX_MSDU); } if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU))) { tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU); tidstats->tx_msdu = sta->deflink.tx_stats.msdu[tid]; } if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU_RETRIES)) && ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU_RETRIES); tidstats->tx_msdu_retries = sta->deflink.status_stats.msdu_retries[tid]; } if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU_FAILED)) && ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU_FAILED); tidstats->tx_msdu_failed = sta->deflink.status_stats.msdu_failed[tid]; } if (tid < IEEE80211_NUM_TIDS) { spin_lock_bh(&local->fq.lock); rcu_read_lock(); tidstats->filled |= BIT(NL80211_TID_STATS_TXQ_STATS); ieee80211_fill_txq_stats(&tidstats->txq_stats, to_txq_info(sta->sta.txq[tid])); rcu_read_unlock(); spin_unlock_bh(&local->fq.lock); } } static inline u64 sta_get_stats_bytes(struct ieee80211_sta_rx_stats *rxstats) { unsigned int start; u64 value; do { start = u64_stats_fetch_begin(&rxstats->syncp); value = rxstats->bytes; } while (u64_stats_fetch_retry(&rxstats->syncp, start)); return value; } void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, bool tidstats) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; u32 thr = 0; int i, ac, cpu; struct ieee80211_sta_rx_stats *last_rxstats; last_rxstats = sta_get_last_rx_stats(sta); sinfo->generation = sdata->local->sta_generation; /* do before driver, so beacon filtering drivers have a * chance to e.g. just add the number of filtered beacons * (or just modify the value entirely, of course) */ if (sdata->vif.type == NL80211_IFTYPE_STATION) sinfo->rx_beacon = sdata->deflink.u.mgd.count_beacon_signal; drv_sta_statistics(local, sdata, &sta->sta, sinfo); sinfo->filled |= BIT_ULL(NL80211_STA_INFO_INACTIVE_TIME) | BIT_ULL(NL80211_STA_INFO_STA_FLAGS) | BIT_ULL(NL80211_STA_INFO_BSS_PARAM) | BIT_ULL(NL80211_STA_INFO_CONNECTED_TIME) | BIT_ULL(NL80211_STA_INFO_ASSOC_AT_BOOTTIME) | BIT_ULL(NL80211_STA_INFO_RX_DROP_MISC); if (sdata->vif.type == NL80211_IFTYPE_STATION) { sinfo->beacon_loss_count = sdata->deflink.u.mgd.beacon_loss_count; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_BEACON_LOSS); } sinfo->connected_time = ktime_get_seconds() - sta->last_connected; sinfo->assoc_at = sta->assoc_at; sinfo->inactive_time = jiffies_to_msecs(jiffies - ieee80211_sta_last_active(sta)); if (!(sinfo->filled & (BIT_ULL(NL80211_STA_INFO_TX_BYTES64) | BIT_ULL(NL80211_STA_INFO_TX_BYTES)))) { sinfo->tx_bytes = 0; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) sinfo->tx_bytes += sta->deflink.tx_stats.bytes[ac]; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BYTES64); } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_PACKETS))) { sinfo->tx_packets = 0; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) sinfo->tx_packets += sta->deflink.tx_stats.packets[ac]; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_PACKETS); } if (!(sinfo->filled & (BIT_ULL(NL80211_STA_INFO_RX_BYTES64) | BIT_ULL(NL80211_STA_INFO_RX_BYTES)))) { sinfo->rx_bytes += sta_get_stats_bytes(&sta->deflink.rx_stats); if (sta->deflink.pcpu_rx_stats) { for_each_possible_cpu(cpu) { struct ieee80211_sta_rx_stats *cpurxs; cpurxs = per_cpu_ptr(sta->deflink.pcpu_rx_stats, cpu); sinfo->rx_bytes += sta_get_stats_bytes(cpurxs); } } sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_BYTES64); } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_PACKETS))) { sinfo->rx_packets = sta->deflink.rx_stats.packets; if (sta->deflink.pcpu_rx_stats) { for_each_possible_cpu(cpu) { struct ieee80211_sta_rx_stats *cpurxs; cpurxs = per_cpu_ptr(sta->deflink.pcpu_rx_stats, cpu); sinfo->rx_packets += cpurxs->packets; } } sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_PACKETS); } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_RETRIES))) { sinfo->tx_retries = sta->deflink.status_stats.retry_count; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_RETRIES); } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_FAILED))) { sinfo->tx_failed = sta->deflink.status_stats.retry_failed; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_FAILED); } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_DURATION))) { for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) sinfo->rx_duration += sta->airtime[ac].rx_airtime; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_DURATION); } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_DURATION))) { for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) sinfo->tx_duration += sta->airtime[ac].tx_airtime; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_DURATION); } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_AIRTIME_WEIGHT))) { sinfo->airtime_weight = sta->airtime_weight; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_AIRTIME_WEIGHT); } sinfo->rx_dropped_misc = sta->deflink.rx_stats.dropped; if (sta->deflink.pcpu_rx_stats) { for_each_possible_cpu(cpu) { struct ieee80211_sta_rx_stats *cpurxs; cpurxs = per_cpu_ptr(sta->deflink.pcpu_rx_stats, cpu); sinfo->rx_dropped_misc += cpurxs->dropped; } } if (sdata->vif.type == NL80211_IFTYPE_STATION && !(sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER)) { sinfo->filled |= BIT_ULL(NL80211_STA_INFO_BEACON_RX) | BIT_ULL(NL80211_STA_INFO_BEACON_SIGNAL_AVG); sinfo->rx_beacon_signal_avg = ieee80211_ave_rssi(&sdata->vif); } if (ieee80211_hw_check(&sta->local->hw, SIGNAL_DBM) || ieee80211_hw_check(&sta->local->hw, SIGNAL_UNSPEC)) { if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_SIGNAL))) { sinfo->signal = (s8)last_rxstats->last_signal; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL); } if (!sta->deflink.pcpu_rx_stats && !(sinfo->filled & BIT_ULL(NL80211_STA_INFO_SIGNAL_AVG))) { sinfo->signal_avg = -ewma_signal_read(&sta->deflink.rx_stats_avg.signal); sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL_AVG); } } /* for the average - if pcpu_rx_stats isn't set - rxstats must point to * the sta->rx_stats struct, so the check here is fine with and without * pcpu statistics */ if (last_rxstats->chains && !(sinfo->filled & (BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL) | BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)))) { sinfo->filled |= BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL); if (!sta->deflink.pcpu_rx_stats) sinfo->filled |= BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL_AVG); sinfo->chains = last_rxstats->chains; for (i = 0; i < ARRAY_SIZE(sinfo->chain_signal); i++) { sinfo->chain_signal[i] = last_rxstats->chain_signal_last[i]; sinfo->chain_signal_avg[i] = -ewma_signal_read(&sta->deflink.rx_stats_avg.chain_signal[i]); } } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE)) && !sta->sta.valid_links && ieee80211_rate_valid(&sta->deflink.tx_stats.last_rate)) { sta_set_rate_info_tx(sta, &sta->deflink.tx_stats.last_rate, &sinfo->txrate); sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BITRATE); } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_BITRATE)) && !sta->sta.valid_links) { if (sta_set_rate_info_rx(sta, &sinfo->rxrate) == 0) sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_BITRATE); } if (tidstats && !cfg80211_sinfo_alloc_tid_stats(sinfo, GFP_KERNEL)) { for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++) sta_set_tidstats(sta, &sinfo->pertid[i], i); } if (ieee80211_vif_is_mesh(&sdata->vif)) { #ifdef CONFIG_MAC80211_MESH sinfo->filled |= BIT_ULL(NL80211_STA_INFO_LLID) | BIT_ULL(NL80211_STA_INFO_PLID) | BIT_ULL(NL80211_STA_INFO_PLINK_STATE) | BIT_ULL(NL80211_STA_INFO_LOCAL_PM) | BIT_ULL(NL80211_STA_INFO_PEER_PM) | BIT_ULL(NL80211_STA_INFO_NONPEER_PM) | BIT_ULL(NL80211_STA_INFO_CONNECTED_TO_GATE) | BIT_ULL(NL80211_STA_INFO_CONNECTED_TO_AS); sinfo->llid = sta->mesh->llid; sinfo->plid = sta->mesh->plid; sinfo->plink_state = sta->mesh->plink_state; if (test_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN)) { sinfo->filled |= BIT_ULL(NL80211_STA_INFO_T_OFFSET); sinfo->t_offset = sta->mesh->t_offset; } sinfo->local_pm = sta->mesh->local_pm; sinfo->peer_pm = sta->mesh->peer_pm; sinfo->nonpeer_pm = sta->mesh->nonpeer_pm; sinfo->connected_to_gate = sta->mesh->connected_to_gate; sinfo->connected_to_as = sta->mesh->connected_to_as; #endif } sinfo->bss_param.flags = 0; if (sdata->vif.bss_conf.use_cts_prot) sinfo->bss_param.flags |= BSS_PARAM_FLAGS_CTS_PROT; if (sdata->vif.bss_conf.use_short_preamble) sinfo->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_PREAMBLE; if (sdata->vif.bss_conf.use_short_slot) sinfo->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_SLOT_TIME; sinfo->bss_param.dtim_period = sdata->vif.bss_conf.dtim_period; sinfo->bss_param.beacon_interval = sdata->vif.bss_conf.beacon_int; sinfo->sta_flags.set = 0; sinfo->sta_flags.mask = BIT(NL80211_STA_FLAG_AUTHORIZED) | BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) | BIT(NL80211_STA_FLAG_WME) | BIT(NL80211_STA_FLAG_MFP) | BIT(NL80211_STA_FLAG_AUTHENTICATED) | BIT(NL80211_STA_FLAG_ASSOCIATED) | BIT(NL80211_STA_FLAG_TDLS_PEER); if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_AUTHORIZED); if (test_sta_flag(sta, WLAN_STA_SHORT_PREAMBLE)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_SHORT_PREAMBLE); if (sta->sta.wme) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_WME); if (test_sta_flag(sta, WLAN_STA_MFP)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_MFP); if (test_sta_flag(sta, WLAN_STA_AUTH)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_AUTHENTICATED); if (test_sta_flag(sta, WLAN_STA_ASSOC)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_ASSOCIATED); if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_TDLS_PEER); thr = sta_get_expected_throughput(sta); if (thr != 0) { sinfo->filled |= BIT_ULL(NL80211_STA_INFO_EXPECTED_THROUGHPUT); sinfo->expected_throughput = thr; } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL)) && sta->deflink.status_stats.ack_signal_filled) { sinfo->ack_signal = sta->deflink.status_stats.last_ack_signal; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL); } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL_AVG)) && sta->deflink.status_stats.ack_signal_filled) { sinfo->avg_ack_signal = -(s8)ewma_avg_signal_read( &sta->deflink.status_stats.avg_ack_signal); sinfo->filled |= BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL_AVG); } if (ieee80211_vif_is_mesh(&sdata->vif)) { sinfo->filled |= BIT_ULL(NL80211_STA_INFO_AIRTIME_LINK_METRIC); sinfo->airtime_link_metric = airtime_link_metric_get(local, sta); } } u32 sta_get_expected_throughput(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; struct rate_control_ref *ref = NULL; u32 thr = 0; if (test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) ref = local->rate_ctrl; /* check if the driver has a SW RC implementation */ if (ref && ref->ops->get_expected_throughput) thr = ref->ops->get_expected_throughput(sta->rate_ctrl_priv); else thr = drv_get_expected_throughput(local, sta); return thr; } unsigned long ieee80211_sta_last_active(struct sta_info *sta) { struct ieee80211_sta_rx_stats *stats = sta_get_last_rx_stats(sta); if (!sta->deflink.status_stats.last_ack || time_after(stats->last_rx, sta->deflink.status_stats.last_ack)) return stats->last_rx; return sta->deflink.status_stats.last_ack; } static void sta_update_codel_params(struct sta_info *sta, u32 thr) { if (thr && thr < STA_SLOW_THRESHOLD * sta->local->num_sta) { sta->cparams.target = MS2TIME(50); sta->cparams.interval = MS2TIME(300); sta->cparams.ecn = false; } else { sta->cparams.target = MS2TIME(20); sta->cparams.interval = MS2TIME(100); sta->cparams.ecn = true; } } void ieee80211_sta_set_expected_throughput(struct ieee80211_sta *pubsta, u32 thr) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); sta_update_codel_params(sta, thr); } int ieee80211_sta_allocate_link(struct sta_info *sta, unsigned int link_id) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct sta_link_alloc *alloc; int ret; lockdep_assert_wiphy(sdata->local->hw.wiphy); WARN_ON(!test_sta_flag(sta, WLAN_STA_INSERTED)); /* must represent an MLD from the start */ if (WARN_ON(!sta->sta.valid_links)) return -EINVAL; if (WARN_ON(sta->sta.valid_links & BIT(link_id) || sta->link[link_id])) return -EBUSY; alloc = kzalloc(sizeof(*alloc), GFP_KERNEL); if (!alloc) return -ENOMEM; ret = sta_info_alloc_link(sdata->local, &alloc->info, GFP_KERNEL); if (ret) { kfree(alloc); return ret; } sta_info_add_link(sta, link_id, &alloc->info, &alloc->sta); ieee80211_link_sta_debugfs_add(&alloc->info); return 0; } void ieee80211_sta_free_link(struct sta_info *sta, unsigned int link_id) { lockdep_assert_wiphy(sta->sdata->local->hw.wiphy); WARN_ON(!test_sta_flag(sta, WLAN_STA_INSERTED)); sta_remove_link(sta, link_id, false); } int ieee80211_sta_activate_link(struct sta_info *sta, unsigned int link_id) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct link_sta_info *link_sta; u16 old_links = sta->sta.valid_links; u16 new_links = old_links | BIT(link_id); int ret; link_sta = rcu_dereference_protected(sta->link[link_id], lockdep_is_held(&sdata->local->hw.wiphy->mtx)); if (WARN_ON(old_links == new_links || !link_sta)) return -EINVAL; rcu_read_lock(); if (link_sta_info_hash_lookup(sdata->local, link_sta->addr)) { rcu_read_unlock(); return -EALREADY; } /* we only modify under the mutex so this is fine */ rcu_read_unlock(); sta->sta.valid_links = new_links; if (WARN_ON(!test_sta_flag(sta, WLAN_STA_INSERTED))) goto hash; ieee80211_recalc_min_chandef(sdata, link_id); /* Ensure the values are updated for the driver, * redone by sta_remove_link on failure. */ ieee80211_sta_recalc_aggregates(&sta->sta); ret = drv_change_sta_links(sdata->local, sdata, &sta->sta, old_links, new_links); if (ret) { sta->sta.valid_links = old_links; sta_remove_link(sta, link_id, false); return ret; } hash: ret = link_sta_info_hash_add(sdata->local, link_sta); WARN_ON(ret); return 0; } void ieee80211_sta_remove_link(struct sta_info *sta, unsigned int link_id) { struct ieee80211_sub_if_data *sdata = sta->sdata; u16 old_links = sta->sta.valid_links; lockdep_assert_wiphy(sdata->local->hw.wiphy); sta->sta.valid_links &= ~BIT(link_id); if (!WARN_ON(!test_sta_flag(sta, WLAN_STA_INSERTED))) drv_change_sta_links(sdata->local, sdata, &sta->sta, old_links, sta->sta.valid_links); sta_remove_link(sta, link_id, true); } void ieee80211_sta_set_max_amsdu_subframes(struct sta_info *sta, const u8 *ext_capab, unsigned int ext_capab_len) { u8 val; sta->sta.max_amsdu_subframes = 0; if (ext_capab_len < 8) return; /* The sender might not have sent the last bit, consider it to be 0 */ val = u8_get_bits(ext_capab[7], WLAN_EXT_CAPA8_MAX_MSDU_IN_AMSDU_LSB); /* we did get all the bits, take the MSB as well */ if (ext_capab_len >= 9) val |= u8_get_bits(ext_capab[8], WLAN_EXT_CAPA9_MAX_MSDU_IN_AMSDU_MSB) << 1; if (val) sta->sta.max_amsdu_subframes = 4 << (4 - val); } #ifdef CONFIG_LOCKDEP bool lockdep_sta_mutex_held(struct ieee80211_sta *pubsta) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); return lockdep_is_held(&sta->local->hw.wiphy->mtx); } EXPORT_SYMBOL(lockdep_sta_mutex_held); #endif |
| 202 237 223 224 31 153 46 11 137 16 217 45 2 43 21 33 1 100 1 32 36 2 169 100 158 65 154 23 39 71 21 114 31 78 31 15 13 16 66 107 100 167 21 107 107 66 136 222 60 40 5 29 29 15 65 17 47 20 1 1 1 24 38 38 38 38 167 167 27 2 2 167 166 21 167 167 20 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 | /* * net/tipc/msg.h: Include file for TIPC message header routines * * Copyright (c) 2000-2007, 2014-2017 Ericsson AB * Copyright (c) 2005-2008, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the names of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * Alternatively, this software may be distributed under the terms of the * GNU General Public License ("GPL") version 2 as published by the Free * Software Foundation. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef _TIPC_MSG_H #define _TIPC_MSG_H #include <linux/tipc.h> #include "core.h" /* * Constants and routines used to read and write TIPC payload message headers * * Note: Some items are also used with TIPC internal message headers */ #define TIPC_VERSION 2 struct plist; /* * Payload message users are defined in TIPC's public API: * - TIPC_LOW_IMPORTANCE * - TIPC_MEDIUM_IMPORTANCE * - TIPC_HIGH_IMPORTANCE * - TIPC_CRITICAL_IMPORTANCE */ #define TIPC_SYSTEM_IMPORTANCE 4 /* * Payload message types */ #define TIPC_CONN_MSG 0 #define TIPC_MCAST_MSG 1 #define TIPC_NAMED_MSG 2 #define TIPC_DIRECT_MSG 3 #define TIPC_GRP_MEMBER_EVT 4 #define TIPC_GRP_BCAST_MSG 5 #define TIPC_GRP_MCAST_MSG 6 #define TIPC_GRP_UCAST_MSG 7 /* * Internal message users */ #define BCAST_PROTOCOL 5 #define MSG_BUNDLER 6 #define LINK_PROTOCOL 7 #define CONN_MANAGER 8 #define GROUP_PROTOCOL 9 #define TUNNEL_PROTOCOL 10 #define NAME_DISTRIBUTOR 11 #define MSG_FRAGMENTER 12 #define LINK_CONFIG 13 #define MSG_CRYPTO 14 #define SOCK_WAKEUP 14 /* pseudo user */ #define TOP_SRV 15 /* pseudo user */ /* * Message header sizes */ #define SHORT_H_SIZE 24 /* In-cluster basic payload message */ #define BASIC_H_SIZE 32 /* Basic payload message */ #define NAMED_H_SIZE 40 /* Named payload message */ #define MCAST_H_SIZE 44 /* Multicast payload message */ #define GROUP_H_SIZE 44 /* Group payload message */ #define INT_H_SIZE 40 /* Internal messages */ #define MIN_H_SIZE 24 /* Smallest legal TIPC header size */ #define MAX_H_SIZE 60 /* Largest possible TIPC header size */ #define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE) #define TIPC_MEDIA_INFO_OFFSET 5 extern const int one_page_mtu; struct tipc_skb_cb { union { struct { struct sk_buff *tail; unsigned long nxt_retr; unsigned long retr_stamp; u32 bytes_read; u32 orig_member; u16 chain_imp; u16 ackers; u16 retr_cnt; } __packed; #ifdef CONFIG_TIPC_CRYPTO struct { struct tipc_crypto *rx; struct tipc_aead *last; u8 recurs; } tx_clone_ctx __packed; #endif } __packed; union { struct { u8 validated:1; #ifdef CONFIG_TIPC_CRYPTO u8 encrypted:1; u8 decrypted:1; #define SKB_PROBING 1 #define SKB_GRACING 2 u8 xmit_type:2; u8 tx_clone_deferred:1; #endif }; u8 flags; }; u8 reserved; #ifdef CONFIG_TIPC_CRYPTO void *crypto_ctx; #endif } __packed; #define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0])) struct tipc_msg { __be32 hdr[15]; }; /* struct tipc_gap_ack - TIPC Gap ACK block * @ack: seqno of the last consecutive packet in link deferdq * @gap: number of gap packets since the last ack * * E.g: * link deferdq: 1 2 3 4 10 11 13 14 15 20 * --> Gap ACK blocks: <4, 5>, <11, 1>, <15, 4>, <20, 0> */ struct tipc_gap_ack { __be16 ack; __be16 gap; }; /* struct tipc_gap_ack_blks * @len: actual length of the record * @ugack_cnt: number of Gap ACK blocks for unicast (following the broadcast * ones) * @start_index: starting index for "valid" broadcast Gap ACK blocks * @bgack_cnt: number of Gap ACK blocks for broadcast in the record * @gacks: array of Gap ACK blocks * * 31 16 15 0 * +-------------+-------------+-------------+-------------+ * | bgack_cnt | ugack_cnt | len | * +-------------+-------------+-------------+-------------+ - * | gap | ack | | * +-------------+-------------+-------------+-------------+ > bc gacks * : : : | * +-------------+-------------+-------------+-------------+ - * | gap | ack | | * +-------------+-------------+-------------+-------------+ > uc gacks * : : : | * +-------------+-------------+-------------+-------------+ - */ struct tipc_gap_ack_blks { __be16 len; union { u8 ugack_cnt; u8 start_index; }; u8 bgack_cnt; struct tipc_gap_ack gacks[]; }; #define MAX_GAP_ACK_BLKS 128 #define MAX_GAP_ACK_BLKS_SZ (sizeof(struct tipc_gap_ack_blks) + \ sizeof(struct tipc_gap_ack) * MAX_GAP_ACK_BLKS) static inline struct tipc_msg *buf_msg(struct sk_buff *skb) { return (struct tipc_msg *)skb->data; } static inline u32 msg_word(struct tipc_msg *m, u32 pos) { return ntohl(m->hdr[pos]); } static inline void msg_set_word(struct tipc_msg *m, u32 w, u32 val) { m->hdr[w] = htonl(val); } static inline u32 msg_bits(struct tipc_msg *m, u32 w, u32 pos, u32 mask) { return (msg_word(m, w) >> pos) & mask; } static inline void msg_set_bits(struct tipc_msg *m, u32 w, u32 pos, u32 mask, u32 val) { val = (val & mask) << pos; mask = mask << pos; m->hdr[w] &= ~htonl(mask); m->hdr[w] |= htonl(val); } /* * Word 0 */ static inline u32 msg_version(struct tipc_msg *m) { return msg_bits(m, 0, 29, 7); } static inline void msg_set_version(struct tipc_msg *m) { msg_set_bits(m, 0, 29, 7, TIPC_VERSION); } static inline u32 msg_user(struct tipc_msg *m) { return msg_bits(m, 0, 25, 0xf); } static inline u32 msg_isdata(struct tipc_msg *m) { return msg_user(m) <= TIPC_CRITICAL_IMPORTANCE; } static inline void msg_set_user(struct tipc_msg *m, u32 n) { msg_set_bits(m, 0, 25, 0xf, n); } static inline u32 msg_hdr_sz(struct tipc_msg *m) { return msg_bits(m, 0, 21, 0xf) << 2; } static inline void msg_set_hdr_sz(struct tipc_msg *m, u32 n) { msg_set_bits(m, 0, 21, 0xf, n>>2); } static inline u32 msg_size(struct tipc_msg *m) { return msg_bits(m, 0, 0, 0x1ffff); } static inline u32 msg_blocks(struct tipc_msg *m) { return (msg_size(m) / 1024) + 1; } static inline u32 msg_data_sz(struct tipc_msg *m) { return msg_size(m) - msg_hdr_sz(m); } static inline int msg_non_seq(struct tipc_msg *m) { return msg_bits(m, 0, 20, 1); } static inline void msg_set_non_seq(struct tipc_msg *m, u32 n) { msg_set_bits(m, 0, 20, 1, n); } static inline int msg_is_syn(struct tipc_msg *m) { return msg_bits(m, 0, 17, 1); } static inline void msg_set_syn(struct tipc_msg *m, u32 d) { msg_set_bits(m, 0, 17, 1, d); } static inline int msg_dest_droppable(struct tipc_msg *m) { return msg_bits(m, 0, 19, 1); } static inline void msg_set_dest_droppable(struct tipc_msg *m, u32 d) { msg_set_bits(m, 0, 19, 1, d); } static inline int msg_is_keepalive(struct tipc_msg *m) { return msg_bits(m, 0, 19, 1); } static inline void msg_set_is_keepalive(struct tipc_msg *m, u32 d) { msg_set_bits(m, 0, 19, 1, d); } static inline int msg_src_droppable(struct tipc_msg *m) { return msg_bits(m, 0, 18, 1); } static inline void msg_set_src_droppable(struct tipc_msg *m, u32 d) { msg_set_bits(m, 0, 18, 1, d); } static inline int msg_ack_required(struct tipc_msg *m) { return msg_bits(m, 0, 18, 1); } static inline void msg_set_ack_required(struct tipc_msg *m) { msg_set_bits(m, 0, 18, 1, 1); } static inline int msg_nagle_ack(struct tipc_msg *m) { return msg_bits(m, 0, 18, 1); } static inline void msg_set_nagle_ack(struct tipc_msg *m) { msg_set_bits(m, 0, 18, 1, 1); } static inline bool msg_is_rcast(struct tipc_msg *m) { return msg_bits(m, 0, 18, 0x1); } static inline void msg_set_is_rcast(struct tipc_msg *m, bool d) { msg_set_bits(m, 0, 18, 0x1, d); } static inline void msg_set_size(struct tipc_msg *m, u32 sz) { m->hdr[0] = htonl((msg_word(m, 0) & ~0x1ffff) | sz); } static inline unchar *msg_data(struct tipc_msg *m) { return ((unchar *)m) + msg_hdr_sz(m); } static inline struct tipc_msg *msg_inner_hdr(struct tipc_msg *m) { return (struct tipc_msg *)msg_data(m); } /* * Word 1 */ static inline u32 msg_type(struct tipc_msg *m) { return msg_bits(m, 1, 29, 0x7); } static inline void msg_set_type(struct tipc_msg *m, u32 n) { msg_set_bits(m, 1, 29, 0x7, n); } static inline int msg_in_group(struct tipc_msg *m) { int mtyp = msg_type(m); return mtyp >= TIPC_GRP_MEMBER_EVT && mtyp <= TIPC_GRP_UCAST_MSG; } static inline bool msg_is_grp_evt(struct tipc_msg *m) { return msg_type(m) == TIPC_GRP_MEMBER_EVT; } static inline u32 msg_named(struct tipc_msg *m) { return msg_type(m) == TIPC_NAMED_MSG; } static inline u32 msg_mcast(struct tipc_msg *m) { int mtyp = msg_type(m); return ((mtyp == TIPC_MCAST_MSG) || (mtyp == TIPC_GRP_BCAST_MSG) || (mtyp == TIPC_GRP_MCAST_MSG)); } static inline u32 msg_connected(struct tipc_msg *m) { return msg_type(m) == TIPC_CONN_MSG; } static inline u32 msg_direct(struct tipc_msg *m) { return msg_type(m) == TIPC_DIRECT_MSG; } static inline u32 msg_errcode(struct tipc_msg *m) { return msg_bits(m, 1, 25, 0xf); } static inline void msg_set_errcode(struct tipc_msg *m, u32 err) { msg_set_bits(m, 1, 25, 0xf, err); } static inline void msg_set_bulk(struct tipc_msg *m) { msg_set_bits(m, 1, 28, 0x1, 1); } static inline u32 msg_is_bulk(struct tipc_msg *m) { return msg_bits(m, 1, 28, 0x1); } static inline void msg_set_last_bulk(struct tipc_msg *m) { msg_set_bits(m, 1, 27, 0x1, 1); } static inline u32 msg_is_last_bulk(struct tipc_msg *m) { return msg_bits(m, 1, 27, 0x1); } static inline void msg_set_non_legacy(struct tipc_msg *m) { msg_set_bits(m, 1, 26, 0x1, 1); } static inline u32 msg_is_legacy(struct tipc_msg *m) { return !msg_bits(m, 1, 26, 0x1); } static inline u32 msg_reroute_cnt(struct tipc_msg *m) { return msg_bits(m, 1, 21, 0xf); } static inline void msg_incr_reroute_cnt(struct tipc_msg *m) { msg_set_bits(m, 1, 21, 0xf, msg_reroute_cnt(m) + 1); } static inline u32 msg_lookup_scope(struct tipc_msg *m) { return msg_bits(m, 1, 19, 0x3); } static inline void msg_set_lookup_scope(struct tipc_msg *m, u32 n) { msg_set_bits(m, 1, 19, 0x3, n); } static inline u16 msg_bcast_ack(struct tipc_msg *m) { return msg_bits(m, 1, 0, 0xffff); } static inline void msg_set_bcast_ack(struct tipc_msg *m, u16 n) { msg_set_bits(m, 1, 0, 0xffff, n); } /* Note: reusing bits in word 1 for ACTIVATE_MSG only, to re-synch * link peer session number */ static inline bool msg_dest_session_valid(struct tipc_msg *m) { return msg_bits(m, 1, 16, 0x1); } static inline void msg_set_dest_session_valid(struct tipc_msg *m, bool valid) { msg_set_bits(m, 1, 16, 0x1, valid); } static inline u16 msg_dest_session(struct tipc_msg *m) { return msg_bits(m, 1, 0, 0xffff); } static inline void msg_set_dest_session(struct tipc_msg *m, u16 n) { msg_set_bits(m, 1, 0, 0xffff, n); } /* * Word 2 */ static inline u16 msg_ack(struct tipc_msg *m) { return msg_bits(m, 2, 16, 0xffff); } static inline void msg_set_ack(struct tipc_msg *m, u16 n) { msg_set_bits(m, 2, 16, 0xffff, n); } static inline u16 msg_seqno(struct tipc_msg *m) { return msg_bits(m, 2, 0, 0xffff); } static inline void msg_set_seqno(struct tipc_msg *m, u16 n) { msg_set_bits(m, 2, 0, 0xffff, n); } /* * Words 3-10 */ static inline u32 msg_importance(struct tipc_msg *m) { int usr = msg_user(m); if (likely((usr <= TIPC_CRITICAL_IMPORTANCE) && !msg_errcode(m))) return usr; if ((usr == MSG_FRAGMENTER) || (usr == MSG_BUNDLER)) return msg_bits(m, 9, 0, 0x7); return TIPC_SYSTEM_IMPORTANCE; } static inline void msg_set_importance(struct tipc_msg *m, u32 i) { int usr = msg_user(m); if (likely((usr == MSG_FRAGMENTER) || (usr == MSG_BUNDLER))) msg_set_bits(m, 9, 0, 0x7, i); else if (i < TIPC_SYSTEM_IMPORTANCE) msg_set_user(m, i); else pr_warn("Trying to set illegal importance in message\n"); } static inline u32 msg_prevnode(struct tipc_msg *m) { return msg_word(m, 3); } static inline void msg_set_prevnode(struct tipc_msg *m, u32 a) { msg_set_word(m, 3, a); } static inline u32 msg_origport(struct tipc_msg *m) { if (msg_user(m) == MSG_FRAGMENTER) m = msg_inner_hdr(m); return msg_word(m, 4); } static inline void msg_set_origport(struct tipc_msg *m, u32 p) { msg_set_word(m, 4, p); } static inline u16 msg_named_seqno(struct tipc_msg *m) { return msg_bits(m, 4, 0, 0xffff); } static inline void msg_set_named_seqno(struct tipc_msg *m, u16 n) { msg_set_bits(m, 4, 0, 0xffff, n); } static inline u32 msg_destport(struct tipc_msg *m) { return msg_word(m, 5); } static inline void msg_set_destport(struct tipc_msg *m, u32 p) { msg_set_word(m, 5, p); } static inline u32 msg_mc_netid(struct tipc_msg *m) { return msg_word(m, 5); } static inline void msg_set_mc_netid(struct tipc_msg *m, u32 p) { msg_set_word(m, 5, p); } static inline int msg_short(struct tipc_msg *m) { return msg_hdr_sz(m) == SHORT_H_SIZE; } static inline u32 msg_orignode(struct tipc_msg *m) { if (likely(msg_short(m))) return msg_prevnode(m); return msg_word(m, 6); } static inline void msg_set_orignode(struct tipc_msg *m, u32 a) { msg_set_word(m, 6, a); } static inline u32 msg_destnode(struct tipc_msg *m) { return msg_word(m, 7); } static inline void msg_set_destnode(struct tipc_msg *m, u32 a) { msg_set_word(m, 7, a); } static inline u32 msg_nametype(struct tipc_msg *m) { return msg_word(m, 8); } static inline void msg_set_nametype(struct tipc_msg *m, u32 n) { msg_set_word(m, 8, n); } static inline u32 msg_nameinst(struct tipc_msg *m) { return msg_word(m, 9); } static inline u32 msg_namelower(struct tipc_msg *m) { return msg_nameinst(m); } static inline void msg_set_namelower(struct tipc_msg *m, u32 n) { msg_set_word(m, 9, n); } static inline void msg_set_nameinst(struct tipc_msg *m, u32 n) { msg_set_namelower(m, n); } static inline u32 msg_nameupper(struct tipc_msg *m) { return msg_word(m, 10); } static inline void msg_set_nameupper(struct tipc_msg *m, u32 n) { msg_set_word(m, 10, n); } /* * Constants and routines used to read and write TIPC internal message headers */ /* * Connection management protocol message types */ #define CONN_PROBE 0 #define CONN_PROBE_REPLY 1 #define CONN_ACK 2 /* * Name distributor message types */ #define PUBLICATION 0 #define WITHDRAWAL 1 /* * Segmentation message types */ #define FIRST_FRAGMENT 0 #define FRAGMENT 1 #define LAST_FRAGMENT 2 /* * Link management protocol message types */ #define STATE_MSG 0 #define RESET_MSG 1 #define ACTIVATE_MSG 2 /* * Changeover tunnel message types */ #define SYNCH_MSG 0 #define FAILOVER_MSG 1 /* * Config protocol message types */ #define DSC_REQ_MSG 0 #define DSC_RESP_MSG 1 #define DSC_TRIAL_MSG 2 #define DSC_TRIAL_FAIL_MSG 3 /* * Group protocol message types */ #define GRP_JOIN_MSG 0 #define GRP_LEAVE_MSG 1 #define GRP_ADV_MSG 2 #define GRP_ACK_MSG 3 #define GRP_RECLAIM_MSG 4 #define GRP_REMIT_MSG 5 /* Crypto message types */ #define KEY_DISTR_MSG 0 /* * Word 1 */ static inline u32 msg_seq_gap(struct tipc_msg *m) { return msg_bits(m, 1, 16, 0x1fff); } static inline void msg_set_seq_gap(struct tipc_msg *m, u32 n) { msg_set_bits(m, 1, 16, 0x1fff, n); } static inline u32 msg_node_sig(struct tipc_msg *m) { return msg_bits(m, 1, 0, 0xffff); } static inline void msg_set_node_sig(struct tipc_msg *m, u32 n) { msg_set_bits(m, 1, 0, 0xffff, n); } static inline u32 msg_node_capabilities(struct tipc_msg *m) { return msg_bits(m, 1, 15, 0x1fff); } static inline void msg_set_node_capabilities(struct tipc_msg *m, u32 n) { msg_set_bits(m, 1, 15, 0x1fff, n); } /* * Word 2 */ static inline u32 msg_dest_domain(struct tipc_msg *m) { return msg_word(m, 2); } static inline void msg_set_dest_domain(struct tipc_msg *m, u32 n) { msg_set_word(m, 2, n); } static inline void msg_set_bcgap_after(struct tipc_msg *m, u32 n) { msg_set_bits(m, 2, 16, 0xffff, n); } static inline u32 msg_bcgap_to(struct tipc_msg *m) { return msg_bits(m, 2, 0, 0xffff); } static inline void msg_set_bcgap_to(struct tipc_msg *m, u32 n) { msg_set_bits(m, 2, 0, 0xffff, n); } /* * Word 4 */ static inline u32 msg_last_bcast(struct tipc_msg *m) { return msg_bits(m, 4, 16, 0xffff); } static inline u32 msg_bc_snd_nxt(struct tipc_msg *m) { return msg_last_bcast(m) + 1; } static inline void msg_set_last_bcast(struct tipc_msg *m, u32 n) { msg_set_bits(m, 4, 16, 0xffff, n); } static inline u32 msg_nof_fragms(struct tipc_msg *m) { return msg_bits(m, 4, 0, 0xffff); } static inline void msg_set_nof_fragms(struct tipc_msg *m, u32 n) { msg_set_bits(m, 4, 0, 0xffff, n); } static inline u32 msg_fragm_no(struct tipc_msg *m) { return msg_bits(m, 4, 16, 0xffff); } static inline void msg_set_fragm_no(struct tipc_msg *m, u32 n) { msg_set_bits(m, 4, 16, 0xffff, n); } static inline u16 msg_next_sent(struct tipc_msg *m) { return msg_bits(m, 4, 0, 0xffff); } static inline void msg_set_next_sent(struct tipc_msg *m, u16 n) { msg_set_bits(m, 4, 0, 0xffff, n); } static inline u32 msg_bc_netid(struct tipc_msg *m) { return msg_word(m, 4); } static inline void msg_set_bc_netid(struct tipc_msg *m, u32 id) { msg_set_word(m, 4, id); } static inline u32 msg_link_selector(struct tipc_msg *m) { if (msg_user(m) == MSG_FRAGMENTER) m = (void *)msg_data(m); return msg_bits(m, 4, 0, 1); } /* * Word 5 */ static inline u16 msg_session(struct tipc_msg *m) { return msg_bits(m, 5, 16, 0xffff); } static inline void msg_set_session(struct tipc_msg *m, u16 n) { msg_set_bits(m, 5, 16, 0xffff, n); } static inline u32 msg_probe(struct tipc_msg *m) { return msg_bits(m, 5, 0, 1); } static inline void msg_set_probe(struct tipc_msg *m, u32 val) { msg_set_bits(m, 5, 0, 1, val); } static inline char msg_net_plane(struct tipc_msg *m) { return msg_bits(m, 5, 1, 7) + 'A'; } static inline void msg_set_net_plane(struct tipc_msg *m, char n) { msg_set_bits(m, 5, 1, 7, (n - 'A')); } static inline u32 msg_linkprio(struct tipc_msg *m) { return msg_bits(m, 5, 4, 0x1f); } static inline void msg_set_linkprio(struct tipc_msg *m, u32 n) { msg_set_bits(m, 5, 4, 0x1f, n); } static inline u32 msg_bearer_id(struct tipc_msg *m) { return msg_bits(m, 5, 9, 0x7); } static inline void msg_set_bearer_id(struct tipc_msg *m, u32 n) { msg_set_bits(m, 5, 9, 0x7, n); } static inline u32 msg_redundant_link(struct tipc_msg *m) { return msg_bits(m, 5, 12, 0x1); } static inline void msg_set_redundant_link(struct tipc_msg *m, u32 r) { msg_set_bits(m, 5, 12, 0x1, r); } static inline u32 msg_peer_stopping(struct tipc_msg *m) { return msg_bits(m, 5, 13, 0x1); } static inline void msg_set_peer_stopping(struct tipc_msg *m, u32 s) { msg_set_bits(m, 5, 13, 0x1, s); } static inline bool msg_bc_ack_invalid(struct tipc_msg *m) { switch (msg_user(m)) { case BCAST_PROTOCOL: case NAME_DISTRIBUTOR: case LINK_PROTOCOL: return msg_bits(m, 5, 14, 0x1); default: return false; } } static inline void msg_set_bc_ack_invalid(struct tipc_msg *m, bool invalid) { msg_set_bits(m, 5, 14, 0x1, invalid); } static inline char *msg_media_addr(struct tipc_msg *m) { return (char *)&m->hdr[TIPC_MEDIA_INFO_OFFSET]; } static inline u32 msg_bc_gap(struct tipc_msg *m) { return msg_bits(m, 8, 0, 0x3ff); } static inline void msg_set_bc_gap(struct tipc_msg *m, u32 n) { msg_set_bits(m, 8, 0, 0x3ff, n); } /* * Word 9 */ static inline u16 msg_msgcnt(struct tipc_msg *m) { return msg_bits(m, 9, 16, 0xffff); } static inline void msg_set_msgcnt(struct tipc_msg *m, u16 n) { msg_set_bits(m, 9, 16, 0xffff, n); } static inline u16 msg_syncpt(struct tipc_msg *m) { return msg_bits(m, 9, 16, 0xffff); } static inline void msg_set_syncpt(struct tipc_msg *m, u16 n) { msg_set_bits(m, 9, 16, 0xffff, n); } static inline u32 msg_conn_ack(struct tipc_msg *m) { return msg_bits(m, 9, 16, 0xffff); } static inline void msg_set_conn_ack(struct tipc_msg *m, u32 n) { msg_set_bits(m, 9, 16, 0xffff, n); } static inline u16 msg_adv_win(struct tipc_msg *m) { return msg_bits(m, 9, 0, 0xffff); } static inline void msg_set_adv_win(struct tipc_msg *m, u16 n) { msg_set_bits(m, 9, 0, 0xffff, n); } static inline u32 msg_max_pkt(struct tipc_msg *m) { return msg_bits(m, 9, 16, 0xffff) * 4; } static inline void msg_set_max_pkt(struct tipc_msg *m, u32 n) { msg_set_bits(m, 9, 16, 0xffff, (n / 4)); } static inline u32 msg_link_tolerance(struct tipc_msg *m) { return msg_bits(m, 9, 0, 0xffff); } static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n) { msg_set_bits(m, 9, 0, 0xffff, n); } static inline u16 msg_grp_bc_syncpt(struct tipc_msg *m) { return msg_bits(m, 9, 16, 0xffff); } static inline void msg_set_grp_bc_syncpt(struct tipc_msg *m, u16 n) { msg_set_bits(m, 9, 16, 0xffff, n); } static inline u16 msg_grp_bc_acked(struct tipc_msg *m) { return msg_bits(m, 9, 16, 0xffff); } static inline void msg_set_grp_bc_acked(struct tipc_msg *m, u16 n) { msg_set_bits(m, 9, 16, 0xffff, n); } static inline u16 msg_grp_remitted(struct tipc_msg *m) { return msg_bits(m, 9, 16, 0xffff); } static inline void msg_set_grp_remitted(struct tipc_msg *m, u16 n) { msg_set_bits(m, 9, 16, 0xffff, n); } /* Word 10 */ static inline u16 msg_grp_evt(struct tipc_msg *m) { return msg_bits(m, 10, 0, 0x3); } static inline void msg_set_grp_evt(struct tipc_msg *m, int n) { msg_set_bits(m, 10, 0, 0x3, n); } static inline u16 msg_grp_bc_ack_req(struct tipc_msg *m) { return msg_bits(m, 10, 0, 0x1); } static inline void msg_set_grp_bc_ack_req(struct tipc_msg *m, bool n) { msg_set_bits(m, 10, 0, 0x1, n); } static inline u16 msg_grp_bc_seqno(struct tipc_msg *m) { return msg_bits(m, 10, 16, 0xffff); } static inline void msg_set_grp_bc_seqno(struct tipc_msg *m, u32 n) { msg_set_bits(m, 10, 16, 0xffff, n); } static inline bool msg_peer_link_is_up(struct tipc_msg *m) { if (likely(msg_user(m) != LINK_PROTOCOL)) return true; if (msg_type(m) == STATE_MSG) return true; return false; } static inline bool msg_peer_node_is_up(struct tipc_msg *m) { if (msg_peer_link_is_up(m)) return true; return msg_redundant_link(m); } static inline bool msg_is_reset(struct tipc_msg *hdr) { return (msg_user(hdr) == LINK_PROTOCOL) && (msg_type(hdr) == RESET_MSG); } /* Word 13 */ static inline void msg_set_peer_net_hash(struct tipc_msg *m, u32 n) { msg_set_word(m, 13, n); } static inline u32 msg_peer_net_hash(struct tipc_msg *m) { return msg_word(m, 13); } /* Word 14 */ static inline u32 msg_sugg_node_addr(struct tipc_msg *m) { return msg_word(m, 14); } static inline void msg_set_sugg_node_addr(struct tipc_msg *m, u32 n) { msg_set_word(m, 14, n); } static inline void msg_set_node_id(struct tipc_msg *hdr, u8 *id) { memcpy(msg_data(hdr), id, 16); } static inline u8 *msg_node_id(struct tipc_msg *hdr) { return (u8 *)msg_data(hdr); } struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp); bool tipc_msg_validate(struct sk_buff **_skb); bool tipc_msg_reverse(u32 own_addr, struct sk_buff **skb, int err); void tipc_skb_reject(struct net *net, int err, struct sk_buff *skb, struct sk_buff_head *xmitq); void tipc_msg_init(u32 own_addr, struct tipc_msg *m, u32 user, u32 type, u32 hsize, u32 destnode); struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, uint data_sz, u32 dnode, u32 onode, u32 dport, u32 oport, int errcode); int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf); bool tipc_msg_try_bundle(struct sk_buff *tskb, struct sk_buff **skb, u32 mss, u32 dnode, bool *new_bundle); bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos); int tipc_msg_fragment(struct sk_buff *skb, const struct tipc_msg *hdr, int pktmax, struct sk_buff_head *frags); int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, int dsz, int mtu, struct sk_buff_head *list); int tipc_msg_append(struct tipc_msg *hdr, struct msghdr *m, int dlen, int mss, struct sk_buff_head *txq); bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err); bool tipc_msg_assemble(struct sk_buff_head *list); bool tipc_msg_reassemble(struct sk_buff_head *list, struct sk_buff_head *rcvq); bool tipc_msg_pskb_copy(u32 dst, struct sk_buff_head *msg, struct sk_buff_head *cpy); bool __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno, struct sk_buff *skb); bool tipc_msg_skb_clone(struct sk_buff_head *msg, struct sk_buff_head *cpy); static inline u16 buf_seqno(struct sk_buff *skb) { return msg_seqno(buf_msg(skb)); } static inline int buf_roundup_len(struct sk_buff *skb) { return (skb->len / 1024 + 1) * 1024; } /* tipc_skb_peek(): peek and reserve first buffer in list * @list: list to be peeked in * Returns pointer to first buffer in list, if any */ static inline struct sk_buff *tipc_skb_peek(struct sk_buff_head *list, spinlock_t *lock) { struct sk_buff *skb; spin_lock_bh(lock); skb = skb_peek(list); if (skb) skb_get(skb); spin_unlock_bh(lock); return skb; } /* tipc_skb_peek_port(): find a destination port, ignoring all destinations * up to and including 'filter'. * Note: ignoring previously tried destinations minimizes the risk of * contention on the socket lock * @list: list to be peeked in * @filter: last destination to be ignored from search * Returns a destination port number, of applicable. */ static inline u32 tipc_skb_peek_port(struct sk_buff_head *list, u32 filter) { struct sk_buff *skb; u32 dport = 0; bool ignore = true; spin_lock_bh(&list->lock); skb_queue_walk(list, skb) { dport = msg_destport(buf_msg(skb)); if (!filter || skb_queue_is_last(list, skb)) break; if (dport == filter) ignore = false; else if (!ignore) break; } spin_unlock_bh(&list->lock); return dport; } /* tipc_skb_dequeue(): unlink first buffer with dest 'dport' from list * @list: list to be unlinked from * @dport: selection criteria for buffer to unlink */ static inline struct sk_buff *tipc_skb_dequeue(struct sk_buff_head *list, u32 dport) { struct sk_buff *_skb, *tmp, *skb = NULL; spin_lock_bh(&list->lock); skb_queue_walk_safe(list, _skb, tmp) { if (msg_destport(buf_msg(_skb)) == dport) { __skb_unlink(_skb, list); skb = _skb; break; } } spin_unlock_bh(&list->lock); return skb; } /* tipc_skb_queue_splice_tail - append an skb list to lock protected list * @list: the new list to append. Not lock protected * @head: target list. Lock protected. */ static inline void tipc_skb_queue_splice_tail(struct sk_buff_head *list, struct sk_buff_head *head) { spin_lock_bh(&head->lock); skb_queue_splice_tail(list, head); spin_unlock_bh(&head->lock); } /* tipc_skb_queue_splice_tail_init - merge two lock protected skb lists * @list: the new list to add. Lock protected. Will be reinitialized * @head: target list. Lock protected. */ static inline void tipc_skb_queue_splice_tail_init(struct sk_buff_head *list, struct sk_buff_head *head) { struct sk_buff_head tmp; __skb_queue_head_init(&tmp); spin_lock_bh(&list->lock); skb_queue_splice_tail_init(list, &tmp); spin_unlock_bh(&list->lock); tipc_skb_queue_splice_tail(&tmp, head); } /* __tipc_skb_dequeue() - dequeue the head skb according to expected seqno * @list: list to be dequeued from * @seqno: seqno of the expected msg * * returns skb dequeued from the list if its seqno is less than or equal to * the expected one, otherwise the skb is still hold * * Note: must be used with appropriate locks held only */ static inline struct sk_buff *__tipc_skb_dequeue(struct sk_buff_head *list, u16 seqno) { struct sk_buff *skb = skb_peek(list); if (skb && less_eq(buf_seqno(skb), seqno)) { __skb_unlink(skb, list); return skb; } return NULL; } #endif |
| 7 9 7 9 14 4 16 10 1 81 83 65 69 17 4 7 5 23 23 16 16 14 1 6 5 6 6 6 2 2 4 4 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 | /* SPDX-License-Identifier: GPL-1.0+ */ /* * Bond several ethernet interfaces into a Cisco, running 'Etherchannel'. * * Portions are (c) Copyright 1995 Simon "Guru Aleph-Null" Janes * NCM: Network and Communications Management, Inc. * * BUT, I'm the one who modified it for ethernet, so: * (c) Copyright 1999, Thomas Davis, tadavis@lbl.gov * */ #ifndef _NET_BONDING_H #define _NET_BONDING_H #include <linux/timer.h> #include <linux/proc_fs.h> #include <linux/if_bonding.h> #include <linux/cpumask.h> #include <linux/in6.h> #include <linux/netpoll.h> #include <linux/inetdevice.h> #include <linux/etherdevice.h> #include <linux/reciprocal_div.h> #include <linux/if_link.h> #include <net/bond_3ad.h> #include <net/bond_alb.h> #include <net/bond_options.h> #include <net/ipv6.h> #include <net/addrconf.h> #define BOND_MAX_ARP_TARGETS 16 #define BOND_MAX_NS_TARGETS BOND_MAX_ARP_TARGETS #define BOND_DEFAULT_MIIMON 100 #ifndef __long_aligned #define __long_aligned __attribute__((aligned((sizeof(long))))) #endif #define slave_info(bond_dev, slave_dev, fmt, ...) \ netdev_info(bond_dev, "(slave %s): " fmt, (slave_dev)->name, ##__VA_ARGS__) #define slave_warn(bond_dev, slave_dev, fmt, ...) \ netdev_warn(bond_dev, "(slave %s): " fmt, (slave_dev)->name, ##__VA_ARGS__) #define slave_dbg(bond_dev, slave_dev, fmt, ...) \ netdev_dbg(bond_dev, "(slave %s): " fmt, (slave_dev)->name, ##__VA_ARGS__) #define slave_err(bond_dev, slave_dev, fmt, ...) \ netdev_err(bond_dev, "(slave %s): " fmt, (slave_dev)->name, ##__VA_ARGS__) #define BOND_MODE(bond) ((bond)->params.mode) /* slave list primitives */ #define bond_slave_list(bond) (&(bond)->dev->adj_list.lower) #define bond_has_slaves(bond) !list_empty(bond_slave_list(bond)) /* IMPORTANT: bond_first/last_slave can return NULL in case of an empty list */ #define bond_first_slave(bond) \ (bond_has_slaves(bond) ? \ netdev_adjacent_get_private(bond_slave_list(bond)->next) : \ NULL) #define bond_last_slave(bond) \ (bond_has_slaves(bond) ? \ netdev_adjacent_get_private(bond_slave_list(bond)->prev) : \ NULL) /* Caller must have rcu_read_lock */ #define bond_first_slave_rcu(bond) \ netdev_lower_get_first_private_rcu(bond->dev) #define bond_is_first_slave(bond, pos) (pos == bond_first_slave(bond)) #define bond_is_last_slave(bond, pos) (pos == bond_last_slave(bond)) /** * bond_for_each_slave - iterate over all slaves * @bond: the bond holding this list * @pos: current slave * @iter: list_head * iterator * * Caller must hold RTNL */ #define bond_for_each_slave(bond, pos, iter) \ netdev_for_each_lower_private((bond)->dev, pos, iter) /* Caller must have rcu_read_lock */ #define bond_for_each_slave_rcu(bond, pos, iter) \ netdev_for_each_lower_private_rcu((bond)->dev, pos, iter) #define BOND_XFRM_FEATURES (NETIF_F_HW_ESP | NETIF_F_HW_ESP_TX_CSUM | \ NETIF_F_GSO_ESP) #ifdef CONFIG_NET_POLL_CONTROLLER extern atomic_t netpoll_block_tx; static inline void block_netpoll_tx(void) { atomic_inc(&netpoll_block_tx); } static inline void unblock_netpoll_tx(void) { atomic_dec(&netpoll_block_tx); } static inline int is_netpoll_tx_blocked(struct net_device *dev) { if (unlikely(netpoll_tx_running(dev))) return atomic_read(&netpoll_block_tx); return 0; } #else #define block_netpoll_tx() #define unblock_netpoll_tx() #define is_netpoll_tx_blocked(dev) (0) #endif struct bond_params { int mode; int xmit_policy; int miimon; u8 num_peer_notif; u8 missed_max; int arp_interval; int arp_validate; int arp_all_targets; int use_carrier; int fail_over_mac; int updelay; int downdelay; int peer_notif_delay; int lacp_active; int lacp_fast; unsigned int min_links; int ad_select; char primary[IFNAMSIZ]; int primary_reselect; __be32 arp_targets[BOND_MAX_ARP_TARGETS]; int tx_queues; int all_slaves_active; int resend_igmp; int lp_interval; int packets_per_slave; int tlb_dynamic_lb; struct reciprocal_value reciprocal_packets_per_slave; u16 ad_actor_sys_prio; u16 ad_user_port_key; #if IS_ENABLED(CONFIG_IPV6) struct in6_addr ns_targets[BOND_MAX_NS_TARGETS]; #endif int coupled_control; /* 2 bytes of padding : see ether_addr_equal_64bits() */ u8 ad_actor_system[ETH_ALEN + 2]; }; struct slave { struct net_device *dev; /* first - useful for panic debug */ struct bonding *bond; /* our master */ int delay; /* all 4 in jiffies */ unsigned long last_link_up; unsigned long last_tx; unsigned long last_rx; unsigned long target_last_arp_rx[BOND_MAX_ARP_TARGETS]; s8 link; /* one of BOND_LINK_XXXX */ s8 link_new_state; /* one of BOND_LINK_XXXX */ u8 backup:1, /* indicates backup slave. Value corresponds with BOND_STATE_ACTIVE and BOND_STATE_BACKUP */ inactive:1, /* indicates inactive slave */ rx_disabled:1, /* indicates whether slave's Rx is disabled */ should_notify:1, /* indicates whether the state changed */ should_notify_link:1; /* indicates whether the link changed */ u8 duplex; u32 original_mtu; u32 link_failure_count; u32 speed; u16 queue_id; u8 perm_hwaddr[MAX_ADDR_LEN]; int prio; struct ad_slave_info *ad_info; struct tlb_slave_info tlb_info; #ifdef CONFIG_NET_POLL_CONTROLLER struct netpoll *np; #endif struct delayed_work notify_work; struct kobject kobj; struct rtnl_link_stats64 slave_stats; }; static inline struct slave *to_slave(struct kobject *kobj) { return container_of(kobj, struct slave, kobj); } struct bond_up_slave { unsigned int count; struct rcu_head rcu; struct slave *arr[]; }; /* * Link pseudo-state only used internally by monitors */ #define BOND_LINK_NOCHANGE -1 struct bond_ipsec { struct list_head list; struct xfrm_state *xs; }; /* * Here are the locking policies for the two bonding locks: * Get rcu_read_lock when reading or RTNL when writing slave list. */ struct bonding { struct net_device *dev; /* first - useful for panic debug */ struct slave __rcu *curr_active_slave; struct slave __rcu *current_arp_slave; struct slave __rcu *primary_slave; struct bond_up_slave __rcu *usable_slaves; struct bond_up_slave __rcu *all_slaves; bool force_primary; bool notifier_ctx; s32 slave_cnt; /* never change this value outside the attach/detach wrappers */ int (*recv_probe)(const struct sk_buff *, struct bonding *, struct slave *); /* mode_lock is used for mode-specific locking needs, currently used by: * 3ad mode (4) - protect against running bond_3ad_unbind_slave() and * bond_3ad_state_machine_handler() concurrently and also * the access to the state machine shared variables. * TLB mode (5) - to sync the use and modifications of its hash table * ALB mode (6) - to sync the use and modifications of its hash table */ spinlock_t mode_lock; spinlock_t stats_lock; u32 send_peer_notif; u8 igmp_retrans; #ifdef CONFIG_PROC_FS struct proc_dir_entry *proc_entry; char proc_file_name[IFNAMSIZ]; #endif /* CONFIG_PROC_FS */ struct list_head bond_list; u32 __percpu *rr_tx_counter; struct ad_bond_info ad_info; struct alb_bond_info alb_info; struct bond_params params; struct workqueue_struct *wq; struct delayed_work mii_work; struct delayed_work arp_work; struct delayed_work alb_work; struct delayed_work ad_work; struct delayed_work mcast_work; struct delayed_work slave_arr_work; #ifdef CONFIG_DEBUG_FS /* debugging support via debugfs */ struct dentry *debug_dir; #endif /* CONFIG_DEBUG_FS */ struct rtnl_link_stats64 bond_stats; #ifdef CONFIG_XFRM_OFFLOAD struct list_head ipsec_list; /* protecting ipsec_list */ spinlock_t ipsec_lock; #endif /* CONFIG_XFRM_OFFLOAD */ struct bpf_prog *xdp_prog; }; #define bond_slave_get_rcu(dev) \ ((struct slave *) rcu_dereference(dev->rx_handler_data)) #define bond_slave_get_rtnl(dev) \ ((struct slave *) rtnl_dereference(dev->rx_handler_data)) void bond_queue_slave_event(struct slave *slave); void bond_lower_state_changed(struct slave *slave); struct bond_vlan_tag { __be16 vlan_proto; unsigned short vlan_id; }; /* * Returns NULL if the net_device does not belong to any of the bond's slaves * * Caller must hold bond lock for read */ static inline struct slave *bond_get_slave_by_dev(struct bonding *bond, struct net_device *slave_dev) { return netdev_lower_dev_get_private(bond->dev, slave_dev); } static inline struct bonding *bond_get_bond_by_slave(struct slave *slave) { return slave->bond; } static inline bool bond_should_override_tx_queue(struct bonding *bond) { return BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP || BOND_MODE(bond) == BOND_MODE_ROUNDROBIN; } static inline bool bond_is_lb(const struct bonding *bond) { return BOND_MODE(bond) == BOND_MODE_TLB || BOND_MODE(bond) == BOND_MODE_ALB; } static inline bool bond_needs_speed_duplex(const struct bonding *bond) { return BOND_MODE(bond) == BOND_MODE_8023AD || bond_is_lb(bond); } static inline bool bond_is_nondyn_tlb(const struct bonding *bond) { return (bond_is_lb(bond) && bond->params.tlb_dynamic_lb == 0); } static inline bool bond_mode_can_use_xmit_hash(const struct bonding *bond) { return (BOND_MODE(bond) == BOND_MODE_8023AD || BOND_MODE(bond) == BOND_MODE_XOR || BOND_MODE(bond) == BOND_MODE_TLB || BOND_MODE(bond) == BOND_MODE_ALB); } static inline bool bond_mode_uses_xmit_hash(const struct bonding *bond) { return (BOND_MODE(bond) == BOND_MODE_8023AD || BOND_MODE(bond) == BOND_MODE_XOR || bond_is_nondyn_tlb(bond)); } static inline bool bond_mode_uses_arp(int mode) { return mode != BOND_MODE_8023AD && mode != BOND_MODE_TLB && mode != BOND_MODE_ALB; } static inline bool bond_mode_uses_primary(int mode) { return mode == BOND_MODE_ACTIVEBACKUP || mode == BOND_MODE_TLB || mode == BOND_MODE_ALB; } static inline bool bond_uses_primary(struct bonding *bond) { return bond_mode_uses_primary(BOND_MODE(bond)); } static inline struct net_device *bond_option_active_slave_get_rcu(struct bonding *bond) { struct slave *slave = rcu_dereference_rtnl(bond->curr_active_slave); return bond_uses_primary(bond) && slave ? slave->dev : NULL; } static inline bool bond_slave_is_up(struct slave *slave) { return netif_running(slave->dev) && netif_carrier_ok(slave->dev); } static inline void bond_set_active_slave(struct slave *slave) { if (slave->backup) { slave->backup = 0; bond_queue_slave_event(slave); bond_lower_state_changed(slave); } } static inline void bond_set_backup_slave(struct slave *slave) { if (!slave->backup) { slave->backup = 1; bond_queue_slave_event(slave); bond_lower_state_changed(slave); } } static inline void bond_set_slave_state(struct slave *slave, int slave_state, bool notify) { if (slave->backup == slave_state) return; slave->backup = slave_state; if (notify) { bond_lower_state_changed(slave); bond_queue_slave_event(slave); slave->should_notify = 0; } else { if (slave->should_notify) slave->should_notify = 0; else slave->should_notify = 1; } } static inline void bond_slave_state_change(struct bonding *bond) { struct list_head *iter; struct slave *tmp; bond_for_each_slave(bond, tmp, iter) { if (tmp->link == BOND_LINK_UP) bond_set_active_slave(tmp); else if (tmp->link == BOND_LINK_DOWN) bond_set_backup_slave(tmp); } } static inline void bond_slave_state_notify(struct bonding *bond) { struct list_head *iter; struct slave *tmp; bond_for_each_slave(bond, tmp, iter) { if (tmp->should_notify) { bond_lower_state_changed(tmp); tmp->should_notify = 0; } } } static inline int bond_slave_state(struct slave *slave) { return slave->backup; } static inline bool bond_is_active_slave(struct slave *slave) { return !bond_slave_state(slave); } static inline bool bond_slave_can_tx(struct slave *slave) { return bond_slave_is_up(slave) && slave->link == BOND_LINK_UP && bond_is_active_slave(slave); } static inline bool bond_is_active_slave_dev(const struct net_device *slave_dev) { struct slave *slave; bool active; rcu_read_lock(); slave = bond_slave_get_rcu(slave_dev); active = bond_is_active_slave(slave); rcu_read_unlock(); return active; } static inline void bond_hw_addr_copy(u8 *dst, const u8 *src, unsigned int len) { if (len == ETH_ALEN) { ether_addr_copy(dst, src); return; } memcpy(dst, src, len); } #define BOND_PRI_RESELECT_ALWAYS 0 #define BOND_PRI_RESELECT_BETTER 1 #define BOND_PRI_RESELECT_FAILURE 2 #define BOND_FOM_NONE 0 #define BOND_FOM_ACTIVE 1 #define BOND_FOM_FOLLOW 2 #define BOND_ARP_TARGETS_ANY 0 #define BOND_ARP_TARGETS_ALL 1 #define BOND_ARP_VALIDATE_NONE 0 #define BOND_ARP_VALIDATE_ACTIVE (1 << BOND_STATE_ACTIVE) #define BOND_ARP_VALIDATE_BACKUP (1 << BOND_STATE_BACKUP) #define BOND_ARP_VALIDATE_ALL (BOND_ARP_VALIDATE_ACTIVE | \ BOND_ARP_VALIDATE_BACKUP) #define BOND_ARP_FILTER (BOND_ARP_VALIDATE_ALL + 1) #define BOND_ARP_FILTER_ACTIVE (BOND_ARP_VALIDATE_ACTIVE | \ BOND_ARP_FILTER) #define BOND_ARP_FILTER_BACKUP (BOND_ARP_VALIDATE_BACKUP | \ BOND_ARP_FILTER) #define BOND_SLAVE_NOTIFY_NOW true #define BOND_SLAVE_NOTIFY_LATER false static inline int slave_do_arp_validate(struct bonding *bond, struct slave *slave) { return bond->params.arp_validate & (1 << bond_slave_state(slave)); } static inline int slave_do_arp_validate_only(struct bonding *bond) { return bond->params.arp_validate & BOND_ARP_FILTER; } static inline int bond_is_ip_target_ok(__be32 addr) { return !ipv4_is_lbcast(addr) && !ipv4_is_zeronet(addr); } #if IS_ENABLED(CONFIG_IPV6) static inline int bond_is_ip6_target_ok(struct in6_addr *addr) { return !ipv6_addr_any(addr) && !ipv6_addr_loopback(addr) && !ipv6_addr_is_multicast(addr); } #endif /* Get the oldest arp which we've received on this slave for bond's * arp_targets. */ static inline unsigned long slave_oldest_target_arp_rx(struct bonding *bond, struct slave *slave) { int i = 1; unsigned long ret = slave->target_last_arp_rx[0]; for (; (i < BOND_MAX_ARP_TARGETS) && bond->params.arp_targets[i]; i++) if (time_before(slave->target_last_arp_rx[i], ret)) ret = slave->target_last_arp_rx[i]; return ret; } static inline unsigned long slave_last_rx(struct bonding *bond, struct slave *slave) { if (bond->params.arp_all_targets == BOND_ARP_TARGETS_ALL) return slave_oldest_target_arp_rx(bond, slave); return slave->last_rx; } static inline void slave_update_last_tx(struct slave *slave) { WRITE_ONCE(slave->last_tx, jiffies); } static inline unsigned long slave_last_tx(struct slave *slave) { return READ_ONCE(slave->last_tx); } #ifdef CONFIG_NET_POLL_CONTROLLER static inline netdev_tx_t bond_netpoll_send_skb(const struct slave *slave, struct sk_buff *skb) { return netpoll_send_skb(slave->np, skb); } #else static inline netdev_tx_t bond_netpoll_send_skb(const struct slave *slave, struct sk_buff *skb) { BUG(); return NETDEV_TX_OK; } #endif static inline void bond_set_slave_inactive_flags(struct slave *slave, bool notify) { if (!bond_is_lb(slave->bond)) bond_set_slave_state(slave, BOND_STATE_BACKUP, notify); if (!slave->bond->params.all_slaves_active) slave->inactive = 1; if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) slave->rx_disabled = 1; } static inline void bond_set_slave_tx_disabled_flags(struct slave *slave, bool notify) { bond_set_slave_state(slave, BOND_STATE_BACKUP, notify); } static inline void bond_set_slave_active_flags(struct slave *slave, bool notify) { bond_set_slave_state(slave, BOND_STATE_ACTIVE, notify); slave->inactive = 0; if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) slave->rx_disabled = 0; } static inline void bond_set_slave_rx_enabled_flags(struct slave *slave, bool notify) { slave->rx_disabled = 0; } static inline bool bond_is_slave_inactive(struct slave *slave) { return slave->inactive; } static inline bool bond_is_slave_rx_disabled(struct slave *slave) { return slave->rx_disabled; } static inline void bond_propose_link_state(struct slave *slave, int state) { slave->link_new_state = state; } static inline void bond_commit_link_state(struct slave *slave, bool notify) { if (slave->link_new_state == BOND_LINK_NOCHANGE) return; slave->link = slave->link_new_state; if (notify) { bond_queue_slave_event(slave); bond_lower_state_changed(slave); slave->should_notify_link = 0; } else { if (slave->should_notify_link) slave->should_notify_link = 0; else slave->should_notify_link = 1; } } static inline void bond_set_slave_link_state(struct slave *slave, int state, bool notify) { bond_propose_link_state(slave, state); bond_commit_link_state(slave, notify); } static inline void bond_slave_link_notify(struct bonding *bond) { struct list_head *iter; struct slave *tmp; bond_for_each_slave(bond, tmp, iter) { if (tmp->should_notify_link) { bond_queue_slave_event(tmp); bond_lower_state_changed(tmp); tmp->should_notify_link = 0; } } } static inline __be32 bond_confirm_addr(struct net_device *dev, __be32 dst, __be32 local) { struct in_device *in_dev; __be32 addr = 0; rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); if (in_dev) addr = inet_confirm_addr(dev_net(dev), in_dev, dst, local, RT_SCOPE_HOST); rcu_read_unlock(); return addr; } struct bond_net { struct net *net; /* Associated network namespace */ struct list_head dev_list; #ifdef CONFIG_PROC_FS struct proc_dir_entry *proc_dir; #endif struct class_attribute class_attr_bonding_masters; }; int bond_rcv_validate(const struct sk_buff *skb, struct bonding *bond, struct slave *slave); netdev_tx_t bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev); int bond_create(struct net *net, const char *name); int bond_create_sysfs(struct bond_net *net); void bond_destroy_sysfs(struct bond_net *net); void bond_prepare_sysfs_group(struct bonding *bond); int bond_sysfs_slave_add(struct slave *slave); void bond_sysfs_slave_del(struct slave *slave); void bond_xdp_set_features(struct net_device *bond_dev); int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, struct netlink_ext_ack *extack); int bond_release(struct net_device *bond_dev, struct net_device *slave_dev); u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb); int bond_set_carrier(struct bonding *bond); void bond_select_active_slave(struct bonding *bond); void bond_change_active_slave(struct bonding *bond, struct slave *new_active); void bond_create_debugfs(void); void bond_destroy_debugfs(void); void bond_debug_register(struct bonding *bond); void bond_debug_unregister(struct bonding *bond); void bond_debug_reregister(struct bonding *bond); const char *bond_mode_name(int mode); void bond_setup(struct net_device *bond_dev); unsigned int bond_get_num_tx_queues(void); int bond_netlink_init(void); void bond_netlink_fini(void); struct net_device *bond_option_active_slave_get_rcu(struct bonding *bond); const char *bond_slave_link_status(s8 link); struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev, struct net_device *end_dev, int level); int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave); void bond_slave_arr_work_rearm(struct bonding *bond, unsigned long delay); void bond_work_init_all(struct bonding *bond); #ifdef CONFIG_PROC_FS void bond_create_proc_entry(struct bonding *bond); void bond_remove_proc_entry(struct bonding *bond); void bond_create_proc_dir(struct bond_net *bn); void bond_destroy_proc_dir(struct bond_net *bn); #else static inline void bond_create_proc_entry(struct bonding *bond) { } static inline void bond_remove_proc_entry(struct bonding *bond) { } static inline void bond_create_proc_dir(struct bond_net *bn) { } static inline void bond_destroy_proc_dir(struct bond_net *bn) { } #endif static inline struct slave *bond_slave_has_mac(struct bonding *bond, const u8 *mac) { struct list_head *iter; struct slave *tmp; bond_for_each_slave(bond, tmp, iter) if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr)) return tmp; return NULL; } /* Caller must hold rcu_read_lock() for read */ static inline bool bond_slave_has_mac_rcu(struct bonding *bond, const u8 *mac) { struct list_head *iter; struct slave *tmp; bond_for_each_slave_rcu(bond, tmp, iter) if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr)) return true; return false; } /* Check if the ip is present in arp ip list, or first free slot if ip == 0 * Returns -1 if not found, index if found */ static inline int bond_get_targets_ip(__be32 *targets, __be32 ip) { int i; for (i = 0; i < BOND_MAX_ARP_TARGETS; i++) if (targets[i] == ip) return i; else if (targets[i] == 0) break; return -1; } #if IS_ENABLED(CONFIG_IPV6) static inline int bond_get_targets_ip6(struct in6_addr *targets, struct in6_addr *ip) { struct in6_addr mcaddr; int i; for (i = 0; i < BOND_MAX_NS_TARGETS; i++) { addrconf_addr_solict_mult(&targets[i], &mcaddr); if ((ipv6_addr_equal(&targets[i], ip)) || (ipv6_addr_equal(&mcaddr, ip))) return i; else if (ipv6_addr_any(&targets[i])) break; } return -1; } #endif /* exported from bond_main.c */ extern unsigned int bond_net_id; /* exported from bond_netlink.c */ extern struct rtnl_link_ops bond_link_ops; /* exported from bond_sysfs_slave.c */ extern const struct sysfs_ops slave_sysfs_ops; /* exported from bond_3ad.c */ extern const u8 lacpdu_mcast_addr[]; static inline netdev_tx_t bond_tx_drop(struct net_device *dev, struct sk_buff *skb) { dev_core_stats_tx_dropped_inc(dev); dev_kfree_skb_any(skb); return NET_XMIT_DROP; } #endif /* _NET_BONDING_H */ |