// SPDX-License-Identifier: GPL-2.0
/*
 * Functions related to generic timeout handling of requests.
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/fault-inject.h>

#include "blk.h"
#include "blk-mq.h"

#ifdef CONFIG_FAIL_IO_TIMEOUT

static DECLARE_FAULT_ATTR(fail_io_timeout);

static int __init setup_fail_io_timeout(char *str)
{
	return setup_fault_attr(&fail_io_timeout, str);
}
__setup("fail_io_timeout=", setup_fail_io_timeout);

bool __blk_should_fake_timeout(struct request_queue *q)
{
	return should_fail(&fail_io_timeout, 1);
}
EXPORT_SYMBOL_GPL(__blk_should_fake_timeout);

static int __init fail_io_timeout_debugfs(void)
{
	struct dentry *dir = fault_create_debugfs_attr("fail_io_timeout",
						NULL, &fail_io_timeout);

	return PTR_ERR_OR_ZERO(dir);
}

late_initcall(fail_io_timeout_debugfs);

ssize_t part_timeout_show(struct device *dev, struct device_attribute *attr,
			  char *buf)
{
	struct gendisk *disk = dev_to_disk(dev);
	int set = test_bit(QUEUE_FLAG_FAIL_IO, &disk->queue->queue_flags);

	return sprintf(buf, "%d\n", set != 0);
}

ssize_t part_timeout_store(struct device *dev, struct device_attribute *attr,
			   const char *buf, size_t count)
{
	struct gendisk *disk = dev_to_disk(dev);
	int val;

	if (count) {
		struct request_queue *q = disk->queue;
		char *p = (char *) buf;

		val = simple_strtoul(p, &p, 10);
		if (val)
			blk_queue_flag_set(QUEUE_FLAG_FAIL_IO, q);
		else
			blk_queue_flag_clear(QUEUE_FLAG_FAIL_IO, q);
	}

	return count;
}

#endif /* CONFIG_FAIL_IO_TIMEOUT */

/**
 * blk_abort_request - Request recovery for the specified command
 * @req:	pointer to the request of interest
 *
 * This function requests that the block layer start recovery for the
 * request by deleting the timer and calling the q's timeout function.
 * LLDDs who implement their own error recovery MAY ignore the timeout
 * event if they generated blk_abort_request.
 */
void blk_abort_request(struct request *req)
{
	/*
	 * All we need to ensure is that timeout scan takes place
	 * immediately and that scan sees the new timeout value.
	 * No need for fancy synchronizations.
	 */
	WRITE_ONCE(req->deadline, jiffies);
	kblockd_schedule_work(&req->q->timeout_work);
}
EXPORT_SYMBOL_GPL(blk_abort_request);

static unsigned long blk_timeout_mask __read_mostly;

static int __init blk_timeout_init(void)
{
	blk_timeout_mask = roundup_pow_of_two(HZ) - 1;
	return 0;
}

late_initcall(blk_timeout_init);

/*
 * Just a rough estimate, we don't care about specific values for timeouts.
 */
static inline unsigned long blk_round_jiffies(unsigned long j)
{
	return (j + blk_timeout_mask) + 1;
}

unsigned long blk_rq_timeout(unsigned long timeout)
{
	unsigned long maxt;

	maxt = blk_round_jiffies(jiffies + BLK_MAX_TIMEOUT);
	if (time_after(timeout, maxt))
		timeout = maxt;

	return timeout;
}

/**
 * blk_add_timer - Start timeout timer for a single request
 * @req:	request that is about to start running.
 *
 * Notes:
 *    Each request has its own timer, and as it is added to the queue, we
 *    set up the timer. When the request completes, we cancel the timer.
 */
void blk_add_timer(struct request *req)
{
	struct request_queue *q = req->q;
	unsigned long expiry;

	/*
	 * Some LLDs, like scsi, peek at the timeout to prevent a
	 * command from being retried forever.
	 */
	if (!req->timeout)
		req->timeout = q->rq_timeout;

	req->rq_flags &= ~RQF_TIMED_OUT;

	expiry = jiffies + req->timeout;
	WRITE_ONCE(req->deadline, expiry);

	/*
	 * If the timer isn't already pending or this timeout is earlier
	 * than an existing one, modify the timer. Round up to next nearest
	 * second.
	 */
	expiry = blk_rq_timeout(blk_round_jiffies(expiry));

	if (!timer_pending(&q->timeout) ||
	    time_before(expiry, q->timeout.expires)) {
		unsigned long diff = q->timeout.expires - expiry;

		/*
		 * Due to added timer slack to group timers, the timer
		 * will often be a little in front of what we asked for.
		 * So apply some tolerance here too, otherwise we keep
		 * modifying the timer because expires for value X
		 * will be X + something.
		 */
		if (!timer_pending(&q->timeout) || (diff >= HZ / 2))
			mod_timer(&q->timeout, expiry);
	}
}
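
The mask arithmetic above is easier to see in isolation, so here is a minimal user-space sketch of how the mask set up in blk_timeout_init() coarsens a per-request expiry and how blk_rq_timeout() caps it. HZ, the BLK_MAX_TIMEOUT value and the local roundup_pow_of_two() helper are stand-ins chosen for the example, not the kernel's definitions.

/* Standalone illustration, not part of the kernel source. */
#include <stdio.h>

#define HZ 250				/* assumed tick rate */
#define BLK_MAX_TIMEOUT (5 * HZ)	/* assumed cap, mirroring blk_rq_timeout() */

static unsigned long roundup_pow_of_two(unsigned long n)
{
	unsigned long p = 1;

	while (p < n)
		p <<= 1;
	return p;
}

int main(void)
{
	unsigned long blk_timeout_mask = roundup_pow_of_two(HZ) - 1;
	unsigned long jiffies = 100000;			/* pretend "now" */
	unsigned long expiry = jiffies + 30 * HZ;	/* request deadline */

	/* blk_round_jiffies(): add the mask plus one more jiffy */
	unsigned long rounded = (expiry + blk_timeout_mask) + 1;

	/* blk_rq_timeout(): never arm the queue timer further out than this */
	unsigned long maxt = ((jiffies + BLK_MAX_TIMEOUT) + blk_timeout_mask) + 1;
	unsigned long armed = rounded > maxt ? maxt : rounded;

	printf("mask=%lu rounded=%lu maxt=%lu armed=%lu\n",
	       blk_timeout_mask, rounded, maxt, armed);
	return 0;
}

With the assumed HZ=250, a 30-second request expiry is rounded out by roughly one extra "second" worth of jiffies and then clamped to the ~5-second horizon that the queue-wide timer actually uses, which is all blk_add_timer() needs from this helper.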
// SPDX-License-Identifier: GPL-2.0-only

#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <linux/netfilter_ipv6.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nft_fib.h>

#include <net/ip6_fib.h>
#include <net/ip6_route.h>

static int get_ifindex(const struct net_device *dev)
{
	return dev ? dev->ifindex : 0;
}

static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv,
			       const struct nft_pktinfo *pkt,
			       const struct net_device *dev,
			       struct ipv6hdr *iph)
{
	int lookup_flags = 0;

	if (priv->flags & NFTA_FIB_F_DADDR) {
		fl6->daddr = iph->daddr;
		fl6->saddr = iph->saddr;
	} else {
		if (nft_hook(pkt) == NF_INET_FORWARD &&
		    priv->flags & NFTA_FIB_F_IIF)
			fl6->flowi6_iif = nft_out(pkt)->ifindex;

		fl6->daddr = iph->saddr;
		fl6->saddr = iph->daddr;
	}

	if (ipv6_addr_type(&fl6->daddr) & IPV6_ADDR_LINKLOCAL) {
		lookup_flags |= RT6_LOOKUP_F_IFACE;
		fl6->flowi6_oif = get_ifindex(dev ? dev : pkt->skb->dev);
	}

	if (ipv6_addr_type(&fl6->saddr) & IPV6_ADDR_UNICAST)
		lookup_flags |= RT6_LOOKUP_F_HAS_SADDR;

	if (priv->flags & NFTA_FIB_F_MARK)
		fl6->flowi6_mark = pkt->skb->mark;

	fl6->flowlabel = (*(__be32 *)iph) & IPV6_FLOWINFO_MASK;

	return lookup_flags;
}

static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
				const struct nft_pktinfo *pkt,
				struct ipv6hdr *iph)
{
	const struct net_device *dev = NULL;
	int route_err, addrtype;
	struct rt6_info *rt;
	struct flowi6 fl6 = {
		.flowi6_iif = LOOPBACK_IFINDEX,
		.flowi6_proto = pkt->tprot,
		.flowi6_uid = sock_net_uid(nft_net(pkt), NULL),
	};
	u32 ret = 0;

	if (priv->flags & NFTA_FIB_F_IIF)
		dev = nft_in(pkt);
	else if (priv->flags & NFTA_FIB_F_OIF)
		dev = nft_out(pkt);

	fl6.flowi6_l3mdev = l3mdev_master_ifindex_rcu(dev);

	nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph);

	if (dev && nf_ipv6_chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
		ret = RTN_LOCAL;

	route_err = nf_ip6_route(nft_net(pkt), (struct dst_entry **)&rt,
				 flowi6_to_flowi(&fl6), false);
	if (route_err)
		goto err;

	if (rt->rt6i_flags & RTF_REJECT) {
		route_err = rt->dst.error;
		dst_release(&rt->dst);
		goto err;
	}

	if (ipv6_anycast_destination((struct dst_entry *)rt, &fl6.daddr))
		ret = RTN_ANYCAST;
	else if (!dev && rt->rt6i_flags & RTF_LOCAL)
		ret = RTN_LOCAL;

	dst_release(&rt->dst);

	if (ret)
		return ret;

	addrtype = ipv6_addr_type(&fl6.daddr);

	if (addrtype & IPV6_ADDR_MULTICAST)
		return RTN_MULTICAST;
	if (addrtype & IPV6_ADDR_UNICAST)
		return RTN_UNICAST;

	return RTN_UNSPEC;
 err:
	switch (route_err) {
	case -EINVAL:
		return RTN_BLACKHOLE;
	case -EACCES:
		return RTN_PROHIBIT;
	case -EAGAIN:
		return RTN_THROW;
	default:
		break;
	}

	return RTN_UNREACHABLE;
}

void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
			const struct nft_pktinfo *pkt)
{
	const struct nft_fib *priv = nft_expr_priv(expr);
	int noff = skb_network_offset(pkt->skb);
	u32 *dest = &regs->data[priv->dreg];
	struct ipv6hdr *iph, _iph;

	iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph);
	if (!iph) {
		regs->verdict.code = NFT_BREAK;
		return;
	}

	*dest = __nft_fib6_eval_type(priv, pkt, iph);
}
EXPORT_SYMBOL_GPL(nft_fib6_eval_type);

static bool nft_fib_v6_skip_icmpv6(const struct sk_buff *skb, u8 next,
				   const struct ipv6hdr *iph)
{
	if (likely(next != IPPROTO_ICMPV6))
		return false;

	if (ipv6_addr_type(&iph->saddr) != IPV6_ADDR_ANY)
		return false;

	return ipv6_addr_type(&iph->daddr) & IPV6_ADDR_LINKLOCAL;
}

void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
		   const struct nft_pktinfo *pkt)
{
	const struct nft_fib *priv = nft_expr_priv(expr);
	int noff = skb_network_offset(pkt->skb);
	const struct net_device *oif = NULL;
	u32 *dest = &regs->data[priv->dreg];
	struct ipv6hdr *iph, _iph;
	struct flowi6 fl6 = {
		.flowi6_iif = LOOPBACK_IFINDEX,
		.flowi6_proto = pkt->tprot,
		.flowi6_uid = sock_net_uid(nft_net(pkt), NULL),
		.flowi6_l3mdev = l3mdev_master_ifindex_rcu(nft_in(pkt)),
	};
	struct rt6_info *rt;
	int lookup_flags;

	if (nft_fib_can_skip(pkt)) {
		nft_fib_store_result(dest, priv, nft_in(pkt));
		return;
	}

	if (priv->flags & NFTA_FIB_F_IIF)
		oif = nft_in(pkt);
	else if (priv->flags & NFTA_FIB_F_OIF)
		oif = nft_out(pkt);

	iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph);
	if (!iph) {
		regs->verdict.code = NFT_BREAK;
		return;
	}

	if (nft_fib_v6_skip_icmpv6(pkt->skb, pkt->tprot, iph)) {
		nft_fib_store_result(dest, priv, nft_in(pkt));
		return;
	}

	lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif, iph);

	*dest = 0;
	rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, pkt->skb,
				      lookup_flags);
	if (rt->dst.error)
		goto put_rt_err;

	/* Should not see RTF_LOCAL here */
	if (rt->rt6i_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL))
		goto put_rt_err;

	if (oif && oif != rt->rt6i_idev->dev &&
	    l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) != oif->ifindex)
		goto put_rt_err;

	nft_fib_store_result(dest, priv, rt->rt6i_idev->dev);
 put_rt_err:
	ip6_rt_put(rt);
}
EXPORT_SYMBOL_GPL(nft_fib6_eval);

static struct nft_expr_type nft_fib6_type;

static const struct nft_expr_ops nft_fib6_type_ops = {
	.type		= &nft_fib6_type,
	.size		= NFT_EXPR_SIZE(sizeof(struct nft_fib)),
	.eval		= nft_fib6_eval_type,
	.init		= nft_fib_init,
	.dump		= nft_fib_dump,
	.validate	= nft_fib_validate,
	.reduce		= nft_fib_reduce,
};

static const struct nft_expr_ops nft_fib6_ops = {
	.type		= &nft_fib6_type,
	.size		= NFT_EXPR_SIZE(sizeof(struct nft_fib)),
	.eval		= nft_fib6_eval,
	.init		= nft_fib_init,
	.dump		= nft_fib_dump,
	.validate	= nft_fib_validate,
	.reduce		= nft_fib_reduce,
};

static const struct nft_expr_ops *
nft_fib6_select_ops(const struct nft_ctx *ctx,
		    const struct nlattr * const tb[])
{
	enum nft_fib_result result;

	if (!tb[NFTA_FIB_RESULT])
		return ERR_PTR(-EINVAL);

	result = ntohl(nla_get_be32(tb[NFTA_FIB_RESULT]));

	switch (result) {
	case NFT_FIB_RESULT_OIF:
		return &nft_fib6_ops;
	case NFT_FIB_RESULT_OIFNAME:
		return &nft_fib6_ops;
	case NFT_FIB_RESULT_ADDRTYPE:
		return &nft_fib6_type_ops;
	default:
		return ERR_PTR(-EOPNOTSUPP);
	}
}

static struct nft_expr_type nft_fib6_type __read_mostly = {
	.name		= "fib",
	.select_ops	= nft_fib6_select_ops,
	.policy		= nft_fib_policy,
	.maxattr	= NFTA_FIB_MAX,
	.family		= NFPROTO_IPV6,
	.owner		= THIS_MODULE,
};

static int __init nft_fib6_module_init(void)
{
	return nft_register_expr(&nft_fib6_type);
}

static void __exit nft_fib6_module_exit(void)
{
	nft_unregister_expr(&nft_fib6_type);
}

module_init(nft_fib6_module_init);
module_exit(nft_fib6_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
MODULE_ALIAS_NFT_AF_EXPR(10, "fib");
MODULE_DESCRIPTION("nftables fib / ipv6 route lookup support");
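
For context, here is how this expression is typically driven from userspace; the rules below are illustrative nft(8) usage and are not defined by this file. Per the switch in nft_fib6_select_ops(), an oif/oifname result is evaluated by nft_fib6_eval(), while an addrtype result goes through nft_fib6_eval_type()/__nft_fib6_eval_type().

/*
 * Illustrative nft(8) rules (assumed typical usage):
 *
 *   # reverse-path check: drop packets that have no route back to their
 *   # source via the interface they arrived on (fib ... oif)
 *   nft add rule inet filter prerouting fib saddr . iif oif missing drop
 *
 *   # classify the destination address by route type (fib daddr type)
 *   nft add rule inet filter prerouting fib daddr type local counter
 */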
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This header defines architecture specific interfaces, x86 version
 */

#ifndef _ASM_X86_KVM_HOST_H
#define _ASM_X86_KVM_HOST_H

#include <linux/types.h>
#include <linux/mm.h>
#include <linux/mmu_notifier.h>
#include <linux/tracepoint.h>
#include <linux/cpumask.h>
#include <linux/irq_work.h>
#include <linux/irq.h>
#include <linux/workqueue.h>

#include <linux/kvm.h>
#include <linux/kvm_para.h>
#include <linux/kvm_types.h>
#include <linux/perf_event.h>
#include <linux/pvclock_gtod.h>
#include <linux/clocksource.h>
#include <linux/irqbypass.h>
#include <linux/kfifo.h>
#include <linux/sched/vhost_task.h>
#include <linux/call_once.h>
#include <linux/atomic.h>

#include <asm/apic.h>
#include <asm/pvclock-abi.h>
#include <asm/desc.h>
#include <asm/mtrr.h>
#include <asm/msr-index.h>
#include <asm/asm.h>
#include <asm/kvm_page_track.h>
#include <asm/kvm_vcpu_regs.h>
#include <asm/reboot.h>
#include <hyperv/hvhdk.h>

#define __KVM_HAVE_ARCH_VCPU_DEBUGFS

/*
 * CONFIG_KVM_MAX_NR_VCPUS is defined iff CONFIG_KVM!=n, provide a dummy max if
 * KVM is disabled (arbitrarily use the default from CONFIG_KVM_MAX_NR_VCPUS).
 */
#ifdef CONFIG_KVM_MAX_NR_VCPUS
#define KVM_MAX_VCPUS CONFIG_KVM_MAX_NR_VCPUS
#else
#define KVM_MAX_VCPUS 1024
#endif

/*
 * In x86, the VCPU ID corresponds to the APIC ID, and APIC IDs
 * might be larger than the actual number of VCPUs because the
 * APIC ID encodes CPU topology information.
 *
 * In the worst case, we'll need less than one extra bit for the
 * Core ID, and less than one extra bit for the Package (Die) ID,
 * so ratio of 4 should be enough.
*/ #define KVM_VCPU_ID_RATIO 4 #define KVM_MAX_VCPU_IDS (KVM_MAX_VCPUS * KVM_VCPU_ID_RATIO) /* memory slots that are not exposed to userspace */ #define KVM_INTERNAL_MEM_SLOTS 3 #define KVM_HALT_POLL_NS_DEFAULT 200000 #define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS #define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ KVM_DIRTY_LOG_INITIALLY_SET) #define KVM_BUS_LOCK_DETECTION_VALID_MODE (KVM_BUS_LOCK_DETECTION_OFF | \ KVM_BUS_LOCK_DETECTION_EXIT) #define KVM_X86_NOTIFY_VMEXIT_VALID_BITS (KVM_X86_NOTIFY_VMEXIT_ENABLED | \ KVM_X86_NOTIFY_VMEXIT_USER) /* x86-specific vcpu->requests bit members */ #define KVM_REQ_MIGRATE_TIMER KVM_ARCH_REQ(0) #define KVM_REQ_REPORT_TPR_ACCESS KVM_ARCH_REQ(1) #define KVM_REQ_TRIPLE_FAULT KVM_ARCH_REQ(2) #define KVM_REQ_MMU_SYNC KVM_ARCH_REQ(3) #define KVM_REQ_CLOCK_UPDATE KVM_ARCH_REQ(4) #define KVM_REQ_LOAD_MMU_PGD KVM_ARCH_REQ(5) #define KVM_REQ_EVENT KVM_ARCH_REQ(6) #define KVM_REQ_APF_HALT KVM_ARCH_REQ(7) #define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(8) #define KVM_REQ_NMI KVM_ARCH_REQ(9) #define KVM_REQ_PMU KVM_ARCH_REQ(10) #define KVM_REQ_PMI KVM_ARCH_REQ(11) #ifdef CONFIG_KVM_SMM #define KVM_REQ_SMI KVM_ARCH_REQ(12) #endif #define KVM_REQ_MASTERCLOCK_UPDATE KVM_ARCH_REQ(13) #define KVM_REQ_MCLOCK_INPROGRESS \ KVM_ARCH_REQ_FLAGS(14, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_SCAN_IOAPIC \ KVM_ARCH_REQ_FLAGS(15, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_GLOBAL_CLOCK_UPDATE KVM_ARCH_REQ(16) #define KVM_REQ_APIC_PAGE_RELOAD \ KVM_ARCH_REQ_FLAGS(17, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_HV_CRASH KVM_ARCH_REQ(18) #define KVM_REQ_IOAPIC_EOI_EXIT KVM_ARCH_REQ(19) #define KVM_REQ_HV_RESET KVM_ARCH_REQ(20) #define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21) #define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22) #define KVM_REQ_LOAD_EOI_EXITMAP KVM_ARCH_REQ(23) #define KVM_REQ_GET_NESTED_STATE_PAGES KVM_ARCH_REQ(24) #define KVM_REQ_APICV_UPDATE \ KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_TLB_FLUSH_CURRENT KVM_ARCH_REQ(26) #define KVM_REQ_TLB_FLUSH_GUEST \ KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_APF_READY KVM_ARCH_REQ(28) #define KVM_REQ_MSR_FILTER_CHANGED KVM_ARCH_REQ(29) #define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \ KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_MMU_FREE_OBSOLETE_ROOTS \ KVM_ARCH_REQ_FLAGS(31, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_HV_TLB_FLUSH \ KVM_ARCH_REQ_FLAGS(32, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_UPDATE_PROTECTED_GUEST_STATE KVM_ARCH_REQ(34) #define CR0_RESERVED_BITS \ (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) #define CR4_RESERVED_BITS \ (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \ | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \ | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \ | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP \ | X86_CR4_LAM_SUP)) #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) #define INVALID_PAGE (~(hpa_t)0) #define VALID_PAGE(x) ((x) != INVALID_PAGE) /* KVM Hugepage definitions for x86 */ #define KVM_MAX_HUGEPAGE_LEVEL PG_LEVEL_1G #define KVM_NR_PAGE_SIZES (KVM_MAX_HUGEPAGE_LEVEL - PG_LEVEL_4K + 1) #define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 9) #define 
KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x)) #define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x)) #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) #define KVM_MEMSLOT_PAGES_TO_MMU_PAGES_RATIO 50 #define KVM_MIN_ALLOC_MMU_PAGES 64UL #define KVM_MMU_HASH_SHIFT 12 #define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT) #define KVM_MIN_FREE_MMU_PAGES 5 #define KVM_REFILL_PAGES 25 #define KVM_MAX_CPUID_ENTRIES 256 #define KVM_NR_VAR_MTRR 8 #define ASYNC_PF_PER_VCPU 64 enum kvm_reg { VCPU_REGS_RAX = __VCPU_REGS_RAX, VCPU_REGS_RCX = __VCPU_REGS_RCX, VCPU_REGS_RDX = __VCPU_REGS_RDX, VCPU_REGS_RBX = __VCPU_REGS_RBX, VCPU_REGS_RSP = __VCPU_REGS_RSP, VCPU_REGS_RBP = __VCPU_REGS_RBP, VCPU_REGS_RSI = __VCPU_REGS_RSI, VCPU_REGS_RDI = __VCPU_REGS_RDI, #ifdef CONFIG_X86_64 VCPU_REGS_R8 = __VCPU_REGS_R8, VCPU_REGS_R9 = __VCPU_REGS_R9, VCPU_REGS_R10 = __VCPU_REGS_R10, VCPU_REGS_R11 = __VCPU_REGS_R11, VCPU_REGS_R12 = __VCPU_REGS_R12, VCPU_REGS_R13 = __VCPU_REGS_R13, VCPU_REGS_R14 = __VCPU_REGS_R14, VCPU_REGS_R15 = __VCPU_REGS_R15, #endif VCPU_REGS_RIP, NR_VCPU_REGS, VCPU_EXREG_PDPTR = NR_VCPU_REGS, VCPU_EXREG_CR0, VCPU_EXREG_CR3, VCPU_EXREG_CR4, VCPU_EXREG_RFLAGS, VCPU_EXREG_SEGMENTS, VCPU_EXREG_EXIT_INFO_1, VCPU_EXREG_EXIT_INFO_2, }; enum { VCPU_SREG_ES, VCPU_SREG_CS, VCPU_SREG_SS, VCPU_SREG_DS, VCPU_SREG_FS, VCPU_SREG_GS, VCPU_SREG_TR, VCPU_SREG_LDTR, }; enum exit_fastpath_completion { EXIT_FASTPATH_NONE, EXIT_FASTPATH_REENTER_GUEST, EXIT_FASTPATH_EXIT_HANDLED, EXIT_FASTPATH_EXIT_USERSPACE, }; typedef enum exit_fastpath_completion fastpath_t; struct x86_emulate_ctxt; struct x86_exception; union kvm_smram; enum x86_intercept; enum x86_intercept_stage; #define KVM_NR_DB_REGS 4 #define DR6_BUS_LOCK (1 << 11) #define DR6_BD (1 << 13) #define DR6_BS (1 << 14) #define DR6_BT (1 << 15) #define DR6_RTM (1 << 16) /* * DR6_ACTIVE_LOW combines fixed-1 and active-low bits. * We can regard all the bits in DR6_FIXED_1 as active_low bits; * they will never be 0 for now, but when they are defined * in the future it will require no code change. * * DR6_ACTIVE_LOW is also used as the init/reset value for DR6. */ #define DR6_ACTIVE_LOW 0xffff0ff0 #define DR6_VOLATILE 0x0001e80f #define DR6_FIXED_1 (DR6_ACTIVE_LOW & ~DR6_VOLATILE) #define DR7_BP_EN_MASK 0x000000ff #define DR7_GE (1 << 9) #define DR7_GD (1 << 13) #define DR7_FIXED_1 0x00000400 #define DR7_VOLATILE 0xffff2bff #define KVM_GUESTDBG_VALID_MASK \ (KVM_GUESTDBG_ENABLE | \ KVM_GUESTDBG_SINGLESTEP | \ KVM_GUESTDBG_USE_HW_BP | \ KVM_GUESTDBG_USE_SW_BP | \ KVM_GUESTDBG_INJECT_BP | \ KVM_GUESTDBG_INJECT_DB | \ KVM_GUESTDBG_BLOCKIRQ) #define PFERR_PRESENT_MASK BIT(0) #define PFERR_WRITE_MASK BIT(1) #define PFERR_USER_MASK BIT(2) #define PFERR_RSVD_MASK BIT(3) #define PFERR_FETCH_MASK BIT(4) #define PFERR_PK_MASK BIT(5) #define PFERR_SGX_MASK BIT(15) #define PFERR_GUEST_RMP_MASK BIT_ULL(31) #define PFERR_GUEST_FINAL_MASK BIT_ULL(32) #define PFERR_GUEST_PAGE_MASK BIT_ULL(33) #define PFERR_GUEST_ENC_MASK BIT_ULL(34) #define PFERR_GUEST_SIZEM_MASK BIT_ULL(35) #define PFERR_GUEST_VMPL_MASK BIT_ULL(36) /* * IMPLICIT_ACCESS is a KVM-defined flag used to correctly perform SMAP checks * when emulating instructions that triggers implicit access. */ #define PFERR_IMPLICIT_ACCESS BIT_ULL(48) /* * PRIVATE_ACCESS is a KVM-defined flag us to indicate that a fault occurred * when the guest was accessing private memory. 
*/ #define PFERR_PRIVATE_ACCESS BIT_ULL(49) #define PFERR_SYNTHETIC_MASK (PFERR_IMPLICIT_ACCESS | PFERR_PRIVATE_ACCESS) /* apic attention bits */ #define KVM_APIC_CHECK_VAPIC 0 /* * The following bit is set with PV-EOI, unset on EOI. * We detect PV-EOI changes by guest by comparing * this bit with PV-EOI in guest memory. * See the implementation in apic_update_pv_eoi. */ #define KVM_APIC_PV_EOI_PENDING 1 struct kvm_kernel_irq_routing_entry; /* * kvm_mmu_page_role tracks the properties of a shadow page (where shadow page * also includes TDP pages) to determine whether or not a page can be used in * the given MMU context. This is a subset of the overall kvm_cpu_role to * minimize the size of kvm_memory_slot.arch.gfn_write_track, i.e. allows * allocating 2 bytes per gfn instead of 4 bytes per gfn. * * Upper-level shadow pages having gptes are tracked for write-protection via * gfn_write_track. As above, gfn_write_track is a 16 bit counter, so KVM must * not create more than 2^16-1 upper-level shadow pages at a single gfn, * otherwise gfn_write_track will overflow and explosions will ensue. * * A unique shadow page (SP) for a gfn is created if and only if an existing SP * cannot be reused. The ability to reuse a SP is tracked by its role, which * incorporates various mode bits and properties of the SP. Roughly speaking, * the number of unique SPs that can theoretically be created is 2^n, where n * is the number of bits that are used to compute the role. * * But, even though there are 20 bits in the mask below, not all combinations * of modes and flags are possible: * * - invalid shadow pages are not accounted, mirror pages are not shadowed, * so the bits are effectively 18. * * - quadrant will only be used if has_4_byte_gpte=1 (non-PAE paging); * execonly and ad_disabled are only used for nested EPT which has * has_4_byte_gpte=0. Therefore, 2 bits are always unused. * * - the 4 bits of level are effectively limited to the values 2/3/4/5, * as 4k SPs are not tracked (allowed to go unsync). In addition non-PAE * paging has exactly one upper level, making level completely redundant * when has_4_byte_gpte=1. * * - on top of this, smep_andnot_wp and smap_andnot_wp are only set if * cr0_wp=0, therefore these three bits only give rise to 5 possibilities. * * Therefore, the maximum number of possible upper-level shadow pages for a * single gfn is a bit less than 2^13. */ union kvm_mmu_page_role { u32 word; struct { unsigned level:4; unsigned has_4_byte_gpte:1; unsigned quadrant:2; unsigned direct:1; unsigned access:3; unsigned invalid:1; unsigned efer_nx:1; unsigned cr0_wp:1; unsigned smep_andnot_wp:1; unsigned smap_andnot_wp:1; unsigned ad_disabled:1; unsigned guest_mode:1; unsigned passthrough:1; unsigned is_mirror:1; unsigned :4; /* * This is left at the top of the word so that * kvm_memslots_for_spte_role can extract it with a * simple shift. While there is room, give it a whole * byte so it is also faster to load it from memory. */ unsigned smm:8; }; }; /* * kvm_mmu_extended_role complements kvm_mmu_page_role, tracking properties * relevant to the current MMU configuration. When loading CR0, CR4, or EFER, * including on nested transitions, if nothing in the full role changes then * MMU re-configuration can be skipped. @valid bit is set on first usage so we * don't treat all-zero structure as valid data. 
* * The properties that are tracked in the extended role but not the page role * are for things that either (a) do not affect the validity of the shadow page * or (b) are indirectly reflected in the shadow page's role. For example, * CR4.PKE only affects permission checks for software walks of the guest page * tables (because KVM doesn't support Protection Keys with shadow paging), and * CR0.PG, CR4.PAE, and CR4.PSE are indirectly reflected in role.level. * * Note, SMEP and SMAP are not redundant with sm*p_andnot_wp in the page role. * If CR0.WP=1, KVM can reuse shadow pages for the guest regardless of SMEP and * SMAP, but the MMU's permission checks for software walks need to be SMEP and * SMAP aware regardless of CR0.WP. */ union kvm_mmu_extended_role { u32 word; struct { unsigned int valid:1; unsigned int execonly:1; unsigned int cr4_pse:1; unsigned int cr4_pke:1; unsigned int cr4_smap:1; unsigned int cr4_smep:1; unsigned int cr4_la57:1; unsigned int efer_lma:1; }; }; union kvm_cpu_role { u64 as_u64; struct { union kvm_mmu_page_role base; union kvm_mmu_extended_role ext; }; }; struct kvm_rmap_head { atomic_long_t val; }; struct kvm_pio_request { unsigned long linear_rip; unsigned long count; int in; int port; int size; }; #define PT64_ROOT_MAX_LEVEL 5 struct rsvd_bits_validate { u64 rsvd_bits_mask[2][PT64_ROOT_MAX_LEVEL]; u64 bad_mt_xwr; }; struct kvm_mmu_root_info { gpa_t pgd; hpa_t hpa; }; #define KVM_MMU_ROOT_INFO_INVALID \ ((struct kvm_mmu_root_info) { .pgd = INVALID_PAGE, .hpa = INVALID_PAGE }) #define KVM_MMU_NUM_PREV_ROOTS 3 #define KVM_MMU_ROOT_CURRENT BIT(0) #define KVM_MMU_ROOT_PREVIOUS(i) BIT(1+i) #define KVM_MMU_ROOTS_ALL (BIT(1 + KVM_MMU_NUM_PREV_ROOTS) - 1) #define KVM_HAVE_MMU_RWLOCK struct kvm_mmu_page; struct kvm_page_fault; /* * x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit, * and 2-level 32-bit). The kvm_mmu structure abstracts the details of the * current mmu mode. */ struct kvm_mmu { unsigned long (*get_guest_pgd)(struct kvm_vcpu *vcpu); u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index); int (*page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault); void (*inject_page_fault)(struct kvm_vcpu *vcpu, struct x86_exception *fault); gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, gpa_t gva_or_gpa, u64 access, struct x86_exception *exception); int (*sync_spte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, int i); struct kvm_mmu_root_info root; hpa_t mirror_root_hpa; union kvm_cpu_role cpu_role; union kvm_mmu_page_role root_role; /* * The pkru_mask indicates if protection key checks are needed. It * consists of 16 domains indexed by page fault error code bits [4:1], * with PFEC.RSVD replaced by ACC_USER_MASK from the page tables. * Each domain has 2 bits which are ANDed with AD and WD from PKRU. */ u32 pkru_mask; struct kvm_mmu_root_info prev_roots[KVM_MMU_NUM_PREV_ROOTS]; /* * Bitmap; bit set = permission fault * Byte index: page fault error code [4:1] * Bit index: pte permissions in ACC_* format */ u8 permissions[16]; u64 *pae_root; u64 *pml4_root; u64 *pml5_root; /* * check zero bits on shadow page table entries, these * bits include not only hardware reserved bits but also * the bits spte never used. 
*/ struct rsvd_bits_validate shadow_zero_check; struct rsvd_bits_validate guest_rsvd_check; u64 pdptrs[4]; /* pae */ }; enum pmc_type { KVM_PMC_GP = 0, KVM_PMC_FIXED, }; struct kvm_pmc { enum pmc_type type; u8 idx; bool is_paused; bool intr; /* * Base value of the PMC counter, relative to the *consumed* count in * the associated perf_event. This value includes counter updates from * the perf_event and emulated_count since the last time the counter * was reprogrammed, but it is *not* the current value as seen by the * guest or userspace. * * The count is relative to the associated perf_event so that KVM * doesn't need to reprogram the perf_event every time the guest writes * to the counter. */ u64 counter; /* * PMC events triggered by KVM emulation that haven't been fully * processed, i.e. haven't undergone overflow detection. */ u64 emulated_counter; u64 eventsel; struct perf_event *perf_event; struct kvm_vcpu *vcpu; /* * only for creating or reusing perf_event, * eventsel value for general purpose counters, * ctrl value for fixed counters. */ u64 current_config; }; /* More counters may conflict with other existing Architectural MSRs */ #define KVM_MAX(a, b) ((a) >= (b) ? (a) : (b)) #define KVM_MAX_NR_INTEL_GP_COUNTERS 8 #define KVM_MAX_NR_AMD_GP_COUNTERS 6 #define KVM_MAX_NR_GP_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_GP_COUNTERS, \ KVM_MAX_NR_AMD_GP_COUNTERS) #define KVM_MAX_NR_INTEL_FIXED_COUTNERS 3 #define KVM_MAX_NR_AMD_FIXED_COUTNERS 0 #define KVM_MAX_NR_FIXED_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_FIXED_COUTNERS, \ KVM_MAX_NR_AMD_FIXED_COUTNERS) struct kvm_pmu { u8 version; unsigned nr_arch_gp_counters; unsigned nr_arch_fixed_counters; unsigned available_event_types; u64 fixed_ctr_ctrl; u64 fixed_ctr_ctrl_rsvd; u64 global_ctrl; u64 global_status; u64 counter_bitmask[2]; u64 global_ctrl_rsvd; u64 global_status_rsvd; u64 reserved_bits; u64 raw_event_mask; struct kvm_pmc gp_counters[KVM_MAX_NR_GP_COUNTERS]; struct kvm_pmc fixed_counters[KVM_MAX_NR_FIXED_COUNTERS]; /* * Overlay the bitmap with a 64-bit atomic so that all bits can be * set in a single access, e.g. to reprogram all counters when the PMU * filter changes. */ union { DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX); atomic64_t __reprogram_pmi; }; DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX); DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX); u64 ds_area; u64 pebs_enable; u64 pebs_enable_rsvd; u64 pebs_data_cfg; u64 pebs_data_cfg_rsvd; /* * If a guest counter is cross-mapped to host counter with different * index, its PEBS capability will be temporarily disabled. * * The user should make sure that this mask is updated * after disabling interrupts and before perf_guest_get_msrs(); */ u64 host_cross_mapped_mask; /* * The gate to release perf_events not marked in * pmc_in_use only once in a vcpu time slice. */ bool need_cleanup; /* * The total number of programmed perf_events and it helps to avoid * redundant check before cleanup if guest don't use vPMU at all. 
*/ u8 event_count; }; struct kvm_pmu_ops; enum { KVM_DEBUGREG_BP_ENABLED = 1, KVM_DEBUGREG_WONT_EXIT = 2, }; struct kvm_mtrr { u64 var[KVM_NR_VAR_MTRR * 2]; u64 fixed_64k; u64 fixed_16k[2]; u64 fixed_4k[8]; u64 deftype; }; /* Hyper-V SynIC timer */ struct kvm_vcpu_hv_stimer { struct hrtimer timer; int index; union hv_stimer_config config; u64 count; u64 exp_time; struct hv_message msg; bool msg_pending; }; /* Hyper-V synthetic interrupt controller (SynIC)*/ struct kvm_vcpu_hv_synic { u64 version; u64 control; u64 msg_page; u64 evt_page; atomic64_t sint[HV_SYNIC_SINT_COUNT]; atomic_t sint_to_gsi[HV_SYNIC_SINT_COUNT]; DECLARE_BITMAP(auto_eoi_bitmap, 256); DECLARE_BITMAP(vec_bitmap, 256); bool active; bool dont_zero_synic_pages; }; /* The maximum number of entries on the TLB flush fifo. */ #define KVM_HV_TLB_FLUSH_FIFO_SIZE (16) /* * Note: the following 'magic' entry is made up by KVM to avoid putting * anything besides GVA on the TLB flush fifo. It is theoretically possible * to observe a request to flush 4095 PFNs starting from 0xfffffffffffff000 * which will look identical. KVM's action to 'flush everything' instead of * flushing these particular addresses is, however, fully legitimate as * flushing more than requested is always OK. */ #define KVM_HV_TLB_FLUSHALL_ENTRY ((u64)-1) enum hv_tlb_flush_fifos { HV_L1_TLB_FLUSH_FIFO, HV_L2_TLB_FLUSH_FIFO, HV_NR_TLB_FLUSH_FIFOS, }; struct kvm_vcpu_hv_tlb_flush_fifo { spinlock_t write_lock; DECLARE_KFIFO(entries, u64, KVM_HV_TLB_FLUSH_FIFO_SIZE); }; /* Hyper-V per vcpu emulation context */ struct kvm_vcpu_hv { struct kvm_vcpu *vcpu; u32 vp_index; u64 hv_vapic; s64 runtime_offset; struct kvm_vcpu_hv_synic synic; struct kvm_hyperv_exit exit; struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT]; DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT); bool enforce_cpuid; struct { u32 features_eax; /* HYPERV_CPUID_FEATURES.EAX */ u32 features_ebx; /* HYPERV_CPUID_FEATURES.EBX */ u32 features_edx; /* HYPERV_CPUID_FEATURES.EDX */ u32 enlightenments_eax; /* HYPERV_CPUID_ENLIGHTMENT_INFO.EAX */ u32 enlightenments_ebx; /* HYPERV_CPUID_ENLIGHTMENT_INFO.EBX */ u32 syndbg_cap_eax; /* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */ u32 nested_eax; /* HYPERV_CPUID_NESTED_FEATURES.EAX */ u32 nested_ebx; /* HYPERV_CPUID_NESTED_FEATURES.EBX */ } cpuid_cache; struct kvm_vcpu_hv_tlb_flush_fifo tlb_flush_fifo[HV_NR_TLB_FLUSH_FIFOS]; /* Preallocated buffer for handling hypercalls passing sparse vCPU set */ u64 sparse_banks[HV_MAX_SPARSE_VCPU_BANKS]; struct hv_vp_assist_page vp_assist_page; struct { u64 pa_page_gpa; u64 vm_id; u32 vp_id; } nested; }; struct kvm_hypervisor_cpuid { u32 base; u32 limit; }; #ifdef CONFIG_KVM_XEN /* Xen HVM per vcpu emulation context */ struct kvm_vcpu_xen { u64 hypercall_rip; u32 current_runstate; u8 upcall_vector; struct gfn_to_pfn_cache vcpu_info_cache; struct gfn_to_pfn_cache vcpu_time_info_cache; struct gfn_to_pfn_cache runstate_cache; struct gfn_to_pfn_cache runstate2_cache; u64 last_steal; u64 runstate_entry_time; u64 runstate_times[4]; unsigned long evtchn_pending_sel; u32 vcpu_id; /* The Xen / ACPI vCPU ID */ u32 timer_virq; u64 timer_expires; /* In guest epoch */ atomic_t timer_pending; struct hrtimer timer; int poll_evtchn; struct timer_list poll_timer; struct kvm_hypervisor_cpuid cpuid; }; #endif struct kvm_queued_exception { bool pending; bool injected; bool has_error_code; u8 vector; u32 error_code; unsigned long payload; bool has_payload; }; /* * Hardware-defined CPUID leafs that are either scattered by the kernel or 
are * unknown to the kernel, but need to be directly used by KVM. Note, these * word values conflict with the kernel's "bug" caps, but KVM doesn't use those. */ enum kvm_only_cpuid_leafs { CPUID_12_EAX = NCAPINTS, CPUID_7_1_EDX, CPUID_8000_0007_EDX, CPUID_8000_0022_EAX, CPUID_7_2_EDX, CPUID_24_0_EBX, NR_KVM_CPU_CAPS, NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, }; struct kvm_vcpu_arch { /* * rip and regs accesses must go through * kvm_{register,rip}_{read,write} functions. */ unsigned long regs[NR_VCPU_REGS]; u32 regs_avail; u32 regs_dirty; unsigned long cr0; unsigned long cr0_guest_owned_bits; unsigned long cr2; unsigned long cr3; unsigned long cr4; unsigned long cr4_guest_owned_bits; unsigned long cr4_guest_rsvd_bits; unsigned long cr8; u32 host_pkru; u32 pkru; u32 hflags; u64 efer; u64 host_debugctl; u64 apic_base; struct kvm_lapic *apic; /* kernel irqchip context */ bool load_eoi_exitmap_pending; DECLARE_BITMAP(ioapic_handled_vectors, 256); unsigned long apic_attention; int32_t apic_arb_prio; int mp_state; u64 ia32_misc_enable_msr; u64 smbase; u64 smi_count; bool at_instruction_boundary; bool tpr_access_reporting; bool xfd_no_write_intercept; u64 ia32_xss; u64 microcode_version; u64 arch_capabilities; u64 perf_capabilities; /* * Paging state of the vcpu * * If the vcpu runs in guest mode with two level paging this still saves * the paging mode of the l1 guest. This context is always used to * handle faults. */ struct kvm_mmu *mmu; /* Non-nested MMU for L1 */ struct kvm_mmu root_mmu; /* L1 MMU when running nested */ struct kvm_mmu guest_mmu; /* * Paging state of an L2 guest (used for nested npt) * * This context will save all necessary information to walk page tables * of an L2 guest. This context is only initialized for page table * walking and not for faulting since we never handle l2 page faults on * the host. */ struct kvm_mmu nested_mmu; /* * Pointer to the mmu context currently used for * gva_to_gpa translations. */ struct kvm_mmu *walk_mmu; struct kvm_mmu_memory_cache mmu_pte_list_desc_cache; struct kvm_mmu_memory_cache mmu_shadow_page_cache; struct kvm_mmu_memory_cache mmu_shadowed_info_cache; struct kvm_mmu_memory_cache mmu_page_header_cache; /* * This cache is to allocate external page table. E.g. private EPT used * by the TDX module. */ struct kvm_mmu_memory_cache mmu_external_spt_cache; /* * QEMU userspace and the guest each have their own FPU state. * In vcpu_run, we switch between the user and guest FPU contexts. * While running a VCPU, the VCPU thread will have the guest FPU * context. * * Note that while the PKRU state lives inside the fpu registers, * it is switched out separately at VMENTER and VMEXIT time. The * "guest_fpstate" state here contains the guest FPU context, with the * host PRKU bits. */ struct fpu_guest guest_fpu; u64 xcr0; u64 guest_supported_xcr0; struct kvm_pio_request pio; void *pio_data; void *sev_pio_data; unsigned sev_pio_count; u8 event_exit_inst_len; bool exception_from_userspace; /* Exceptions to be injected to the guest. */ struct kvm_queued_exception exception; /* Exception VM-Exits to be synthesized to L1. */ struct kvm_queued_exception exception_vmexit; struct kvm_queued_interrupt { bool injected; bool soft; u8 nr; } interrupt; int halt_request; /* real mode on Intel only */ int cpuid_nent; struct kvm_cpuid_entry2 *cpuid_entries; bool cpuid_dynamic_bits_dirty; bool is_amd_compatible; /* * cpu_caps holds the effective guest capabilities, i.e. the features * the vCPU is allowed to use. 
Typically, but not always, features can * be used by the guest if and only if both KVM and userspace want to * expose the feature to the guest. * * A common exception is for virtualization holes, i.e. when KVM can't * prevent the guest from using a feature, in which case the vCPU "has" * the feature regardless of what KVM or userspace desires. * * Note, features that don't require KVM involvement in any way are * NOT enforced/sanitized by KVM, i.e. are taken verbatim from the * guest CPUID provided by userspace. */ u32 cpu_caps[NR_KVM_CPU_CAPS]; u64 reserved_gpa_bits; int maxphyaddr; /* emulate context */ struct x86_emulate_ctxt *emulate_ctxt; bool emulate_regs_need_sync_to_vcpu; bool emulate_regs_need_sync_from_vcpu; int (*complete_userspace_io)(struct kvm_vcpu *vcpu); gpa_t time; s8 pvclock_tsc_shift; u32 pvclock_tsc_mul; unsigned int hw_tsc_khz; struct gfn_to_pfn_cache pv_time; /* set guest stopped flag in pvclock flags field */ bool pvclock_set_guest_stopped_request; struct { u8 preempted; u64 msr_val; u64 last_steal; struct gfn_to_hva_cache cache; } st; u64 l1_tsc_offset; u64 tsc_offset; /* current tsc offset */ u64 last_guest_tsc; u64 last_host_tsc; u64 tsc_offset_adjustment; u64 this_tsc_nsec; u64 this_tsc_write; u64 this_tsc_generation; bool tsc_catchup; bool tsc_always_catchup; s8 virtual_tsc_shift; u32 virtual_tsc_mult; u32 virtual_tsc_khz; s64 ia32_tsc_adjust_msr; u64 msr_ia32_power_ctl; u64 l1_tsc_scaling_ratio; u64 tsc_scaling_ratio; /* current scaling ratio */ atomic_t nmi_queued; /* unprocessed asynchronous NMIs */ /* Number of NMIs pending injection, not including hardware vNMIs. */ unsigned int nmi_pending; bool nmi_injected; /* Trying to inject an NMI this entry */ bool smi_pending; /* SMI queued after currently running handler */ u8 handling_intr_from_guest; struct kvm_mtrr mtrr_state; u64 pat; unsigned switch_db_regs; unsigned long db[KVM_NR_DB_REGS]; unsigned long dr6; unsigned long dr7; unsigned long eff_db[KVM_NR_DB_REGS]; unsigned long guest_debug_dr7; u64 msr_platform_info; u64 msr_misc_features_enables; u64 mcg_cap; u64 mcg_status; u64 mcg_ctl; u64 mcg_ext_ctl; u64 *mce_banks; u64 *mci_ctl2_banks; /* Cache MMIO info */ u64 mmio_gva; unsigned mmio_access; gfn_t mmio_gfn; u64 mmio_gen; struct kvm_pmu pmu; /* used for guest single stepping over the given code position */ unsigned long singlestep_rip; #ifdef CONFIG_KVM_HYPERV bool hyperv_enabled; struct kvm_vcpu_hv *hyperv; #endif #ifdef CONFIG_KVM_XEN struct kvm_vcpu_xen xen; #endif cpumask_var_t wbinvd_dirty_mask; unsigned long last_retry_eip; unsigned long last_retry_addr; struct { bool halted; gfn_t gfns[ASYNC_PF_PER_VCPU]; struct gfn_to_hva_cache data; u64 msr_en_val; /* MSR_KVM_ASYNC_PF_EN */ u64 msr_int_val; /* MSR_KVM_ASYNC_PF_INT */ u16 vec; u32 id; u32 host_apf_flags; bool send_always; bool delivery_as_pf_vmexit; bool pageready_pending; } apf; /* OSVW MSRs (AMD only) */ struct { u64 length; u64 status; } osvw; struct { u64 msr_val; struct gfn_to_hva_cache data; } pv_eoi; u64 msr_kvm_poll_control; /* pv related host specific info */ struct { bool pv_unhalted; } pv; int pending_ioapic_eoi; int pending_external_vector; /* be preempted when it's in kernel-mode(cpl=0) */ bool preempted_in_kernel; /* Flush the L1 Data cache for L1TF mitigation on VMENTER */ bool l1tf_flush_l1d; /* Host CPU on which VM-entry was most recently attempted */ int last_vmentry_cpu; /* AMD MSRC001_0015 Hardware Configuration */ u64 msr_hwcr; /* pv related cpuid info */ struct { /* * value of the eax register in the KVM_CPUID_FEATURES 
CPUID * leaf. */ u32 features; /* * indicates whether pv emulation should be disabled if features * are not present in the guest's cpuid */ bool enforce; } pv_cpuid; /* Protected Guests */ bool guest_state_protected; bool guest_tsc_protected; /* * Set when PDPTS were loaded directly by the userspace without * reading the guest memory */ bool pdptrs_from_userspace; #if IS_ENABLED(CONFIG_HYPERV) hpa_t hv_root_tdp; #endif }; struct kvm_lpage_info { int disallow_lpage; }; struct kvm_arch_memory_slot { struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES]; struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; unsigned short *gfn_write_track; }; /* * Track the mode of the optimized logical map, as the rules for decoding the * destination vary per mode. Enabling the optimized logical map requires all * software-enabled local APIs to be in the same mode, each addressable APIC to * be mapped to only one MDA, and each MDA to map to at most one APIC. */ enum kvm_apic_logical_mode { /* All local APICs are software disabled. */ KVM_APIC_MODE_SW_DISABLED, /* All software enabled local APICs in xAPIC cluster addressing mode. */ KVM_APIC_MODE_XAPIC_CLUSTER, /* All software enabled local APICs in xAPIC flat addressing mode. */ KVM_APIC_MODE_XAPIC_FLAT, /* All software enabled local APICs in x2APIC mode. */ KVM_APIC_MODE_X2APIC, /* * Optimized map disabled, e.g. not all local APICs in the same logical * mode, same logical ID assigned to multiple APICs, etc. */ KVM_APIC_MODE_MAP_DISABLED, }; struct kvm_apic_map { struct rcu_head rcu; enum kvm_apic_logical_mode logical_mode; u32 max_apic_id; union { struct kvm_lapic *xapic_flat_map[8]; struct kvm_lapic *xapic_cluster_map[16][4]; }; struct kvm_lapic *phys_map[]; }; /* Hyper-V synthetic debugger (SynDbg)*/ struct kvm_hv_syndbg { struct { u64 control; u64 status; u64 send_page; u64 recv_page; u64 pending_page; } control; u64 options; }; /* Current state of Hyper-V TSC page clocksource */ enum hv_tsc_page_status { /* TSC page was not set up or disabled */ HV_TSC_PAGE_UNSET = 0, /* TSC page MSR was written by the guest, update pending */ HV_TSC_PAGE_GUEST_CHANGED, /* TSC page update was triggered from the host side */ HV_TSC_PAGE_HOST_CHANGED, /* TSC page was properly set up and is currently active */ HV_TSC_PAGE_SET, /* TSC page was set up with an inaccessible GPA */ HV_TSC_PAGE_BROKEN, }; #ifdef CONFIG_KVM_HYPERV /* Hyper-V emulation context */ struct kvm_hv { struct mutex hv_lock; u64 hv_guest_os_id; u64 hv_hypercall; u64 hv_tsc_page; enum hv_tsc_page_status hv_tsc_page_status; /* Hyper-v based guest crash (NT kernel bugcheck) parameters */ u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS]; u64 hv_crash_ctl; struct ms_hyperv_tsc_page tsc_ref; struct idr conn_to_evt; u64 hv_reenlightenment_control; u64 hv_tsc_emulation_control; u64 hv_tsc_emulation_status; u64 hv_invtsc_control; /* How many vCPUs have VP index != vCPU index */ atomic_t num_mismatched_vp_indexes; /* * How many SynICs use 'AutoEOI' feature * (protected by arch.apicv_update_lock) */ unsigned int synic_auto_eoi_used; struct kvm_hv_syndbg hv_syndbg; bool xsaves_xsavec_checked; }; #endif struct msr_bitmap_range { u32 flags; u32 nmsrs; u32 base; unsigned long *bitmap; }; #ifdef CONFIG_KVM_XEN /* Xen emulation context */ struct kvm_xen { struct mutex xen_lock; u32 xen_version; bool long_mode; bool runstate_update_flag; u8 upcall_vector; struct gfn_to_pfn_cache shinfo_cache; struct idr evtchn_ports; unsigned long poll_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)]; struct kvm_xen_hvm_config hvm_config; }; #endif enum 
kvm_irqchip_mode { KVM_IRQCHIP_NONE, KVM_IRQCHIP_KERNEL, /* created with KVM_CREATE_IRQCHIP */ KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */ }; struct kvm_x86_msr_filter { u8 count; bool default_allow:1; struct msr_bitmap_range ranges[16]; }; struct kvm_x86_pmu_event_filter { __u32 action; __u32 nevents; __u32 fixed_counter_bitmap; __u32 flags; __u32 nr_includes; __u32 nr_excludes; __u64 *includes; __u64 *excludes; __u64 events[]; }; enum kvm_apicv_inhibit { /********************************************************************/ /* INHIBITs that are relevant to both Intel's APICv and AMD's AVIC. */ /********************************************************************/ /* * APIC acceleration is disabled by a module parameter * and/or not supported in hardware. */ APICV_INHIBIT_REASON_DISABLED, /* * APIC acceleration is inhibited because AutoEOI feature is * being used by a HyperV guest. */ APICV_INHIBIT_REASON_HYPERV, /* * APIC acceleration is inhibited because the userspace didn't yet * enable the kernel/split irqchip. */ APICV_INHIBIT_REASON_ABSENT, /* APIC acceleration is inhibited because KVM_GUESTDBG_BLOCKIRQ * (out of band, debug measure of blocking all interrupts on this vCPU) * was enabled, to avoid AVIC/APICv bypassing it. */ APICV_INHIBIT_REASON_BLOCKIRQ, /* * APICv is disabled because not all vCPUs have a 1:1 mapping between * APIC ID and vCPU, _and_ KVM is not applying its x2APIC hotplug hack. */ APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED, /* * For simplicity, the APIC acceleration is inhibited * first time either APIC ID or APIC base are changed by the guest * from their reset values. */ APICV_INHIBIT_REASON_APIC_ID_MODIFIED, APICV_INHIBIT_REASON_APIC_BASE_MODIFIED, /******************************************************/ /* INHIBITs that are relevant only to the AMD's AVIC. */ /******************************************************/ /* * AVIC is inhibited on a vCPU because it runs a nested guest. * * This is needed because unlike APICv, the peers of this vCPU * cannot use the doorbell mechanism to signal interrupts via AVIC when * a vCPU runs nested. */ APICV_INHIBIT_REASON_NESTED, /* * On SVM, the wait for the IRQ window is implemented with pending vIRQ, * which cannot be injected when the AVIC is enabled, thus AVIC * is inhibited while KVM waits for IRQ window. */ APICV_INHIBIT_REASON_IRQWIN, /* * PIT (i8254) 're-inject' mode, relies on EOI intercept, * which AVIC doesn't support for edge triggered interrupts. */ APICV_INHIBIT_REASON_PIT_REINJ, /* * AVIC is disabled because SEV doesn't support it. */ APICV_INHIBIT_REASON_SEV, /* * AVIC is disabled because not all vCPUs with a valid LDR have a 1:1 * mapping between logical ID and vCPU. 
*/ APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED, NR_APICV_INHIBIT_REASONS, }; #define __APICV_INHIBIT_REASON(reason) \ { BIT(APICV_INHIBIT_REASON_##reason), #reason } #define APICV_INHIBIT_REASONS \ __APICV_INHIBIT_REASON(DISABLED), \ __APICV_INHIBIT_REASON(HYPERV), \ __APICV_INHIBIT_REASON(ABSENT), \ __APICV_INHIBIT_REASON(BLOCKIRQ), \ __APICV_INHIBIT_REASON(PHYSICAL_ID_ALIASED), \ __APICV_INHIBIT_REASON(APIC_ID_MODIFIED), \ __APICV_INHIBIT_REASON(APIC_BASE_MODIFIED), \ __APICV_INHIBIT_REASON(NESTED), \ __APICV_INHIBIT_REASON(IRQWIN), \ __APICV_INHIBIT_REASON(PIT_REINJ), \ __APICV_INHIBIT_REASON(SEV), \ __APICV_INHIBIT_REASON(LOGICAL_ID_ALIASED) struct kvm_arch { unsigned long n_used_mmu_pages; unsigned long n_requested_mmu_pages; unsigned long n_max_mmu_pages; unsigned int indirect_shadow_pages; u8 mmu_valid_gen; u8 vm_type; bool has_private_mem; bool has_protected_state; bool pre_fault_allowed; struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; struct list_head active_mmu_pages; /* * A list of kvm_mmu_page structs that, if zapped, could possibly be * replaced by an NX huge page. A shadow page is on this list if its * existence disallows an NX huge page (nx_huge_page_disallowed is set) * and there are no other conditions that prevent a huge page, e.g. * the backing host page is huge, dirtly logging is not enabled for its * memslot, etc... Note, zapping shadow pages on this list doesn't * guarantee an NX huge page will be created in its stead, e.g. if the * guest attempts to execute from the region then KVM obviously can't * create an NX huge page (without hanging the guest). */ struct list_head possible_nx_huge_pages; #ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING struct kvm_page_track_notifier_head track_notifier_head; #endif /* * Protects marking pages unsync during page faults, as TDP MMU page * faults only take mmu_lock for read. For simplicity, the unsync * pages lock is always taken when marking pages unsync regardless of * whether mmu_lock is held for read or write. */ spinlock_t mmu_unsync_pages_lock; u64 shadow_mmio_value; #define __KVM_HAVE_ARCH_NONCOHERENT_DMA atomic_t noncoherent_dma_count; #define __KVM_HAVE_ARCH_ASSIGNED_DEVICE atomic_t assigned_device_count; struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; atomic_t vapics_in_nmi_mode; struct mutex apic_map_lock; struct kvm_apic_map __rcu *apic_map; atomic_t apic_map_dirty; bool apic_access_memslot_enabled; bool apic_access_memslot_inhibited; /* Protects apicv_inhibit_reasons */ struct rw_semaphore apicv_update_lock; unsigned long apicv_inhibit_reasons; gpa_t wall_clock; bool mwait_in_guest; bool hlt_in_guest; bool pause_in_guest; bool cstate_in_guest; unsigned long irq_sources_bitmap; s64 kvmclock_offset; /* * This also protects nr_vcpus_matched_tsc which is read from a * preemption-disabled region, so it must be a raw spinlock. 
*/ raw_spinlock_t tsc_write_lock; u64 last_tsc_nsec; u64 last_tsc_write; u32 last_tsc_khz; u64 last_tsc_offset; u64 cur_tsc_nsec; u64 cur_tsc_write; u64 cur_tsc_offset; u64 cur_tsc_generation; int nr_vcpus_matched_tsc; u32 default_tsc_khz; bool user_set_tsc; u64 apic_bus_cycle_ns; seqcount_raw_spinlock_t pvclock_sc; bool use_master_clock; u64 master_kernel_ns; u64 master_cycle_now; struct delayed_work kvmclock_update_work; struct delayed_work kvmclock_sync_work; /* reads protected by irq_srcu, writes by irq_lock */ struct hlist_head mask_notifier_list; #ifdef CONFIG_KVM_HYPERV struct kvm_hv hyperv; #endif #ifdef CONFIG_KVM_XEN struct kvm_xen xen; #endif bool backwards_tsc_observed; bool boot_vcpu_runs_old_kvmclock; u32 bsp_vcpu_id; u64 disabled_quirks; enum kvm_irqchip_mode irqchip_mode; u8 nr_reserved_ioapic_pins; bool disabled_lapic_found; bool x2apic_format; bool x2apic_broadcast_quirk_disabled; bool guest_can_read_msr_platform_info; bool exception_payload_enabled; bool triple_fault_event; bool bus_lock_detection_enabled; bool enable_pmu; u32 notify_window; u32 notify_vmexit_flags; /* * If exit_on_emulation_error is set, and the in-kernel instruction * emulator fails to emulate an instruction, allow userspace * the opportunity to look at it. */ bool exit_on_emulation_error; /* Deflect RDMSR and WRMSR to user space when they trigger a #GP */ u32 user_space_msr_mask; struct kvm_x86_msr_filter __rcu *msr_filter; u32 hypercall_exit_enabled; /* Guest can access the SGX PROVISIONKEY. */ bool sgx_provisioning_allowed; struct kvm_x86_pmu_event_filter __rcu *pmu_event_filter; struct vhost_task *nx_huge_page_recovery_thread; u64 nx_huge_page_last; struct once nx_once; #ifdef CONFIG_X86_64 /* The number of TDP MMU pages across all roots. */ atomic64_t tdp_mmu_pages; /* * List of struct kvm_mmu_pages being used as roots. * All struct kvm_mmu_pages in the list should have * tdp_mmu_page set. * * For reads, this list is protected by: * RCU alone or * the MMU lock in read mode + RCU or * the MMU lock in write mode * * For writes, this list is protected by tdp_mmu_pages_lock; see * below for the details. * * Roots will remain in the list until their tdp_mmu_root_count * drops to zero, at which point the thread that decremented the * count to zero should removed the root from the list and clean * it up, freeing the root after an RCU grace period. */ struct list_head tdp_mmu_roots; /* * Protects accesses to the following fields when the MMU lock * is held in read mode: * - tdp_mmu_roots (above) * - the link field of kvm_mmu_page structs used by the TDP MMU * - possible_nx_huge_pages; * - the possible_nx_huge_page_link field of kvm_mmu_page structs used * by the TDP MMU * Because the lock is only taken within the MMU lock, strictly * speaking it is redundant to acquire this lock when the thread * holds the MMU lock in write mode. However it often simplifies * the code to do so. */ spinlock_t tdp_mmu_pages_lock; #endif /* CONFIG_X86_64 */ /* * If set, at least one shadow root has been allocated. This flag * is used as one input when determining whether certain memslot * related allocations are necessary. */ bool shadow_root_allocated; #ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING /* * If set, the VM has (or had) an external write tracking user, and * thus all write tracking metadata has been allocated, even if KVM * itself isn't using write tracking. 
*/ bool external_write_tracking_enabled; #endif #if IS_ENABLED(CONFIG_HYPERV) hpa_t hv_root_tdp; spinlock_t hv_root_tdp_lock; struct hv_partition_assist_pg *hv_pa_pg; #endif /* * VM-scope maximum vCPU ID. Used to determine the size of structures * that increase along with the maximum vCPU ID, in which case, using * the global KVM_MAX_VCPU_IDS may lead to significant memory waste. */ u32 max_vcpu_ids; bool disable_nx_huge_pages; /* * Memory caches used to allocate shadow pages when performing eager * page splitting. No need for a shadowed_info_cache since eager page * splitting only allocates direct shadow pages. * * Protected by kvm->slots_lock. */ struct kvm_mmu_memory_cache split_shadow_page_cache; struct kvm_mmu_memory_cache split_page_header_cache; /* * Memory cache used to allocate pte_list_desc structs while splitting * huge pages. In the worst case, to split one huge page, 512 * pte_list_desc structs are needed to add each lower level leaf sptep * to the rmap plus 1 to extend the parent_ptes rmap of the lower level * page table. * * Protected by kvm->slots_lock. */ #define SPLIT_DESC_CACHE_MIN_NR_OBJECTS (SPTE_ENT_PER_PAGE + 1) struct kvm_mmu_memory_cache split_desc_cache; gfn_t gfn_direct_bits; }; struct kvm_vm_stat { struct kvm_vm_stat_generic generic; u64 mmu_shadow_zapped; u64 mmu_pte_write; u64 mmu_pde_zapped; u64 mmu_flooded; u64 mmu_recycled; u64 mmu_cache_miss; u64 mmu_unsync; union { struct { atomic64_t pages_4k; atomic64_t pages_2m; atomic64_t pages_1g; }; atomic64_t pages[KVM_NR_PAGE_SIZES]; }; u64 nx_lpage_splits; u64 max_mmu_page_hash_collisions; u64 max_mmu_rmap_size; }; struct kvm_vcpu_stat { struct kvm_vcpu_stat_generic generic; u64 pf_taken; u64 pf_fixed; u64 pf_emulate; u64 pf_spurious; u64 pf_fast; u64 pf_mmio_spte_created; u64 pf_guest; u64 tlb_flush; u64 invlpg; u64 exits; u64 io_exits; u64 mmio_exits; u64 signal_exits; u64 irq_window_exits; u64 nmi_window_exits; u64 l1d_flush; u64 halt_exits; u64 request_irq_exits; u64 irq_exits; u64 host_state_reload; u64 fpu_reload; u64 insn_emulation; u64 insn_emulation_fail; u64 hypercalls; u64 irq_injections; u64 nmi_injections; u64 req_event; u64 nested_run; u64 directed_yield_attempted; u64 directed_yield_successful; u64 preemption_reported; u64 preemption_other; u64 guest_mode; u64 notify_window_exits; }; struct x86_instruction_info; struct msr_data { bool host_initiated; u32 index; u64 data; }; struct kvm_lapic_irq { u32 vector; u16 delivery_mode; u16 dest_mode; bool level; u16 trig_mode; u32 shorthand; u32 dest_id; bool msi_redir_hint; }; static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical) { return dest_mode_logical ? 
APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL; } struct kvm_x86_ops { const char *name; int (*check_processor_compatibility)(void); int (*enable_virtualization_cpu)(void); void (*disable_virtualization_cpu)(void); cpu_emergency_virt_cb *emergency_disable_virtualization_cpu; void (*hardware_unsetup)(void); bool (*has_emulated_msr)(struct kvm *kvm, u32 index); void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu); unsigned int vm_size; int (*vm_init)(struct kvm *kvm); void (*vm_destroy)(struct kvm *kvm); /* Create, but do not attach this VCPU */ int (*vcpu_precreate)(struct kvm *kvm); int (*vcpu_create)(struct kvm_vcpu *vcpu); void (*vcpu_free)(struct kvm_vcpu *vcpu); void (*vcpu_reset)(struct kvm_vcpu *vcpu, bool init_event); void (*prepare_switch_to_guest)(struct kvm_vcpu *vcpu); void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); void (*vcpu_put)(struct kvm_vcpu *vcpu); void (*update_exception_bitmap)(struct kvm_vcpu *vcpu); int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); void (*get_segment)(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); int (*get_cpl)(struct kvm_vcpu *vcpu); int (*get_cpl_no_cache)(struct kvm_vcpu *vcpu); void (*set_segment)(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); bool (*is_valid_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); void (*post_set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu); void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value); void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value); void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); bool (*get_if_flag)(struct kvm_vcpu *vcpu); void (*flush_tlb_all)(struct kvm_vcpu *vcpu); void (*flush_tlb_current)(struct kvm_vcpu *vcpu); #if IS_ENABLED(CONFIG_HYPERV) int (*flush_remote_tlbs)(struct kvm *kvm); int (*flush_remote_tlbs_range)(struct kvm *kvm, gfn_t gfn, gfn_t nr_pages); #endif /* * Flush any TLB entries associated with the given GVA. * Does not need to flush GPA->HPA mappings. * Can potentially get non-canonical addresses through INVLPGs, which * the implementation may choose to ignore if appropriate. */ void (*flush_tlb_gva)(struct kvm_vcpu *vcpu, gva_t addr); /* * Flush any TLB entries created by the guest. Like tlb_flush_gva(), * does not need to flush GPA->HPA mappings. 
*/ void (*flush_tlb_guest)(struct kvm_vcpu *vcpu); int (*vcpu_pre_run)(struct kvm_vcpu *vcpu); enum exit_fastpath_completion (*vcpu_run)(struct kvm_vcpu *vcpu, bool force_immediate_exit); int (*handle_exit)(struct kvm_vcpu *vcpu, enum exit_fastpath_completion exit_fastpath); int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); void (*update_emulated_instruction)(struct kvm_vcpu *vcpu); void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu); void (*patch_hypercall)(struct kvm_vcpu *vcpu, unsigned char *hypercall_addr); void (*inject_irq)(struct kvm_vcpu *vcpu, bool reinjected); void (*inject_nmi)(struct kvm_vcpu *vcpu); void (*inject_exception)(struct kvm_vcpu *vcpu); void (*cancel_injection)(struct kvm_vcpu *vcpu); int (*interrupt_allowed)(struct kvm_vcpu *vcpu, bool for_injection); int (*nmi_allowed)(struct kvm_vcpu *vcpu, bool for_injection); bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked); /* Whether or not a virtual NMI is pending in hardware. */ bool (*is_vnmi_pending)(struct kvm_vcpu *vcpu); /* * Attempt to pend a virtual NMI in hardware. Returns %true on success * to allow using static_call_ret0 as the fallback. */ bool (*set_vnmi_pending)(struct kvm_vcpu *vcpu); void (*enable_nmi_window)(struct kvm_vcpu *vcpu); void (*enable_irq_window)(struct kvm_vcpu *vcpu); void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); const bool x2apic_icr_is_split; const unsigned long required_apicv_inhibits; bool allow_apicv_in_x2apic_without_x2apic_virtualization; void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr); void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu); void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu); void (*deliver_interrupt)(struct kvm_lapic *apic, int delivery_mode, int trig_mode, int vector); int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr); u8 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level); /* Update external mapping with page table link. */ int (*link_external_spt)(struct kvm *kvm, gfn_t gfn, enum pg_level level, void *external_spt); /* Update the external page table from spte getting set. */ int (*set_external_spte)(struct kvm *kvm, gfn_t gfn, enum pg_level level, kvm_pfn_t pfn_for_gfn); /* Update external page tables for page table about to be freed. */ int (*free_external_spt)(struct kvm *kvm, gfn_t gfn, enum pg_level level, void *external_spt); /* Update external page table from spte getting removed, and flush TLB. */ int (*remove_external_spte)(struct kvm *kvm, gfn_t gfn, enum pg_level level, kvm_pfn_t pfn_for_gfn); bool (*has_wbinvd_exit)(void); u64 (*get_l2_tsc_offset)(struct kvm_vcpu *vcpu); u64 (*get_l2_tsc_multiplier)(struct kvm_vcpu *vcpu); void (*write_tsc_offset)(struct kvm_vcpu *vcpu); void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu); /* * Retrieve somewhat arbitrary exit/entry information. Intended to * be used only from within tracepoints or error paths. 
*/ void (*get_exit_info)(struct kvm_vcpu *vcpu, u32 *reason, u64 *info1, u64 *info2, u32 *intr_info, u32 *error_code); void (*get_entry_info)(struct kvm_vcpu *vcpu, u32 *intr_info, u32 *error_code); int (*check_intercept)(struct kvm_vcpu *vcpu, struct x86_instruction_info *info, enum x86_intercept_stage stage, struct x86_exception *exception); void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu); /* * Size of the CPU's dirty log buffer, i.e. VMX's PML buffer. A zero * value indicates CPU dirty logging is unsupported or disabled. */ int cpu_dirty_log_size; void (*update_cpu_dirty_logging)(struct kvm_vcpu *vcpu); const struct kvm_x86_nested_ops *nested_ops; void (*vcpu_blocking)(struct kvm_vcpu *vcpu); void (*vcpu_unblocking)(struct kvm_vcpu *vcpu); int (*pi_update_irte)(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, bool set); void (*pi_start_assignment)(struct kvm *kvm); void (*apicv_pre_state_restore)(struct kvm_vcpu *vcpu); void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu); bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu); int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, bool *expired); void (*cancel_hv_timer)(struct kvm_vcpu *vcpu); void (*setup_mce)(struct kvm_vcpu *vcpu); #ifdef CONFIG_KVM_SMM int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection); int (*enter_smm)(struct kvm_vcpu *vcpu, union kvm_smram *smram); int (*leave_smm)(struct kvm_vcpu *vcpu, const union kvm_smram *smram); void (*enable_smi_window)(struct kvm_vcpu *vcpu); #endif int (*dev_get_attr)(u32 group, u64 attr, u64 *val); int (*mem_enc_ioctl)(struct kvm *kvm, void __user *argp); int (*mem_enc_register_region)(struct kvm *kvm, struct kvm_enc_region *argp); int (*mem_enc_unregister_region)(struct kvm *kvm, struct kvm_enc_region *argp); int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd); int (*vm_move_enc_context_from)(struct kvm *kvm, unsigned int source_fd); void (*guest_memory_reclaimed)(struct kvm *kvm); int (*get_feature_msr)(u32 msr, u64 *data); int (*check_emulate_instruction)(struct kvm_vcpu *vcpu, int emul_type, void *insn, int insn_len); bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu); int (*enable_l2_tlb_flush)(struct kvm_vcpu *vcpu); void (*migrate_timers)(struct kvm_vcpu *vcpu); void (*msr_filter_changed)(struct kvm_vcpu *vcpu); int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err); void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector); /* * Returns vCPU specific APICv inhibit reasons */ unsigned long (*vcpu_get_apicv_inhibit_reasons)(struct kvm_vcpu *vcpu); gva_t (*get_untagged_addr)(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags); void *(*alloc_apic_backing_page)(struct kvm_vcpu *vcpu); int (*gmem_prepare)(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order); void (*gmem_invalidate)(kvm_pfn_t start, kvm_pfn_t end); int (*private_max_mapping_level)(struct kvm *kvm, kvm_pfn_t pfn); }; struct kvm_x86_nested_ops { void (*leave_nested)(struct kvm_vcpu *vcpu); bool (*is_exception_vmexit)(struct kvm_vcpu *vcpu, u8 vector, u32 error_code); int (*check_events)(struct kvm_vcpu *vcpu); bool (*has_events)(struct kvm_vcpu *vcpu, bool for_injection); void (*triple_fault)(struct kvm_vcpu *vcpu); int (*get_state)(struct kvm_vcpu *vcpu, struct kvm_nested_state __user *user_kvm_nested_state, unsigned user_data_size); int (*set_state)(struct kvm_vcpu *vcpu, struct kvm_nested_state __user *user_kvm_nested_state, struct kvm_nested_state *kvm_state); bool (*get_nested_state_pages)(struct kvm_vcpu *vcpu); int 
(*write_log_dirty)(struct kvm_vcpu *vcpu, gpa_t l2_gpa); int (*enable_evmcs)(struct kvm_vcpu *vcpu, uint16_t *vmcs_version); uint16_t (*get_evmcs_version)(struct kvm_vcpu *vcpu); void (*hv_inject_synthetic_vmexit_post_tlb_flush)(struct kvm_vcpu *vcpu); }; struct kvm_x86_init_ops { int (*hardware_setup)(void); unsigned int (*handle_intel_pt_intr)(void); struct kvm_x86_ops *runtime_ops; struct kvm_pmu_ops *pmu_ops; }; struct kvm_arch_async_pf { u32 token; gfn_t gfn; unsigned long cr3; bool direct_map; u64 error_code; }; extern u32 __read_mostly kvm_nr_uret_msrs; extern bool __read_mostly allow_smaller_maxphyaddr; extern bool __read_mostly enable_apicv; extern struct kvm_x86_ops kvm_x86_ops; #define kvm_x86_call(func) static_call(kvm_x86_##func) #define kvm_pmu_call(func) static_call(kvm_x86_pmu_##func) #define KVM_X86_OP(func) \ DECLARE_STATIC_CALL(kvm_x86_##func, *(((struct kvm_x86_ops *)0)->func)); #define KVM_X86_OP_OPTIONAL KVM_X86_OP #define KVM_X86_OP_OPTIONAL_RET0 KVM_X86_OP #include <asm/kvm-x86-ops.h> int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops); void kvm_x86_vendor_exit(void); #define __KVM_HAVE_ARCH_VM_ALLOC static inline struct kvm *kvm_arch_alloc_vm(void) { return __vmalloc(kvm_x86_ops.vm_size, GFP_KERNEL_ACCOUNT | __GFP_ZERO); } #define __KVM_HAVE_ARCH_VM_FREE void kvm_arch_free_vm(struct kvm *kvm); #if IS_ENABLED(CONFIG_HYPERV) #define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm) { if (kvm_x86_ops.flush_remote_tlbs && !kvm_x86_call(flush_remote_tlbs)(kvm)) return 0; else return -ENOTSUPP; } #define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE static inline int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages) { if (!kvm_x86_ops.flush_remote_tlbs_range) return -EOPNOTSUPP; return kvm_x86_call(flush_remote_tlbs_range)(kvm, gfn, nr_pages); } #endif /* CONFIG_HYPERV */ enum kvm_intr_type { /* Values are arbitrary, but must be non-zero. */ KVM_HANDLING_IRQ = 1, KVM_HANDLING_NMI, }; /* Enable perf NMI and timer modes to work, and minimise false positives. 
*/ #define kvm_arch_pmi_in_guest(vcpu) \ ((vcpu) && (vcpu)->arch.handling_intr_from_guest && \ (!!in_nmi() == ((vcpu)->arch.handling_intr_from_guest == KVM_HANDLING_NMI))) void __init kvm_mmu_x86_module_init(void); int kvm_mmu_vendor_module_init(void); void kvm_mmu_vendor_module_exit(void); void kvm_mmu_destroy(struct kvm_vcpu *vcpu); int kvm_mmu_create(struct kvm_vcpu *vcpu); void kvm_mmu_init_vm(struct kvm *kvm); void kvm_mmu_uninit_vm(struct kvm *kvm); void kvm_mmu_init_memslot_memory_attributes(struct kvm *kvm, struct kvm_memory_slot *slot); void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu); void kvm_mmu_reset_context(struct kvm_vcpu *vcpu); void kvm_mmu_slot_remove_write_access(struct kvm *kvm, const struct kvm_memory_slot *memslot, int start_level); void kvm_mmu_slot_try_split_huge_pages(struct kvm *kvm, const struct kvm_memory_slot *memslot, int target_level); void kvm_mmu_try_split_huge_pages(struct kvm *kvm, const struct kvm_memory_slot *memslot, u64 start, u64 end, int target_level); void kvm_mmu_recover_huge_pages(struct kvm *kvm, const struct kvm_memory_slot *memslot); void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, const struct kvm_memory_slot *memslot); void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen); void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages); void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end); int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, const void *val, int bytes); struct kvm_irq_mask_notifier { void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked); int irq; struct hlist_node link; }; void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, struct kvm_irq_mask_notifier *kimn); void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, struct kvm_irq_mask_notifier *kimn); void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, bool mask); extern bool tdp_enabled; u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu); /* * EMULTYPE_NO_DECODE - Set when re-emulating an instruction (after completing * userspace I/O) to indicate that the emulation context * should be reused as is, i.e. skip initialization of * emulation context, instruction fetch and decode. * * EMULTYPE_TRAP_UD - Set when emulating an intercepted #UD from hardware. * Indicates that only select instructions (tagged with * EmulateOnUD) should be emulated (to minimize the emulator * attack surface). See also EMULTYPE_TRAP_UD_FORCED. * * EMULTYPE_SKIP - Set when emulating solely to skip an instruction, i.e. to * decode the instruction length. For use *only* by * kvm_x86_ops.skip_emulated_instruction() implementations if * EMULTYPE_COMPLETE_USER_EXIT is not set. * * EMULTYPE_ALLOW_RETRY_PF - Set when the emulator should resume the guest to * retry native execution under certain conditions, * Can only be set in conjunction with EMULTYPE_PF. * * EMULTYPE_TRAP_UD_FORCED - Set when emulating an intercepted #UD that was * triggered by KVM's magic "force emulation" prefix, * which is opt in via module param (off by default). * Bypasses EmulateOnUD restriction despite emulating * due to an intercepted #UD (see EMULTYPE_TRAP_UD). * Used to test the full emulator from userspace. * * EMULTYPE_VMWARE_GP - Set when emulating an intercepted #GP for VMware * backdoor emulation, which is opt in via module param. * VMware backdoor emulation handles select instructions * and reinjects the #GP for all other cases. 
* * EMULTYPE_PF - Set when an intercepted #PF triggers the emulation, in which case * the CR2/GPA value pass on the stack is valid. * * EMULTYPE_COMPLETE_USER_EXIT - Set when the emulator should update interruptibility * state and inject single-step #DBs after skipping * an instruction (after completing userspace I/O). * * EMULTYPE_WRITE_PF_TO_SP - Set when emulating an intercepted page fault that * is attempting to write a gfn that contains one or * more of the PTEs used to translate the write itself, * and the owning page table is being shadowed by KVM. * If emulation of the faulting instruction fails and * this flag is set, KVM will exit to userspace instead * of retrying emulation as KVM cannot make forward * progress. * * If emulation fails for a write to guest page tables, * KVM unprotects (zaps) the shadow page for the target * gfn and resumes the guest to retry the non-emulatable * instruction (on hardware). Unprotecting the gfn * doesn't allow forward progress for a self-changing * access because doing so also zaps the translation for * the gfn, i.e. retrying the instruction will hit a * !PRESENT fault, which results in a new shadow page * and sends KVM back to square one. */ #define EMULTYPE_NO_DECODE (1 << 0) #define EMULTYPE_TRAP_UD (1 << 1) #define EMULTYPE_SKIP (1 << 2) #define EMULTYPE_ALLOW_RETRY_PF (1 << 3) #define EMULTYPE_TRAP_UD_FORCED (1 << 4) #define EMULTYPE_VMWARE_GP (1 << 5) #define EMULTYPE_PF (1 << 6) #define EMULTYPE_COMPLETE_USER_EXIT (1 << 7) #define EMULTYPE_WRITE_PF_TO_SP (1 << 8) static inline bool kvm_can_emulate_event_vectoring(int emul_type) { return !(emul_type & EMULTYPE_PF); } int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type); int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, void *insn, int insn_len); void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, u64 *data, u8 ndata); void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu); void kvm_prepare_event_vectoring_exit(struct kvm_vcpu *vcpu, gpa_t gpa); void kvm_enable_efer_bits(u64); bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer); int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data); int kvm_set_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 data); int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated); int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data); int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data); int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu); int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu); int kvm_emulate_as_nop(struct kvm_vcpu *vcpu); int kvm_emulate_invd(struct kvm_vcpu *vcpu); int kvm_emulate_mwait(struct kvm_vcpu *vcpu); int kvm_handle_invalid_op(struct kvm_vcpu *vcpu); int kvm_emulate_monitor(struct kvm_vcpu *vcpu); int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in); int kvm_emulate_cpuid(struct kvm_vcpu *vcpu); int kvm_emulate_halt(struct kvm_vcpu *vcpu); int kvm_emulate_halt_noskip(struct kvm_vcpu *vcpu); int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu); int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); void kvm_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, int reason, bool has_error_code, u32 error_code); 
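/*
 * Illustrative sketch, not part of the upstream header: shows one typical way
 * the EMULTYPE_* flags documented above are passed to the emulator. The
 * helper name example_complete_emulated_io() is hypothetical; the
 * kvm_emulate_instruction() declaration and the EMULTYPE_* defines it uses
 * are the real ones from this header.
 */
static inline int example_complete_emulated_io(struct kvm_vcpu *vcpu)
{
	/*
	 * EMULTYPE_NO_DECODE: the emulation context was already initialized
	 * before exiting to userspace for I/O, so skip instruction fetch and
	 * decode and simply resume the in-flight emulation.
	 */
	return kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
}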
void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0); void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4); int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val); unsigned long kvm_get_dr(struct kvm_vcpu *vcpu, int dr); unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw); int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu); int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr); int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr); unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu); void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu); void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long payload); void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned int nr, bool has_error_code, u32 error_code); void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr); static inline int __kvm_irq_line_state(unsigned long *irq_state, int irq_source_id, int level) { /* Logical OR for level trig interrupt */ if (level) __set_bit(irq_source_id, irq_state); else __clear_bit(irq_source_id, irq_state); return !!(*irq_state); } int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level); void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id); void kvm_inject_nmi(struct kvm_vcpu *vcpu); int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu); void kvm_update_dr7(struct kvm_vcpu *vcpu); bool __kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, bool always_retry); static inline bool kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa) { return __kvm_mmu_unprotect_gfn_and_retry(vcpu, cr2_or_gpa, false); } void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu, ulong roots_to_free); void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu); gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, struct x86_exception *exception); gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, struct x86_exception *exception); gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, struct x86_exception *exception); bool kvm_apicv_activated(struct kvm *kvm); bool kvm_vcpu_apicv_activated(struct kvm_vcpu *vcpu); void __kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu); void __kvm_set_or_clear_apicv_inhibit(struct kvm *kvm, enum kvm_apicv_inhibit reason, bool set); void kvm_set_or_clear_apicv_inhibit(struct kvm *kvm, enum kvm_apicv_inhibit reason, bool set); static inline void kvm_set_apicv_inhibit(struct kvm *kvm, enum kvm_apicv_inhibit reason) { kvm_set_or_clear_apicv_inhibit(kvm, reason, true); } static inline void kvm_clear_apicv_inhibit(struct kvm *kvm, enum kvm_apicv_inhibit reason) { kvm_set_or_clear_apicv_inhibit(kvm, reason, false); } int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t 
cr2_or_gpa, u64 error_code, void *insn, int insn_len); void kvm_mmu_print_sptes(struct kvm_vcpu *vcpu, gpa_t gpa, const char *msg); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); void kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, u64 addr, unsigned long roots); void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid); void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd); void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, int tdp_max_root_level, int tdp_huge_page_level); #ifdef CONFIG_KVM_PRIVATE_MEM #define kvm_arch_has_private_mem(kvm) ((kvm)->arch.has_private_mem) #else #define kvm_arch_has_private_mem(kvm) false #endif #define kvm_arch_has_readonly_mem(kvm) (!(kvm)->arch.has_protected_state) static inline u16 kvm_read_ldt(void) { u16 ldt; asm("sldt %0" : "=g"(ldt)); return ldt; } static inline void kvm_load_ldt(u16 sel) { asm("lldt %0" : : "rm"(sel)); } #ifdef CONFIG_X86_64 static inline unsigned long read_msr(unsigned long msr) { u64 value; rdmsrl(msr, value); return value; } #endif static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) { kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); } #define TSS_IOPB_BASE_OFFSET 0x66 #define TSS_BASE_SIZE 0x68 #define TSS_IOPB_SIZE (65536 / 8) #define TSS_REDIRECTION_SIZE (256 / 8) #define RMODE_TSS_SIZE \ (TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 1) enum { TASK_SWITCH_CALL = 0, TASK_SWITCH_IRET = 1, TASK_SWITCH_JMP = 2, TASK_SWITCH_GATE = 3, }; #define HF_GUEST_MASK (1 << 0) /* VCPU is in guest-mode */ #ifdef CONFIG_KVM_SMM #define HF_SMM_MASK (1 << 1) #define HF_SMM_INSIDE_NMI_MASK (1 << 2) # define KVM_MAX_NR_ADDRESS_SPACES 2 /* SMM is currently unsupported for guests with private memory. */ # define kvm_arch_nr_memslot_as_ids(kvm) (kvm_arch_has_private_mem(kvm) ? 1 : 2) # define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 
1 : 0) # define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm) #else # define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, 0) #endif int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); int kvm_cpu_has_extint(struct kvm_vcpu *v); int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); int kvm_cpu_get_extint(struct kvm_vcpu *v); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, unsigned long ipi_bitmap_high, u32 min, unsigned long icr, int op_64_bit); int kvm_add_user_return_msr(u32 msr); int kvm_find_user_return_msr(u32 msr); int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask); static inline bool kvm_is_supported_user_return_msr(u32 msr) { return kvm_find_user_return_msr(msr) >= 0; } u64 kvm_scale_tsc(u64 tsc, u64 ratio); u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc); u64 kvm_calc_nested_tsc_offset(u64 l1_offset, u64 l2_offset, u64 l2_multiplier); u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier); unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu); bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); void kvm_make_scan_ioapic_request(struct kvm *kvm); void kvm_make_scan_ioapic_request_mask(struct kvm *kvm, unsigned long *vcpu_bitmap); bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu); bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu); extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu); int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err); void __user *__x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size); bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu); bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu); bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, struct kvm_vcpu **dest_vcpu); void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, struct kvm_lapic_irq *irq); static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq) { /* We can only post Fixed and LowPrio IRQs */ return (irq->delivery_mode == APIC_DM_FIXED || irq->delivery_mode == APIC_DM_LOWEST); } static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) { kvm_x86_call(vcpu_blocking)(vcpu); } static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) { kvm_x86_call(vcpu_unblocking)(vcpu); } static inline int kvm_cpu_get_apicid(int mps_cpu) { #ifdef CONFIG_X86_LOCAL_APIC return default_cpu_present_to_apicid(mps_cpu); #else WARN_ON_ONCE(1); return BAD_APICID; #endif } int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages); #define KVM_CLOCK_VALID_FLAGS \ (KVM_CLOCK_TSC_STABLE | KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC) #define KVM_X86_VALID_QUIRKS \ (KVM_X86_QUIRK_LINT0_REENABLED | \ KVM_X86_QUIRK_CD_NW_CLEARED | \ KVM_X86_QUIRK_LAPIC_MMIO_HOLE | \ KVM_X86_QUIRK_OUT_7E_INC_RIP | \ KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT | \ KVM_X86_QUIRK_FIX_HYPERCALL_INSN | \ KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS | \ KVM_X86_QUIRK_SLOT_ZAP_ALL | \ KVM_X86_QUIRK_STUFF_FEATURE_MSRS) /* * KVM previously used a u32 field in kvm_run 
to indicate the hypercall was * initiated from long mode. KVM now sets bit 0 to indicate long mode, but the * remaining 31 lower bits must be 0 to preserve ABI. */ #define KVM_EXIT_HYPERCALL_MBZ GENMASK_ULL(31, 1) #endif /* _ASM_X86_KVM_HOST_H */ |
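/*
 * Illustrative sketch, not part of either file above or below: how a vendor
 * callback in struct kvm_x86_ops is invoked through the kvm_x86_call()
 * static-call wrapper declared in the header above. The wrapper name
 * example_guest_cpl() is hypothetical; get_cpl is a real kvm_x86_ops hook.
 */
static inline int example_guest_cpl(struct kvm_vcpu *vcpu)
{
	/* Dispatches to the vendor (VMX/SVM) implementation of get_cpl. */
	return kvm_x86_call(get_cpl)(vcpu);
}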
// SPDX-License-Identifier: GPL-2.0+ /* * Transport & Protocol Driver for In-System Design, Inc.
ISD200 ASIC * * Current development and maintenance: * (C) 2001-2002 Björn Stenberg (bjorn@haxx.se) * * Developed with the assistance of: * (C) 2002 Alan Stern <stern@rowland.org> * * Initial work: * (C) 2000 In-System Design, Inc. (support@in-system.com) * * The ISD200 ASIC does not natively support ATA devices. The chip * does implement an interface, the ATA Command Block (ATACB) which provides * a means of passing ATA commands and ATA register accesses to a device. * * History: * * 2002-10-19: Removed the specialized transfer routines. * (Alan Stern <stern@rowland.harvard.edu>) * 2001-02-24: Removed lots of duplicate code and simplified the structure. * (bjorn@haxx.se) * 2002-01-16: Fixed endianness bug so it works on the ppc arch. * (Luc Saillard <luc@saillard.org>) * 2002-01-17: All bitfields removed. * (bjorn@haxx.se) */ /* Include files */ #include <linux/jiffies.h> #include <linux/errno.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/ata.h> #include <linux/hdreg.h> #include <linux/scatterlist.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> #include <scsi/scsi_device.h> #include "usb.h" #include "transport.h" #include "protocol.h" #include "debug.h" #include "scsiglue.h" #define DRV_NAME "ums-isd200" MODULE_DESCRIPTION("Driver for In-System Design, Inc. ISD200 ASIC"); MODULE_AUTHOR("Björn Stenberg <bjorn@haxx.se>"); MODULE_LICENSE("GPL"); MODULE_IMPORT_NS("USB_STORAGE"); static int isd200_Initialization(struct us_data *us); /* * The table of devices */ #define UNUSUAL_DEV(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax, \ vendorName, productName, useProtocol, useTransport, \ initFunction, flags) \ { USB_DEVICE_VER(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax), \ .driver_info = (flags) } static const struct usb_device_id isd200_usb_ids[] = { # include "unusual_isd200.h" { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, isd200_usb_ids); #undef UNUSUAL_DEV /* * The flags table */ #define UNUSUAL_DEV(idVendor, idProduct, bcdDeviceMin, bcdDeviceMax, \ vendor_name, product_name, use_protocol, use_transport, \ init_function, Flags) \ { \ .vendorName = vendor_name, \ .productName = product_name, \ .useProtocol = use_protocol, \ .useTransport = use_transport, \ .initFunction = init_function, \ } static const struct us_unusual_dev isd200_unusual_dev_list[] = { # include "unusual_isd200.h" { } /* Terminating entry */ }; #undef UNUSUAL_DEV /* Timeout defines (in Seconds) */ #define ISD200_ENUM_BSY_TIMEOUT 35 #define ISD200_ENUM_DETECT_TIMEOUT 30 #define ISD200_DEFAULT_TIMEOUT 30 /* device flags */ #define DF_ATA_DEVICE 0x0001 #define DF_MEDIA_STATUS_ENABLED 0x0002 #define DF_REMOVABLE_MEDIA 0x0004 /* capability bit definitions */ #define CAPABILITY_DMA 0x01 #define CAPABILITY_LBA 0x02 /* command_setX bit definitions */ #define COMMANDSET_REMOVABLE 0x02 #define COMMANDSET_MEDIA_STATUS 0x10 /* ATA Vendor Specific defines */ #define ATA_ADDRESS_DEVHEAD_STD 0xa0 #define ATA_ADDRESS_DEVHEAD_LBA_MODE 0x40 #define ATA_ADDRESS_DEVHEAD_SLAVE 0x10 /* Action Select bits */ #define ACTION_SELECT_0 0x01 #define ACTION_SELECT_1 0x02 #define ACTION_SELECT_2 0x04 #define ACTION_SELECT_3 0x08 #define ACTION_SELECT_4 0x10 #define ACTION_SELECT_5 0x20 #define ACTION_SELECT_6 0x40 #define ACTION_SELECT_7 0x80 /* Register Select bits */ #define REG_ALTERNATE_STATUS 0x01 #define REG_DEVICE_CONTROL 0x01 #define REG_ERROR 0x02 #define REG_FEATURES 0x02 #define REG_SECTOR_COUNT 0x04 #define REG_SECTOR_NUMBER 0x08 #define REG_CYLINDER_LOW 0x10 #define REG_CYLINDER_HIGH 0x20 
#define REG_DEVICE_HEAD 0x40 #define REG_STATUS 0x80 #define REG_COMMAND 0x80 /* ATA registers offset definitions */ #define ATA_REG_ERROR_OFFSET 1 #define ATA_REG_LCYL_OFFSET 4 #define ATA_REG_HCYL_OFFSET 5 #define ATA_REG_STATUS_OFFSET 7 /* ATA error definitions not in <linux/hdreg.h> */ #define ATA_ERROR_MEDIA_CHANGE 0x20 /* ATA command definitions not in <linux/hdreg.h> */ #define ATA_COMMAND_GET_MEDIA_STATUS 0xDA #define ATA_COMMAND_MEDIA_EJECT 0xED /* ATA drive control definitions */ #define ATA_DC_DISABLE_INTERRUPTS 0x02 #define ATA_DC_RESET_CONTROLLER 0x04 #define ATA_DC_REENABLE_CONTROLLER 0x00 /* * General purpose return codes */ #define ISD200_ERROR -1 #define ISD200_GOOD 0 /* * Transport return codes */ #define ISD200_TRANSPORT_GOOD 0 /* Transport good, command good */ #define ISD200_TRANSPORT_FAILED 1 /* Transport good, command failed */ #define ISD200_TRANSPORT_ERROR 2 /* Transport bad (i.e. device dead) */ /* driver action codes */ #define ACTION_READ_STATUS 0 #define ACTION_RESET 1 #define ACTION_REENABLE 2 #define ACTION_SOFT_RESET 3 #define ACTION_ENUM 4 #define ACTION_IDENTIFY 5 /* * ata_cdb struct */ union ata_cdb { struct { unsigned char SignatureByte0; unsigned char SignatureByte1; unsigned char ActionSelect; unsigned char RegisterSelect; unsigned char TransferBlockSize; unsigned char WriteData3F6; unsigned char WriteData1F1; unsigned char WriteData1F2; unsigned char WriteData1F3; unsigned char WriteData1F4; unsigned char WriteData1F5; unsigned char WriteData1F6; unsigned char WriteData1F7; unsigned char Reserved[3]; } generic; struct { unsigned char SignatureByte0; unsigned char SignatureByte1; unsigned char ActionSelect; unsigned char RegisterSelect; unsigned char TransferBlockSize; unsigned char AlternateStatusByte; unsigned char ErrorByte; unsigned char SectorCountByte; unsigned char SectorNumberByte; unsigned char CylinderLowByte; unsigned char CylinderHighByte; unsigned char DeviceHeadByte; unsigned char StatusByte; unsigned char Reserved[3]; } read; struct { unsigned char SignatureByte0; unsigned char SignatureByte1; unsigned char ActionSelect; unsigned char RegisterSelect; unsigned char TransferBlockSize; unsigned char DeviceControlByte; unsigned char FeaturesByte; unsigned char SectorCountByte; unsigned char SectorNumberByte; unsigned char CylinderLowByte; unsigned char CylinderHighByte; unsigned char DeviceHeadByte; unsigned char CommandByte; unsigned char Reserved[3]; } write; }; /* * Inquiry data structure. This is the data returned from the target * after it receives an inquiry. * * This structure may be extended by the number of bytes specified * in the field AdditionalLength. The defined size constant only * includes fields through ProductRevisionLevel. 
*/ /* * DeviceType field */ #define DIRECT_ACCESS_DEVICE 0x00 /* disks */ #define DEVICE_REMOVABLE 0x80 struct inquiry_data { unsigned char DeviceType; unsigned char DeviceTypeModifier; unsigned char Versions; unsigned char Format; unsigned char AdditionalLength; unsigned char Reserved[2]; unsigned char Capability; unsigned char VendorId[8]; unsigned char ProductId[16]; unsigned char ProductRevisionLevel[4]; unsigned char VendorSpecific[20]; unsigned char Reserved3[40]; } __attribute__ ((packed)); /* * INQUIRY data buffer size */ #define INQUIRYDATABUFFERSIZE 36 /* * ISD200 CONFIG data struct */ #define ATACFG_TIMING 0x0f #define ATACFG_ATAPI_RESET 0x10 #define ATACFG_MASTER 0x20 #define ATACFG_BLOCKSIZE 0xa0 #define ATACFGE_LAST_LUN 0x07 #define ATACFGE_DESC_OVERRIDE 0x08 #define ATACFGE_STATE_SUSPEND 0x10 #define ATACFGE_SKIP_BOOT 0x20 #define ATACFGE_CONF_DESC2 0x40 #define ATACFGE_INIT_STATUS 0x80 #define CFG_CAPABILITY_SRST 0x01 struct isd200_config { unsigned char EventNotification; unsigned char ExternalClock; unsigned char ATAInitTimeout; unsigned char ATAConfig; unsigned char ATAMajorCommand; unsigned char ATAMinorCommand; unsigned char ATAExtraConfig; unsigned char Capability; }__attribute__ ((packed)); /* * ISD200 driver information struct */ struct isd200_info { struct inquiry_data InquiryData; u16 *id; struct isd200_config ConfigData; unsigned char *RegsBuf; unsigned char ATARegs[8]; unsigned char DeviceHead; unsigned char DeviceFlags; /* maximum number of LUNs supported */ unsigned char MaxLUNs; unsigned char cmnd[MAX_COMMAND_SIZE]; struct scsi_cmnd srb; struct scatterlist sg; }; /* * Read Capacity Data - returned in Big Endian format */ struct read_capacity_data { __be32 LogicalBlockAddress; __be32 BytesPerBlock; }; /* * Read Block Limits Data - returned in Big Endian format * This structure returns the maximum and minimum block * size for a TAPE device. */ struct read_block_limits { unsigned char Reserved; unsigned char BlockMaximumSize[3]; unsigned char BlockMinimumSize[2]; }; /* * Sense Data Format */ #define SENSE_ERRCODE 0x7f #define SENSE_ERRCODE_VALID 0x80 #define SENSE_FLAG_SENSE_KEY 0x0f #define SENSE_FLAG_BAD_LENGTH 0x20 #define SENSE_FLAG_END_OF_MEDIA 0x40 #define SENSE_FLAG_FILE_MARK 0x80 struct sense_data { unsigned char ErrorCode; unsigned char SegmentNumber; unsigned char Flags; unsigned char Information[4]; unsigned char AdditionalSenseLength; unsigned char CommandSpecificInformation[4]; unsigned char AdditionalSenseCode; unsigned char AdditionalSenseCodeQualifier; unsigned char FieldReplaceableUnitCode; unsigned char SenseKeySpecific[3]; } __attribute__ ((packed)); /* * Default request sense buffer size */ #define SENSE_BUFFER_SIZE 18 /*********************************************************************** * Helper routines ***********************************************************************/ /************************************************************************** * isd200_build_sense * * Builds an artificial sense buffer to report the results of a * failed command. 
* * RETURNS: * void */ static void isd200_build_sense(struct us_data *us, struct scsi_cmnd *srb) { struct isd200_info *info = (struct isd200_info *)us->extra; struct sense_data *buf = (struct sense_data *) &srb->sense_buffer[0]; unsigned char error = info->ATARegs[ATA_REG_ERROR_OFFSET]; if(error & ATA_ERROR_MEDIA_CHANGE) { buf->ErrorCode = 0x70 | SENSE_ERRCODE_VALID; buf->AdditionalSenseLength = 0xb; buf->Flags = UNIT_ATTENTION; buf->AdditionalSenseCode = 0; buf->AdditionalSenseCodeQualifier = 0; } else if (error & ATA_MCR) { buf->ErrorCode = 0x70 | SENSE_ERRCODE_VALID; buf->AdditionalSenseLength = 0xb; buf->Flags = UNIT_ATTENTION; buf->AdditionalSenseCode = 0; buf->AdditionalSenseCodeQualifier = 0; } else if (error & ATA_TRK0NF) { buf->ErrorCode = 0x70 | SENSE_ERRCODE_VALID; buf->AdditionalSenseLength = 0xb; buf->Flags = NOT_READY; buf->AdditionalSenseCode = 0; buf->AdditionalSenseCodeQualifier = 0; } else if (error & ATA_UNC) { buf->ErrorCode = 0x70 | SENSE_ERRCODE_VALID; buf->AdditionalSenseLength = 0xb; buf->Flags = DATA_PROTECT; buf->AdditionalSenseCode = 0; buf->AdditionalSenseCodeQualifier = 0; } else { buf->ErrorCode = 0; buf->AdditionalSenseLength = 0; buf->Flags = 0; buf->AdditionalSenseCode = 0; buf->AdditionalSenseCodeQualifier = 0; } } /*********************************************************************** * Transport routines ***********************************************************************/ /************************************************************************** * isd200_set_srb(), isd200_srb_set_bufflen() * * Two helpers to facilitate in initialization of scsi_cmnd structure * Will need to change when struct scsi_cmnd changes */ static void isd200_set_srb(struct isd200_info *info, enum dma_data_direction dir, void* buff, unsigned bufflen) { struct scsi_cmnd *srb = &info->srb; if (buff) sg_init_one(&info->sg, buff, bufflen); srb->sc_data_direction = dir; srb->sdb.table.sgl = buff ? &info->sg : NULL; srb->sdb.length = bufflen; srb->sdb.table.nents = buff ? 
1 : 0; } static void isd200_srb_set_bufflen(struct scsi_cmnd *srb, unsigned bufflen) { srb->sdb.length = bufflen; } /************************************************************************** * isd200_action * * Routine for sending commands to the isd200 * * RETURNS: * ISD status code */ static int isd200_action( struct us_data *us, int action, void* pointer, int value ) { union ata_cdb ata; /* static to prevent this large struct being placed on the valuable stack */ static struct scsi_device srb_dev; struct isd200_info *info = (struct isd200_info *)us->extra; struct scsi_cmnd *srb = &info->srb; int status; memset(&ata, 0, sizeof(ata)); memcpy(srb->cmnd, info->cmnd, MAX_COMMAND_SIZE); srb->device = &srb_dev; ata.generic.SignatureByte0 = info->ConfigData.ATAMajorCommand; ata.generic.SignatureByte1 = info->ConfigData.ATAMinorCommand; ata.generic.TransferBlockSize = 1; switch ( action ) { case ACTION_READ_STATUS: usb_stor_dbg(us, " isd200_action(READ_STATUS)\n"); ata.generic.ActionSelect = ACTION_SELECT_0|ACTION_SELECT_2; ata.generic.RegisterSelect = REG_CYLINDER_LOW | REG_CYLINDER_HIGH | REG_STATUS | REG_ERROR; isd200_set_srb(info, DMA_FROM_DEVICE, pointer, value); break; case ACTION_ENUM: usb_stor_dbg(us, " isd200_action(ENUM,0x%02x)\n", value); ata.generic.ActionSelect = ACTION_SELECT_1|ACTION_SELECT_2| ACTION_SELECT_3|ACTION_SELECT_4| ACTION_SELECT_5; ata.generic.RegisterSelect = REG_DEVICE_HEAD; ata.write.DeviceHeadByte = value; isd200_set_srb(info, DMA_NONE, NULL, 0); break; case ACTION_RESET: usb_stor_dbg(us, " isd200_action(RESET)\n"); ata.generic.ActionSelect = ACTION_SELECT_1|ACTION_SELECT_2| ACTION_SELECT_3|ACTION_SELECT_4; ata.generic.RegisterSelect = REG_DEVICE_CONTROL; ata.write.DeviceControlByte = ATA_DC_RESET_CONTROLLER; isd200_set_srb(info, DMA_NONE, NULL, 0); break; case ACTION_REENABLE: usb_stor_dbg(us, " isd200_action(REENABLE)\n"); ata.generic.ActionSelect = ACTION_SELECT_1|ACTION_SELECT_2| ACTION_SELECT_3|ACTION_SELECT_4; ata.generic.RegisterSelect = REG_DEVICE_CONTROL; ata.write.DeviceControlByte = ATA_DC_REENABLE_CONTROLLER; isd200_set_srb(info, DMA_NONE, NULL, 0); break; case ACTION_SOFT_RESET: usb_stor_dbg(us, " isd200_action(SOFT_RESET)\n"); ata.generic.ActionSelect = ACTION_SELECT_1|ACTION_SELECT_5; ata.generic.RegisterSelect = REG_DEVICE_HEAD | REG_COMMAND; ata.write.DeviceHeadByte = info->DeviceHead; ata.write.CommandByte = ATA_CMD_DEV_RESET; isd200_set_srb(info, DMA_NONE, NULL, 0); break; case ACTION_IDENTIFY: usb_stor_dbg(us, " isd200_action(IDENTIFY)\n"); ata.generic.RegisterSelect = REG_COMMAND; ata.write.CommandByte = ATA_CMD_ID_ATA; isd200_set_srb(info, DMA_FROM_DEVICE, info->id, ATA_ID_WORDS * 2); break; default: usb_stor_dbg(us, "Error: Undefined action %d\n", action); return ISD200_ERROR; } memcpy(srb->cmnd, &ata, sizeof(ata.generic)); srb->cmd_len = sizeof(ata.generic); status = usb_stor_Bulk_transport(srb, us); if (status == USB_STOR_TRANSPORT_GOOD) status = ISD200_GOOD; else { usb_stor_dbg(us, " isd200_action(0x%02x) error: %d\n", action, status); status = ISD200_ERROR; /* need to reset device here */ } return status; } /************************************************************************** * isd200_read_regs * * Read ATA Registers * * RETURNS: * ISD status code */ static int isd200_read_regs( struct us_data *us ) { struct isd200_info *info = (struct isd200_info *)us->extra; int retStatus = ISD200_GOOD; int transferStatus; usb_stor_dbg(us, "Entering isd200_IssueATAReadRegs\n"); transferStatus = isd200_action( us, ACTION_READ_STATUS, info->RegsBuf, 
sizeof(info->ATARegs) ); if (transferStatus != ISD200_TRANSPORT_GOOD) { usb_stor_dbg(us, " Error reading ATA registers\n"); retStatus = ISD200_ERROR; } else { memcpy(info->ATARegs, info->RegsBuf, sizeof(info->ATARegs)); usb_stor_dbg(us, " Got ATA Register[ATA_REG_ERROR_OFFSET] = 0x%x\n", info->ATARegs[ATA_REG_ERROR_OFFSET]); } return retStatus; } /************************************************************************** * Invoke the transport and basic error-handling/recovery methods * * This is used by the protocol layers to actually send the message to * the device and receive the response. */ static void isd200_invoke_transport( struct us_data *us, struct scsi_cmnd *srb, union ata_cdb *ataCdb ) { int need_auto_sense = 0; int transferStatus; int result; /* send the command to the transport layer */ memcpy(srb->cmnd, ataCdb, sizeof(ataCdb->generic)); srb->cmd_len = sizeof(ataCdb->generic); transferStatus = usb_stor_Bulk_transport(srb, us); /* * if the command gets aborted by the higher layers, we need to * short-circuit all other processing */ if (test_bit(US_FLIDX_TIMED_OUT, &us->dflags)) { usb_stor_dbg(us, "-- command was aborted\n"); goto Handle_Abort; } switch (transferStatus) { case USB_STOR_TRANSPORT_GOOD: /* Indicate a good result */ srb->result = SAM_STAT_GOOD; break; case USB_STOR_TRANSPORT_NO_SENSE: usb_stor_dbg(us, "-- transport indicates protocol failure\n"); srb->result = SAM_STAT_CHECK_CONDITION; return; case USB_STOR_TRANSPORT_FAILED: usb_stor_dbg(us, "-- transport indicates command failure\n"); need_auto_sense = 1; break; case USB_STOR_TRANSPORT_ERROR: usb_stor_dbg(us, "-- transport indicates transport error\n"); srb->result = DID_ERROR << 16; /* Need reset here */ return; default: usb_stor_dbg(us, "-- transport indicates unknown error\n"); srb->result = DID_ERROR << 16; /* Need reset here */ return; } if ((scsi_get_resid(srb) > 0) && !((srb->cmnd[0] == REQUEST_SENSE) || (srb->cmnd[0] == INQUIRY) || (srb->cmnd[0] == MODE_SENSE) || (srb->cmnd[0] == LOG_SENSE) || (srb->cmnd[0] == MODE_SENSE_10))) { usb_stor_dbg(us, "-- unexpectedly short transfer\n"); need_auto_sense = 1; } if (need_auto_sense) { result = isd200_read_regs(us); if (test_bit(US_FLIDX_TIMED_OUT, &us->dflags)) { usb_stor_dbg(us, "-- auto-sense aborted\n"); goto Handle_Abort; } if (result == ISD200_GOOD) { isd200_build_sense(us, srb); srb->result = SAM_STAT_CHECK_CONDITION; /* If things are really okay, then let's show that */ if ((srb->sense_buffer[2] & 0xf) == 0x0) srb->result = SAM_STAT_GOOD; } else { srb->result = DID_ERROR << 16; /* Need reset here */ } } /* * Regardless of auto-sense, if we _know_ we have an error * condition, show that in the result code */ if (transferStatus == USB_STOR_TRANSPORT_FAILED) srb->result = SAM_STAT_CHECK_CONDITION; return; /* * abort processing: the bulk-only transport requires a reset * following an abort */ Handle_Abort: srb->result = DID_ABORT << 16; /* permit the reset transfer to take place */ clear_bit(US_FLIDX_ABORTING, &us->dflags); /* Need reset here */ } #ifdef CONFIG_USB_STORAGE_DEBUG static void isd200_log_config(struct us_data *us, struct isd200_info *info) { usb_stor_dbg(us, " Event Notification: 0x%x\n", info->ConfigData.EventNotification); usb_stor_dbg(us, " External Clock: 0x%x\n", info->ConfigData.ExternalClock); usb_stor_dbg(us, " ATA Init Timeout: 0x%x\n", info->ConfigData.ATAInitTimeout); usb_stor_dbg(us, " ATAPI Command Block Size: 0x%x\n", (info->ConfigData.ATAConfig & ATACFG_BLOCKSIZE) >> 6); usb_stor_dbg(us, " Master/Slave Selection: 0x%x\n", 
info->ConfigData.ATAConfig & ATACFG_MASTER); usb_stor_dbg(us, " ATAPI Reset: 0x%x\n", info->ConfigData.ATAConfig & ATACFG_ATAPI_RESET); usb_stor_dbg(us, " ATA Timing: 0x%x\n", info->ConfigData.ATAConfig & ATACFG_TIMING); usb_stor_dbg(us, " ATA Major Command: 0x%x\n", info->ConfigData.ATAMajorCommand); usb_stor_dbg(us, " ATA Minor Command: 0x%x\n", info->ConfigData.ATAMinorCommand); usb_stor_dbg(us, " Init Status: 0x%x\n", info->ConfigData.ATAExtraConfig & ATACFGE_INIT_STATUS); usb_stor_dbg(us, " Config Descriptor 2: 0x%x\n", info->ConfigData.ATAExtraConfig & ATACFGE_CONF_DESC2); usb_stor_dbg(us, " Skip Device Boot: 0x%x\n", info->ConfigData.ATAExtraConfig & ATACFGE_SKIP_BOOT); usb_stor_dbg(us, " ATA 3 State Suspend: 0x%x\n", info->ConfigData.ATAExtraConfig & ATACFGE_STATE_SUSPEND); usb_stor_dbg(us, " Descriptor Override: 0x%x\n", info->ConfigData.ATAExtraConfig & ATACFGE_DESC_OVERRIDE); usb_stor_dbg(us, " Last LUN Identifier: 0x%x\n", info->ConfigData.ATAExtraConfig & ATACFGE_LAST_LUN); usb_stor_dbg(us, " SRST Enable: 0x%x\n", info->ConfigData.ATAExtraConfig & CFG_CAPABILITY_SRST); } #endif /************************************************************************** * isd200_write_config * * Write the ISD200 Configuration data * * RETURNS: * ISD status code */ static int isd200_write_config( struct us_data *us ) { struct isd200_info *info = (struct isd200_info *)us->extra; int retStatus = ISD200_GOOD; int result; #ifdef CONFIG_USB_STORAGE_DEBUG usb_stor_dbg(us, "Entering isd200_write_config\n"); usb_stor_dbg(us, " Writing the following ISD200 Config Data:\n"); isd200_log_config(us, info); #endif /* let's send the command via the control pipe */ result = usb_stor_ctrl_transfer( us, us->send_ctrl_pipe, 0x01, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, 0x0000, 0x0002, (void *) &info->ConfigData, sizeof(info->ConfigData)); if (result >= 0) { usb_stor_dbg(us, " ISD200 Config Data was written successfully\n"); } else { usb_stor_dbg(us, " Request to write ISD200 Config Data failed!\n"); retStatus = ISD200_ERROR; } usb_stor_dbg(us, "Leaving isd200_write_config %08X\n", retStatus); return retStatus; } /************************************************************************** * isd200_read_config * * Reads the ISD200 Configuration data * * RETURNS: * ISD status code */ static int isd200_read_config( struct us_data *us ) { struct isd200_info *info = (struct isd200_info *)us->extra; int retStatus = ISD200_GOOD; int result; usb_stor_dbg(us, "Entering isd200_read_config\n"); /* read the configuration information from ISD200. Use this to */ /* determine what the special ATA CDB bytes are. 
*/ result = usb_stor_ctrl_transfer( us, us->recv_ctrl_pipe, 0x02, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, 0x0000, 0x0002, (void *) &info->ConfigData, sizeof(info->ConfigData)); if (result >= 0) { usb_stor_dbg(us, " Retrieved the following ISD200 Config Data:\n"); #ifdef CONFIG_USB_STORAGE_DEBUG isd200_log_config(us, info); #endif } else { usb_stor_dbg(us, " Request to get ISD200 Config Data failed!\n"); retStatus = ISD200_ERROR; } usb_stor_dbg(us, "Leaving isd200_read_config %08X\n", retStatus); return retStatus; } /************************************************************************** * isd200_atapi_soft_reset * * Perform an Atapi Soft Reset on the device * * RETURNS: * NT status code */ static int isd200_atapi_soft_reset( struct us_data *us ) { int retStatus = ISD200_GOOD; int transferStatus; usb_stor_dbg(us, "Entering isd200_atapi_soft_reset\n"); transferStatus = isd200_action( us, ACTION_SOFT_RESET, NULL, 0 ); if (transferStatus != ISD200_TRANSPORT_GOOD) { usb_stor_dbg(us, " Error issuing Atapi Soft Reset\n"); retStatus = ISD200_ERROR; } usb_stor_dbg(us, "Leaving isd200_atapi_soft_reset %08X\n", retStatus); return retStatus; } /************************************************************************** * isd200_srst * * Perform an SRST on the device * * RETURNS: * ISD status code */ static int isd200_srst( struct us_data *us ) { int retStatus = ISD200_GOOD; int transferStatus; usb_stor_dbg(us, "Entering isd200_SRST\n"); transferStatus = isd200_action( us, ACTION_RESET, NULL, 0 ); /* check to see if this request failed */ if (transferStatus != ISD200_TRANSPORT_GOOD) { usb_stor_dbg(us, " Error issuing SRST\n"); retStatus = ISD200_ERROR; } else { /* delay 10ms to give the drive a chance to see it */ msleep(10); transferStatus = isd200_action( us, ACTION_REENABLE, NULL, 0 ); if (transferStatus != ISD200_TRANSPORT_GOOD) { usb_stor_dbg(us, " Error taking drive out of reset\n"); retStatus = ISD200_ERROR; } else { /* delay 50ms to give the drive a chance to recover after SRST */ msleep(50); } } usb_stor_dbg(us, "Leaving isd200_srst %08X\n", retStatus); return retStatus; } /************************************************************************** * isd200_try_enum * * Helper function for isd200_manual_enum(). Does ENUM and READ_STATUS * and tries to analyze the status registers * * RETURNS: * ISD status code */ static int isd200_try_enum(struct us_data *us, unsigned char master_slave, int detect ) { int status = ISD200_GOOD; unsigned long endTime; struct isd200_info *info = (struct isd200_info *)us->extra; unsigned char *regs = info->RegsBuf; int recheckAsMaster = 0; if ( detect ) endTime = jiffies + ISD200_ENUM_DETECT_TIMEOUT * HZ; else endTime = jiffies + ISD200_ENUM_BSY_TIMEOUT * HZ; /* loop until we detect !BSY or timeout */ while(1) { status = isd200_action( us, ACTION_ENUM, NULL, master_slave ); if ( status != ISD200_GOOD ) break; status = isd200_action( us, ACTION_READ_STATUS, regs, 8 ); if ( status != ISD200_GOOD ) break; if (!detect) { if (regs[ATA_REG_STATUS_OFFSET] & ATA_BUSY) { usb_stor_dbg(us, " %s status is still BSY, try again...\n", master_slave == ATA_ADDRESS_DEVHEAD_STD ? "Master" : "Slave"); } else { usb_stor_dbg(us, " %s status !BSY, continue with next operation\n", master_slave == ATA_ADDRESS_DEVHEAD_STD ? 
"Master" : "Slave"); break; } } /* check for ATA_BUSY and */ /* ATA_DF (workaround ATA Zip drive) and */ /* ATA_ERR (workaround for Archos CD-ROM) */ else if (regs[ATA_REG_STATUS_OFFSET] & (ATA_BUSY | ATA_DF | ATA_ERR)) { usb_stor_dbg(us, " Status indicates it is not ready, try again...\n"); } /* check for DRDY, ATA devices set DRDY after SRST */ else if (regs[ATA_REG_STATUS_OFFSET] & ATA_DRDY) { usb_stor_dbg(us, " Identified ATA device\n"); info->DeviceFlags |= DF_ATA_DEVICE; info->DeviceHead = master_slave; break; } /* * check Cylinder High/Low to * determine if it is an ATAPI device */ else if (regs[ATA_REG_HCYL_OFFSET] == 0xEB && regs[ATA_REG_LCYL_OFFSET] == 0x14) { /* * It seems that the RICOH * MP6200A CD/RW drive will * report itself okay as a * slave when it is really a * master. So this check again * as a master device just to * make sure it doesn't report * itself okay as a master also */ if ((master_slave & ATA_ADDRESS_DEVHEAD_SLAVE) && !recheckAsMaster) { usb_stor_dbg(us, " Identified ATAPI device as slave. Rechecking again as master\n"); recheckAsMaster = 1; master_slave = ATA_ADDRESS_DEVHEAD_STD; } else { usb_stor_dbg(us, " Identified ATAPI device\n"); info->DeviceHead = master_slave; status = isd200_atapi_soft_reset(us); break; } } else { usb_stor_dbg(us, " Not ATA, not ATAPI - Weird\n"); break; } /* check for timeout on this request */ if (time_after_eq(jiffies, endTime)) { if (!detect) usb_stor_dbg(us, " BSY check timeout, just continue with next operation...\n"); else usb_stor_dbg(us, " Device detect timeout!\n"); break; } } return status; } /************************************************************************** * isd200_manual_enum * * Determines if the drive attached is an ATA or ATAPI and if it is a * master or slave. * * RETURNS: * ISD status code */ static int isd200_manual_enum(struct us_data *us) { struct isd200_info *info = (struct isd200_info *)us->extra; int retStatus = ISD200_GOOD; usb_stor_dbg(us, "Entering isd200_manual_enum\n"); retStatus = isd200_read_config(us); if (retStatus == ISD200_GOOD) { int isslave; /* master or slave? */ retStatus = isd200_try_enum( us, ATA_ADDRESS_DEVHEAD_STD, 0); if (retStatus == ISD200_GOOD) retStatus = isd200_try_enum( us, ATA_ADDRESS_DEVHEAD_SLAVE, 0); if (retStatus == ISD200_GOOD) { retStatus = isd200_srst(us); if (retStatus == ISD200_GOOD) /* ata or atapi? */ retStatus = isd200_try_enum( us, ATA_ADDRESS_DEVHEAD_STD, 1); } isslave = (info->DeviceHead & ATA_ADDRESS_DEVHEAD_SLAVE) ? 
1 : 0; if (!(info->ConfigData.ATAConfig & ATACFG_MASTER)) { usb_stor_dbg(us, " Setting Master/Slave selection to %d\n", isslave); info->ConfigData.ATAConfig &= 0x3f; info->ConfigData.ATAConfig |= (isslave<<6); retStatus = isd200_write_config(us); } } usb_stor_dbg(us, "Leaving isd200_manual_enum %08X\n", retStatus); return(retStatus); } static void isd200_fix_driveid(u16 *id) { #ifndef __LITTLE_ENDIAN # ifdef __BIG_ENDIAN int i; for (i = 0; i < ATA_ID_WORDS; i++) id[i] = __le16_to_cpu(id[i]); # else # error "Please fix <asm/byteorder.h>" # endif #endif } static void isd200_dump_driveid(struct us_data *us, u16 *id) { usb_stor_dbg(us, " Identify Data Structure:\n"); usb_stor_dbg(us, " config = 0x%x\n", id[ATA_ID_CONFIG]); usb_stor_dbg(us, " cyls = 0x%x\n", id[ATA_ID_CYLS]); usb_stor_dbg(us, " heads = 0x%x\n", id[ATA_ID_HEADS]); usb_stor_dbg(us, " track_bytes = 0x%x\n", id[4]); usb_stor_dbg(us, " sector_bytes = 0x%x\n", id[5]); usb_stor_dbg(us, " sectors = 0x%x\n", id[ATA_ID_SECTORS]); usb_stor_dbg(us, " serial_no[0] = 0x%x\n", *(char *)&id[ATA_ID_SERNO]); usb_stor_dbg(us, " buf_type = 0x%x\n", id[20]); usb_stor_dbg(us, " buf_size = 0x%x\n", id[ATA_ID_BUF_SIZE]); usb_stor_dbg(us, " ecc_bytes = 0x%x\n", id[22]); usb_stor_dbg(us, " fw_rev[0] = 0x%x\n", *(char *)&id[ATA_ID_FW_REV]); usb_stor_dbg(us, " model[0] = 0x%x\n", *(char *)&id[ATA_ID_PROD]); usb_stor_dbg(us, " max_multsect = 0x%x\n", id[ATA_ID_MAX_MULTSECT] & 0xff); usb_stor_dbg(us, " dword_io = 0x%x\n", id[ATA_ID_DWORD_IO]); usb_stor_dbg(us, " capability = 0x%x\n", id[ATA_ID_CAPABILITY] >> 8); usb_stor_dbg(us, " tPIO = 0x%x\n", id[ATA_ID_OLD_PIO_MODES] >> 8); usb_stor_dbg(us, " tDMA = 0x%x\n", id[ATA_ID_OLD_DMA_MODES] >> 8); usb_stor_dbg(us, " field_valid = 0x%x\n", id[ATA_ID_FIELD_VALID]); usb_stor_dbg(us, " cur_cyls = 0x%x\n", id[ATA_ID_CUR_CYLS]); usb_stor_dbg(us, " cur_heads = 0x%x\n", id[ATA_ID_CUR_HEADS]); usb_stor_dbg(us, " cur_sectors = 0x%x\n", id[ATA_ID_CUR_SECTORS]); usb_stor_dbg(us, " cur_capacity = 0x%x\n", ata_id_u32(id, 57)); usb_stor_dbg(us, " multsect = 0x%x\n", id[ATA_ID_MULTSECT] & 0xff); usb_stor_dbg(us, " lba_capacity = 0x%x\n", ata_id_u32(id, ATA_ID_LBA_CAPACITY)); usb_stor_dbg(us, " command_set_1 = 0x%x\n", id[ATA_ID_COMMAND_SET_1]); usb_stor_dbg(us, " command_set_2 = 0x%x\n", id[ATA_ID_COMMAND_SET_2]); } /************************************************************************** * isd200_get_inquiry_data * * Get inquiry data * * RETURNS: * ISD status code */ static int isd200_get_inquiry_data( struct us_data *us ) { struct isd200_info *info = (struct isd200_info *)us->extra; int retStatus; u16 *id = info->id; usb_stor_dbg(us, "Entering isd200_get_inquiry_data\n"); /* set default to Master */ info->DeviceHead = ATA_ADDRESS_DEVHEAD_STD; /* attempt to manually enumerate this device */ retStatus = isd200_manual_enum(us); if (retStatus == ISD200_GOOD) { int transferStatus; /* check for an ATA device */ if (info->DeviceFlags & DF_ATA_DEVICE) { /* this must be an ATA device */ /* perform an ATA Command Identify */ transferStatus = isd200_action( us, ACTION_IDENTIFY, id, ATA_ID_WORDS * 2); if (transferStatus != ISD200_TRANSPORT_GOOD) { /* Error issuing ATA Command Identify */ usb_stor_dbg(us, " Error issuing ATA Command Identify\n"); retStatus = ISD200_ERROR; } else { /* ATA Command Identify successful */ int i; __be16 *src; __u16 *dest; isd200_fix_driveid(id); isd200_dump_driveid(us, id); /* Prevent division by 0 in isd200_scsi_to_ata() */ if (id[ATA_ID_HEADS] == 0 || id[ATA_ID_SECTORS] == 0) { usb_stor_dbg(us, " Invalid 
ATA Identify data\n"); retStatus = ISD200_ERROR; goto Done; } memset(&info->InquiryData, 0, sizeof(info->InquiryData)); /* Standard IDE interface only supports disks */ info->InquiryData.DeviceType = DIRECT_ACCESS_DEVICE; /* The length must be at least 36 (5 + 31) */ info->InquiryData.AdditionalLength = 0x1F; if (id[ATA_ID_COMMAND_SET_1] & COMMANDSET_MEDIA_STATUS) { /* set the removable bit */ info->InquiryData.DeviceTypeModifier = DEVICE_REMOVABLE; info->DeviceFlags |= DF_REMOVABLE_MEDIA; } /* Fill in vendor identification fields */ src = (__be16 *)&id[ATA_ID_PROD]; dest = (__u16*)info->InquiryData.VendorId; for (i = 0; i < 4; i++) dest[i] = be16_to_cpu(src[i]); src = (__be16 *)&id[ATA_ID_PROD + 8/2]; dest = (__u16*)info->InquiryData.ProductId; for (i=0;i<8;i++) dest[i] = be16_to_cpu(src[i]); src = (__be16 *)&id[ATA_ID_FW_REV]; dest = (__u16*)info->InquiryData.ProductRevisionLevel; for (i=0;i<2;i++) dest[i] = be16_to_cpu(src[i]); /* determine if it supports Media Status Notification */ if (id[ATA_ID_COMMAND_SET_2] & COMMANDSET_MEDIA_STATUS) { usb_stor_dbg(us, " Device supports Media Status Notification\n"); /* * Indicate that it is enabled, even * though it is not. * This allows the lock/unlock of the * media to work correctly. */ info->DeviceFlags |= DF_MEDIA_STATUS_ENABLED; } else info->DeviceFlags &= ~DF_MEDIA_STATUS_ENABLED; } } else { /* * this must be an ATAPI device * use an ATAPI protocol (Transparent SCSI) */ us->protocol_name = "Transparent SCSI"; us->proto_handler = usb_stor_transparent_scsi_command; usb_stor_dbg(us, "Protocol changed to: %s\n", us->protocol_name); /* Free driver structure */ us->extra_destructor(info); kfree(info); us->extra = NULL; us->extra_destructor = NULL; } } Done: usb_stor_dbg(us, "Leaving isd200_get_inquiry_data %08X\n", retStatus); return(retStatus); } /************************************************************************** * isd200_scsi_to_ata * * Translate SCSI commands to ATA commands. 
* * RETURNS: * 1 if the command needs to be sent to the transport layer * 0 otherwise */ static int isd200_scsi_to_ata(struct scsi_cmnd *srb, struct us_data *us, union ata_cdb * ataCdb) { struct isd200_info *info = (struct isd200_info *)us->extra; u16 *id = info->id; int sendToTransport = 1; unsigned char sectnum, head; unsigned short cylinder; unsigned long lba; unsigned long blockCount; unsigned char senseData[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; memset(ataCdb, 0, sizeof(union ata_cdb)); /* SCSI Command */ switch (srb->cmnd[0]) { case INQUIRY: usb_stor_dbg(us, " ATA OUT - INQUIRY\n"); /* copy InquiryData */ usb_stor_set_xfer_buf((unsigned char *) &info->InquiryData, sizeof(info->InquiryData), srb); srb->result = SAM_STAT_GOOD; sendToTransport = 0; break; case MODE_SENSE: usb_stor_dbg(us, " ATA OUT - SCSIOP_MODE_SENSE\n"); /* Initialize the return buffer */ usb_stor_set_xfer_buf(senseData, sizeof(senseData), srb); if (info->DeviceFlags & DF_MEDIA_STATUS_ENABLED) { ataCdb->generic.SignatureByte0 = info->ConfigData.ATAMajorCommand; ataCdb->generic.SignatureByte1 = info->ConfigData.ATAMinorCommand; ataCdb->generic.TransferBlockSize = 1; ataCdb->generic.RegisterSelect = REG_COMMAND; ataCdb->write.CommandByte = ATA_COMMAND_GET_MEDIA_STATUS; isd200_srb_set_bufflen(srb, 0); } else { usb_stor_dbg(us, " Media Status not supported, just report okay\n"); srb->result = SAM_STAT_GOOD; sendToTransport = 0; } break; case TEST_UNIT_READY: usb_stor_dbg(us, " ATA OUT - SCSIOP_TEST_UNIT_READY\n"); if (info->DeviceFlags & DF_MEDIA_STATUS_ENABLED) { ataCdb->generic.SignatureByte0 = info->ConfigData.ATAMajorCommand; ataCdb->generic.SignatureByte1 = info->ConfigData.ATAMinorCommand; ataCdb->generic.TransferBlockSize = 1; ataCdb->generic.RegisterSelect = REG_COMMAND; ataCdb->write.CommandByte = ATA_COMMAND_GET_MEDIA_STATUS; isd200_srb_set_bufflen(srb, 0); } else { usb_stor_dbg(us, " Media Status not supported, just report okay\n"); srb->result = SAM_STAT_GOOD; sendToTransport = 0; } break; case READ_CAPACITY: { unsigned long capacity; struct read_capacity_data readCapacityData; usb_stor_dbg(us, " ATA OUT - SCSIOP_READ_CAPACITY\n"); if (ata_id_has_lba(id)) capacity = ata_id_u32(id, ATA_ID_LBA_CAPACITY) - 1; else capacity = (id[ATA_ID_HEADS] * id[ATA_ID_CYLS] * id[ATA_ID_SECTORS]) - 1; readCapacityData.LogicalBlockAddress = cpu_to_be32(capacity); readCapacityData.BytesPerBlock = cpu_to_be32(0x200); usb_stor_set_xfer_buf((unsigned char *) &readCapacityData, sizeof(readCapacityData), srb); srb->result = SAM_STAT_GOOD; sendToTransport = 0; } break; case READ_10: usb_stor_dbg(us, " ATA OUT - SCSIOP_READ\n"); lba = be32_to_cpu(*(__be32 *)&srb->cmnd[2]); blockCount = (unsigned long)srb->cmnd[7]<<8 | (unsigned long)srb->cmnd[8]; if (ata_id_has_lba(id)) { sectnum = (unsigned char)(lba); cylinder = (unsigned short)(lba>>8); head = ATA_ADDRESS_DEVHEAD_LBA_MODE | (unsigned char)(lba>>24 & 0x0F); } else { sectnum = (u8)((lba % id[ATA_ID_SECTORS]) + 1); cylinder = (u16)(lba / (id[ATA_ID_SECTORS] * id[ATA_ID_HEADS])); head = (u8)((lba / id[ATA_ID_SECTORS]) % id[ATA_ID_HEADS]); } ataCdb->generic.SignatureByte0 = info->ConfigData.ATAMajorCommand; ataCdb->generic.SignatureByte1 = info->ConfigData.ATAMinorCommand; ataCdb->generic.TransferBlockSize = 1; ataCdb->generic.RegisterSelect = REG_SECTOR_COUNT | REG_SECTOR_NUMBER | REG_CYLINDER_LOW | REG_CYLINDER_HIGH | REG_DEVICE_HEAD | REG_COMMAND; ataCdb->write.SectorCountByte = (unsigned char)blockCount; ataCdb->write.SectorNumberByte = sectnum; ataCdb->write.CylinderHighByte = (unsigned 
char)(cylinder>>8); ataCdb->write.CylinderLowByte = (unsigned char)cylinder; ataCdb->write.DeviceHeadByte = (head | ATA_ADDRESS_DEVHEAD_STD); ataCdb->write.CommandByte = ATA_CMD_PIO_READ; break; case WRITE_10: usb_stor_dbg(us, " ATA OUT - SCSIOP_WRITE\n"); lba = be32_to_cpu(*(__be32 *)&srb->cmnd[2]); blockCount = (unsigned long)srb->cmnd[7]<<8 | (unsigned long)srb->cmnd[8]; if (ata_id_has_lba(id)) { sectnum = (unsigned char)(lba); cylinder = (unsigned short)(lba>>8); head = ATA_ADDRESS_DEVHEAD_LBA_MODE | (unsigned char)(lba>>24 & 0x0F); } else { sectnum = (u8)((lba % id[ATA_ID_SECTORS]) + 1); cylinder = (u16)(lba / (id[ATA_ID_SECTORS] * id[ATA_ID_HEADS])); head = (u8)((lba / id[ATA_ID_SECTORS]) % id[ATA_ID_HEADS]); } ataCdb->generic.SignatureByte0 = info->ConfigData.ATAMajorCommand; ataCdb->generic.SignatureByte1 = info->ConfigData.ATAMinorCommand; ataCdb->generic.TransferBlockSize = 1; ataCdb->generic.RegisterSelect = REG_SECTOR_COUNT | REG_SECTOR_NUMBER | REG_CYLINDER_LOW | REG_CYLINDER_HIGH | REG_DEVICE_HEAD | REG_COMMAND; ataCdb->write.SectorCountByte = (unsigned char)blockCount; ataCdb->write.SectorNumberByte = sectnum; ataCdb->write.CylinderHighByte = (unsigned char)(cylinder>>8); ataCdb->write.CylinderLowByte = (unsigned char)cylinder; ataCdb->write.DeviceHeadByte = (head | ATA_ADDRESS_DEVHEAD_STD); ataCdb->write.CommandByte = ATA_CMD_PIO_WRITE; break; case ALLOW_MEDIUM_REMOVAL: usb_stor_dbg(us, " ATA OUT - SCSIOP_MEDIUM_REMOVAL\n"); if (info->DeviceFlags & DF_REMOVABLE_MEDIA) { usb_stor_dbg(us, " srb->cmnd[4] = 0x%X\n", srb->cmnd[4]); ataCdb->generic.SignatureByte0 = info->ConfigData.ATAMajorCommand; ataCdb->generic.SignatureByte1 = info->ConfigData.ATAMinorCommand; ataCdb->generic.TransferBlockSize = 1; ataCdb->generic.RegisterSelect = REG_COMMAND; ataCdb->write.CommandByte = (srb->cmnd[4] & 0x1) ? ATA_CMD_MEDIA_LOCK : ATA_CMD_MEDIA_UNLOCK; isd200_srb_set_bufflen(srb, 0); } else { usb_stor_dbg(us, " Not removable media, just report okay\n"); srb->result = SAM_STAT_GOOD; sendToTransport = 0; } break; case START_STOP: usb_stor_dbg(us, " ATA OUT - SCSIOP_START_STOP_UNIT\n"); usb_stor_dbg(us, " srb->cmnd[4] = 0x%X\n", srb->cmnd[4]); if ((srb->cmnd[4] & 0x3) == 0x2) { usb_stor_dbg(us, " Media Eject\n"); ataCdb->generic.SignatureByte0 = info->ConfigData.ATAMajorCommand; ataCdb->generic.SignatureByte1 = info->ConfigData.ATAMinorCommand; ataCdb->generic.TransferBlockSize = 0; ataCdb->generic.RegisterSelect = REG_COMMAND; ataCdb->write.CommandByte = ATA_COMMAND_MEDIA_EJECT; } else if ((srb->cmnd[4] & 0x3) == 0x1) { usb_stor_dbg(us, " Get Media Status\n"); ataCdb->generic.SignatureByte0 = info->ConfigData.ATAMajorCommand; ataCdb->generic.SignatureByte1 = info->ConfigData.ATAMinorCommand; ataCdb->generic.TransferBlockSize = 1; ataCdb->generic.RegisterSelect = REG_COMMAND; ataCdb->write.CommandByte = ATA_COMMAND_GET_MEDIA_STATUS; isd200_srb_set_bufflen(srb, 0); } else { usb_stor_dbg(us, " Nothing to do, just report okay\n"); srb->result = SAM_STAT_GOOD; sendToTransport = 0; } break; default: usb_stor_dbg(us, "Unsupported SCSI command - 0x%X\n", srb->cmnd[0]); srb->result = DID_ERROR << 16; sendToTransport = 0; break; } return(sendToTransport); } /************************************************************************** * isd200_free_info * * Frees the driver structure. 
*/ static void isd200_free_info_ptrs(void *info_) { struct isd200_info *info = (struct isd200_info *) info_; if (info) { kfree(info->id); kfree(info->RegsBuf); kfree(info->srb.sense_buffer); } } /************************************************************************** * isd200_init_info * * Allocates (if necessary) and initializes the driver structure. * * RETURNS: * error status code */ static int isd200_init_info(struct us_data *us) { struct isd200_info *info; info = kzalloc(sizeof(struct isd200_info), GFP_KERNEL); if (!info) return -ENOMEM; info->id = kzalloc(ATA_ID_WORDS * 2, GFP_KERNEL); info->RegsBuf = kmalloc(sizeof(info->ATARegs), GFP_KERNEL); info->srb.sense_buffer = kmalloc(SCSI_SENSE_BUFFERSIZE, GFP_KERNEL); if (!info->id || !info->RegsBuf || !info->srb.sense_buffer) { isd200_free_info_ptrs(info); kfree(info); return -ENOMEM; } us->extra = info; us->extra_destructor = isd200_free_info_ptrs; return 0; } /************************************************************************** * Initialization for the ISD200 */ static int isd200_Initialization(struct us_data *us) { int rc = 0; usb_stor_dbg(us, "ISD200 Initialization...\n"); /* Initialize ISD200 info struct */ if (isd200_init_info(us) < 0) { usb_stor_dbg(us, "ERROR Initializing ISD200 Info struct\n"); rc = -ENOMEM; } else { /* Get device specific data */ if (isd200_get_inquiry_data(us) != ISD200_GOOD) { usb_stor_dbg(us, "ISD200 Initialization Failure\n"); rc = -EINVAL; } else { usb_stor_dbg(us, "ISD200 Initialization complete\n"); } } return rc; } /************************************************************************** * Protocol and Transport for the ISD200 ASIC * * This protocol and transport are for ATA devices connected to an ISD200 * ASIC. An ATAPI device that is connected as a slave device will be * detected in the driver initialization function and the protocol will * be changed to an ATAPI protocol (Transparent SCSI). * */ static void isd200_ata_command(struct scsi_cmnd *srb, struct us_data *us) { int sendToTransport, orig_bufflen; union ata_cdb ataCdb; /* Make sure driver was initialized */ if (us->extra == NULL) { usb_stor_dbg(us, "ERROR Driver not initialized\n"); srb->result = DID_ERROR << 16; return; } scsi_set_resid(srb, 0); /* scsi_bufflen might change in protocol translation to ata */ orig_bufflen = scsi_bufflen(srb); sendToTransport = isd200_scsi_to_ata(srb, us, &ataCdb); /* send the command to the transport layer */ if (sendToTransport) isd200_invoke_transport(us, srb, &ataCdb); isd200_srb_set_bufflen(srb, orig_bufflen); } static struct scsi_host_template isd200_host_template; static int isd200_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct us_data *us; int result; result = usb_stor_probe1(&us, intf, id, (id - isd200_usb_ids) + isd200_unusual_dev_list, &isd200_host_template); if (result) return result; us->protocol_name = "ISD200 ATA/ATAPI"; us->proto_handler = isd200_ata_command; result = usb_stor_probe2(us); return result; } static struct usb_driver isd200_driver = { .name = DRV_NAME, .probe = isd200_probe, .disconnect = usb_stor_disconnect, .suspend = usb_stor_suspend, .resume = usb_stor_resume, .reset_resume = usb_stor_reset_resume, .pre_reset = usb_stor_pre_reset, .post_reset = usb_stor_post_reset, .id_table = isd200_usb_ids, .soft_unbind = 1, .no_dynamic_id = 1, }; module_usb_stor_driver(isd200_driver, isd200_host_template, DRV_NAME); |
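For readers unfamiliar with old-style CHS addressing, the arithmetic that isd200_scsi_to_ata() applies in the non-LBA READ_10/WRITE_10 branch above can be exercised on its own. The following is a minimal userspace sketch, not part of the driver; the helper name lba_to_chs() and the 16-head/63-sector geometry are made up for illustration.

/* Hypothetical userspace demo of the CHS math used for READ_10/WRITE_10
 * when the drive does not report LBA support. Geometry values are examples.
 */
#include <stdio.h>
#include <stdint.h>

static void lba_to_chs(uint32_t lba, uint16_t heads, uint16_t sectors,
		       uint16_t *cyl, uint8_t *head, uint8_t *sectnum)
{
	*sectnum = (uint8_t)((lba % sectors) + 1);	/* sector numbers are 1-based */
	*cyl     = (uint16_t)(lba / (sectors * heads));
	*head    = (uint8_t)((lba / sectors) % heads);
}

int main(void)
{
	uint16_t cyl;
	uint8_t head, sect;

	/* e.g. a 16-head, 63-sector translation geometry */
	lba_to_chs(123456, 16, 63, &cyl, &head, &sect);
	printf("LBA 123456 -> C/H/S %u/%u/%u\n", cyl, head, sect);
	return 0;
}

In the driver itself the resulting sector, cylinder and head values are written into the SectorNumberByte, CylinderLow/HighByte and DeviceHeadByte fields of the ATA CDB, while the LBA-capable branch packs the block address bits directly into those same taskfile registers.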
4 3 1 1 4 3 1 1 1 4 4 21 2 1 1 3 14 5 2 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> * * Development of this code funded by Astaro AG (http://www.astaro.com/) */ #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/if_arp.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables_offload.h> #include <net/netfilter/nf_tables.h> struct nft_cmp_expr { struct nft_data data; u8 sreg; u8 len; enum nft_cmp_ops op:8; }; void nft_cmp_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_cmp_expr *priv = nft_expr_priv(expr); int d; d = memcmp(®s->data[priv->sreg], &priv->data, priv->len); switch (priv->op) { case NFT_CMP_EQ: if (d != 0) goto mismatch; break; case NFT_CMP_NEQ: if (d == 0) goto mismatch; break; case NFT_CMP_LT: if (d == 0) goto mismatch; fallthrough; case NFT_CMP_LTE: if (d > 0) goto mismatch; break; case NFT_CMP_GT: if (d == 0) goto mismatch; fallthrough; case NFT_CMP_GTE: if (d < 0) goto mismatch; break; } return; mismatch: regs->verdict.code = NFT_BREAK; } static const struct nla_policy nft_cmp_policy[NFTA_CMP_MAX + 1] = { [NFTA_CMP_SREG] = { .type = NLA_U32 }, [NFTA_CMP_OP] = { .type = NLA_U32 }, [NFTA_CMP_DATA] = { .type = NLA_NESTED }, }; static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_cmp_expr *priv = nft_expr_priv(expr); struct nft_data_desc desc = { .type = NFT_DATA_VALUE, .size = sizeof(priv->data), }; int err; err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_CMP_DATA]); if (err < 0) return err; err = nft_parse_register_load(ctx, tb[NFTA_CMP_SREG], &priv->sreg, desc.len); if (err < 0) return err; priv->op = 
ntohl(nla_get_be32(tb[NFTA_CMP_OP])); priv->len = desc.len; return 0; } static int nft_cmp_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_cmp_expr *priv = nft_expr_priv(expr); if (nft_dump_register(skb, NFTA_CMP_SREG, priv->sreg)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_CMP_OP, htonl(priv->op))) goto nla_put_failure; if (nft_data_dump(skb, NFTA_CMP_DATA, &priv->data, NFT_DATA_VALUE, priv->len) < 0) goto nla_put_failure; return 0; nla_put_failure: return -1; } union nft_cmp_offload_data { u16 val16; u32 val32; u64 val64; }; static void nft_payload_n2h(union nft_cmp_offload_data *data, const u8 *val, u32 len) { switch (len) { case 2: data->val16 = ntohs(*((__be16 *)val)); break; case 4: data->val32 = ntohl(*((__be32 *)val)); break; case 8: data->val64 = be64_to_cpu(*((__be64 *)val)); break; default: WARN_ON_ONCE(1); break; } } static int __nft_cmp_offload(struct nft_offload_ctx *ctx, struct nft_flow_rule *flow, const struct nft_cmp_expr *priv) { struct nft_offload_reg *reg = &ctx->regs[priv->sreg]; union nft_cmp_offload_data _data, _datamask; u8 *mask = (u8 *)&flow->match.mask; u8 *key = (u8 *)&flow->match.key; u8 *data, *datamask; if (priv->op != NFT_CMP_EQ || priv->len > reg->len) return -EOPNOTSUPP; if (reg->flags & NFT_OFFLOAD_F_NETWORK2HOST) { nft_payload_n2h(&_data, (u8 *)&priv->data, reg->len); nft_payload_n2h(&_datamask, (u8 *)®->mask, reg->len); data = (u8 *)&_data; datamask = (u8 *)&_datamask; } else { data = (u8 *)&priv->data; datamask = (u8 *)®->mask; } memcpy(key + reg->offset, data, reg->len); memcpy(mask + reg->offset, datamask, reg->len); flow->match.dissector.used_keys |= BIT_ULL(reg->key); flow->match.dissector.offset[reg->key] = reg->base_offset; if (reg->key == FLOW_DISSECTOR_KEY_META && reg->offset == offsetof(struct nft_flow_key, meta.ingress_iftype) && nft_reg_load16(priv->data.data) != ARPHRD_ETHER) return -EOPNOTSUPP; nft_offload_update_dependency(ctx, &priv->data, reg->len); return 0; } static int nft_cmp_offload(struct nft_offload_ctx *ctx, struct nft_flow_rule *flow, const struct nft_expr *expr) { const struct nft_cmp_expr *priv = nft_expr_priv(expr); return __nft_cmp_offload(ctx, flow, priv); } static const struct nft_expr_ops nft_cmp_ops = { .type = &nft_cmp_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_cmp_expr)), .eval = nft_cmp_eval, .init = nft_cmp_init, .dump = nft_cmp_dump, .reduce = NFT_REDUCE_READONLY, .offload = nft_cmp_offload, }; /* Calculate the mask for the nft_cmp_fast expression. On big endian the * mask needs to include the *upper* bytes when interpreting that data as * something smaller than the full u32, therefore a cpu_to_le32 is done. 
*/ static u32 nft_cmp_fast_mask(unsigned int len) { __le32 mask = cpu_to_le32(~0U >> (sizeof_field(struct nft_cmp_fast_expr, data) * BITS_PER_BYTE - len)); return (__force u32)mask; } static int nft_cmp_fast_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); struct nft_data data; struct nft_data_desc desc = { .type = NFT_DATA_VALUE, .size = sizeof(data), }; int err; err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]); if (err < 0) return err; err = nft_parse_register_load(ctx, tb[NFTA_CMP_SREG], &priv->sreg, desc.len); if (err < 0) return err; desc.len *= BITS_PER_BYTE; priv->mask = nft_cmp_fast_mask(desc.len); priv->data = data.data[0] & priv->mask; priv->len = desc.len; priv->inv = ntohl(nla_get_be32(tb[NFTA_CMP_OP])) != NFT_CMP_EQ; return 0; } static int nft_cmp_fast_offload(struct nft_offload_ctx *ctx, struct nft_flow_rule *flow, const struct nft_expr *expr) { const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); struct nft_cmp_expr cmp = { .data = { .data = { [0] = priv->data, }, }, .sreg = priv->sreg, .len = priv->len / BITS_PER_BYTE, .op = priv->inv ? NFT_CMP_NEQ : NFT_CMP_EQ, }; return __nft_cmp_offload(ctx, flow, &cmp); } static int nft_cmp_fast_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); enum nft_cmp_ops op = priv->inv ? NFT_CMP_NEQ : NFT_CMP_EQ; struct nft_data data; if (nft_dump_register(skb, NFTA_CMP_SREG, priv->sreg)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_CMP_OP, htonl(op))) goto nla_put_failure; data.data[0] = priv->data; if (nft_data_dump(skb, NFTA_CMP_DATA, &data, NFT_DATA_VALUE, priv->len / BITS_PER_BYTE) < 0) goto nla_put_failure; return 0; nla_put_failure: return -1; } const struct nft_expr_ops nft_cmp_fast_ops = { .type = &nft_cmp_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_cmp_fast_expr)), .eval = NULL, /* inlined */ .init = nft_cmp_fast_init, .dump = nft_cmp_fast_dump, .reduce = NFT_REDUCE_READONLY, .offload = nft_cmp_fast_offload, }; static u32 nft_cmp_mask(u32 bitlen) { return (__force u32)cpu_to_le32(~0U >> (sizeof(u32) * BITS_PER_BYTE - bitlen)); } static void nft_cmp16_fast_mask(struct nft_data *data, unsigned int bitlen) { int len = bitlen / BITS_PER_BYTE; int i, words = len / sizeof(u32); for (i = 0; i < words; i++) { data->data[i] = 0xffffffff; bitlen -= sizeof(u32) * BITS_PER_BYTE; } if (len % sizeof(u32)) data->data[i++] = nft_cmp_mask(bitlen); for (; i < 4; i++) data->data[i] = 0; } static int nft_cmp16_fast_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_cmp16_fast_expr *priv = nft_expr_priv(expr); struct nft_data_desc desc = { .type = NFT_DATA_VALUE, .size = sizeof(priv->data), }; int err; err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_CMP_DATA]); if (err < 0) return err; err = nft_parse_register_load(ctx, tb[NFTA_CMP_SREG], &priv->sreg, desc.len); if (err < 0) return err; nft_cmp16_fast_mask(&priv->mask, desc.len * BITS_PER_BYTE); priv->inv = ntohl(nla_get_be32(tb[NFTA_CMP_OP])) != NFT_CMP_EQ; priv->len = desc.len; return 0; } static int nft_cmp16_fast_offload(struct nft_offload_ctx *ctx, struct nft_flow_rule *flow, const struct nft_expr *expr) { const struct nft_cmp16_fast_expr *priv = nft_expr_priv(expr); struct nft_cmp_expr cmp = { .data = priv->data, .sreg = priv->sreg, .len = priv->len, .op = priv->inv ? 
NFT_CMP_NEQ : NFT_CMP_EQ, }; return __nft_cmp_offload(ctx, flow, &cmp); } static int nft_cmp16_fast_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_cmp16_fast_expr *priv = nft_expr_priv(expr); enum nft_cmp_ops op = priv->inv ? NFT_CMP_NEQ : NFT_CMP_EQ; if (nft_dump_register(skb, NFTA_CMP_SREG, priv->sreg)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_CMP_OP, htonl(op))) goto nla_put_failure; if (nft_data_dump(skb, NFTA_CMP_DATA, &priv->data, NFT_DATA_VALUE, priv->len) < 0) goto nla_put_failure; return 0; nla_put_failure: return -1; } const struct nft_expr_ops nft_cmp16_fast_ops = { .type = &nft_cmp_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_cmp16_fast_expr)), .eval = NULL, /* inlined */ .init = nft_cmp16_fast_init, .dump = nft_cmp16_fast_dump, .reduce = NFT_REDUCE_READONLY, .offload = nft_cmp16_fast_offload, }; static const struct nft_expr_ops * nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_data data; struct nft_data_desc desc = { .type = NFT_DATA_VALUE, .size = sizeof(data), }; enum nft_cmp_ops op; u8 sreg; int err; if (tb[NFTA_CMP_SREG] == NULL || tb[NFTA_CMP_OP] == NULL || tb[NFTA_CMP_DATA] == NULL) return ERR_PTR(-EINVAL); op = ntohl(nla_get_be32(tb[NFTA_CMP_OP])); switch (op) { case NFT_CMP_EQ: case NFT_CMP_NEQ: case NFT_CMP_LT: case NFT_CMP_LTE: case NFT_CMP_GT: case NFT_CMP_GTE: break; default: return ERR_PTR(-EINVAL); } err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]); if (err < 0) return ERR_PTR(err); sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG])); if (op == NFT_CMP_EQ || op == NFT_CMP_NEQ) { if (desc.len <= sizeof(u32)) return &nft_cmp_fast_ops; else if (desc.len <= sizeof(data) && ((sreg >= NFT_REG_1 && sreg <= NFT_REG_4) || (sreg >= NFT_REG32_00 && sreg <= NFT_REG32_12 && sreg % 2 == 0))) return &nft_cmp16_fast_ops; } return &nft_cmp_ops; } struct nft_expr_type nft_cmp_type __read_mostly = { .name = "cmp", .select_ops = nft_cmp_select_ops, .policy = nft_cmp_policy, .maxattr = NFTA_CMP_MAX, .owner = THIS_MODULE, }; |
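The mask construction in nft_cmp_fast_mask() is compact enough to be easy to misread. Below is a small standalone sketch, assuming glibc's htole32() as a stand-in for the kernel's cpu_to_le32(); the helper name and the test lengths are illustrative only.

/* Standalone sketch of the nft_cmp_fast mask computation.  htole32() stands
 * in for the kernel's cpu_to_le32(); the lengths below are just examples.
 */
#include <endian.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t cmp_fast_mask(unsigned int len_bits)
{
	/* keep the low len_bits of the 32-bit register word, stored LE-first */
	return htole32(~0U >> (32 - len_bits));
}

int main(void)
{
	unsigned int lens[] = { 8, 16, 24, 32 };

	for (unsigned int i = 0; i < 4; i++)
		printf("%2u bits -> mask 0x%08" PRIx32 "\n",
		       lens[i], cmp_fast_mask(lens[i]));
	return 0;
}

On a little-endian host this prints 0x000000ff, 0x0000ffff, 0x00ffffff and 0xffffffff; on a big-endian host the same lengths come back byte-swapped, so the set bits land on the numerically upper bytes of the u32, which is what the comment preceding nft_cmp_fast_mask() is getting at.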
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
 * Copyright (c) 2015, Sony Mobile Communications Inc.
 * Copyright (c) 2013, The Linux Foundation. All rights reserved.
 * Copyright (c) 2020, Linaro Ltd.
*/ #include <linux/module.h> #include <linux/qrtr.h> #include <linux/workqueue.h> #include <net/sock.h> #include "qrtr.h" #include <trace/events/sock.h> #define CREATE_TRACE_POINTS #include <trace/events/qrtr.h> static DEFINE_XARRAY(nodes); static struct { struct socket *sock; struct sockaddr_qrtr bcast_sq; struct list_head lookups; struct workqueue_struct *workqueue; struct work_struct work; int local_node; } qrtr_ns; static const char * const qrtr_ctrl_pkt_strings[] = { [QRTR_TYPE_HELLO] = "hello", [QRTR_TYPE_BYE] = "bye", [QRTR_TYPE_NEW_SERVER] = "new-server", [QRTR_TYPE_DEL_SERVER] = "del-server", [QRTR_TYPE_DEL_CLIENT] = "del-client", [QRTR_TYPE_RESUME_TX] = "resume-tx", [QRTR_TYPE_EXIT] = "exit", [QRTR_TYPE_PING] = "ping", [QRTR_TYPE_NEW_LOOKUP] = "new-lookup", [QRTR_TYPE_DEL_LOOKUP] = "del-lookup", }; struct qrtr_server_filter { unsigned int service; unsigned int instance; unsigned int ifilter; }; struct qrtr_lookup { unsigned int service; unsigned int instance; struct sockaddr_qrtr sq; struct list_head li; }; struct qrtr_server { unsigned int service; unsigned int instance; unsigned int node; unsigned int port; struct list_head qli; }; struct qrtr_node { unsigned int id; struct xarray servers; }; static struct qrtr_node *node_get(unsigned int node_id) { struct qrtr_node *node; node = xa_load(&nodes, node_id); if (node) return node; /* If node didn't exist, allocate and insert it to the tree */ node = kzalloc(sizeof(*node), GFP_KERNEL); if (!node) return NULL; node->id = node_id; xa_init(&node->servers); if (xa_store(&nodes, node_id, node, GFP_KERNEL)) { kfree(node); return NULL; } return node; } static int server_match(const struct qrtr_server *srv, const struct qrtr_server_filter *f) { unsigned int ifilter = f->ifilter; if (f->service != 0 && srv->service != f->service) return 0; if (!ifilter && f->instance) ifilter = ~0; return (srv->instance & ifilter) == f->instance; } static int service_announce_new(struct sockaddr_qrtr *dest, struct qrtr_server *srv) { struct qrtr_ctrl_pkt pkt; struct msghdr msg = { }; struct kvec iv; trace_qrtr_ns_service_announce_new(srv->service, srv->instance, srv->node, srv->port); iv.iov_base = &pkt; iv.iov_len = sizeof(pkt); memset(&pkt, 0, sizeof(pkt)); pkt.cmd = cpu_to_le32(QRTR_TYPE_NEW_SERVER); pkt.server.service = cpu_to_le32(srv->service); pkt.server.instance = cpu_to_le32(srv->instance); pkt.server.node = cpu_to_le32(srv->node); pkt.server.port = cpu_to_le32(srv->port); msg.msg_name = (struct sockaddr *)dest; msg.msg_namelen = sizeof(*dest); return kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt)); } static void service_announce_del(struct sockaddr_qrtr *dest, struct qrtr_server *srv) { struct qrtr_ctrl_pkt pkt; struct msghdr msg = { }; struct kvec iv; int ret; trace_qrtr_ns_service_announce_del(srv->service, srv->instance, srv->node, srv->port); iv.iov_base = &pkt; iv.iov_len = sizeof(pkt); memset(&pkt, 0, sizeof(pkt)); pkt.cmd = cpu_to_le32(QRTR_TYPE_DEL_SERVER); pkt.server.service = cpu_to_le32(srv->service); pkt.server.instance = cpu_to_le32(srv->instance); pkt.server.node = cpu_to_le32(srv->node); pkt.server.port = cpu_to_le32(srv->port); msg.msg_name = (struct sockaddr *)dest; msg.msg_namelen = sizeof(*dest); ret = kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt)); if (ret < 0 && ret != -ENODEV) pr_err("failed to announce del service\n"); return; } static void lookup_notify(struct sockaddr_qrtr *to, struct qrtr_server *srv, bool new) { struct qrtr_ctrl_pkt pkt; struct msghdr msg = { }; struct kvec iv; int ret; iv.iov_base = 
&pkt; iv.iov_len = sizeof(pkt); memset(&pkt, 0, sizeof(pkt)); pkt.cmd = new ? cpu_to_le32(QRTR_TYPE_NEW_SERVER) : cpu_to_le32(QRTR_TYPE_DEL_SERVER); if (srv) { pkt.server.service = cpu_to_le32(srv->service); pkt.server.instance = cpu_to_le32(srv->instance); pkt.server.node = cpu_to_le32(srv->node); pkt.server.port = cpu_to_le32(srv->port); } msg.msg_name = (struct sockaddr *)to; msg.msg_namelen = sizeof(*to); ret = kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt)); if (ret < 0 && ret != -ENODEV) pr_err("failed to send lookup notification\n"); } static int announce_servers(struct sockaddr_qrtr *sq) { struct qrtr_server *srv; struct qrtr_node *node; unsigned long index; int ret; node = node_get(qrtr_ns.local_node); if (!node) return 0; /* Announce the list of servers registered in this node */ xa_for_each(&node->servers, index, srv) { ret = service_announce_new(sq, srv); if (ret < 0) { if (ret == -ENODEV) continue; pr_err("failed to announce new service\n"); return ret; } } return 0; } static struct qrtr_server *server_add(unsigned int service, unsigned int instance, unsigned int node_id, unsigned int port) { struct qrtr_server *srv; struct qrtr_server *old; struct qrtr_node *node; if (!service || !port) return NULL; srv = kzalloc(sizeof(*srv), GFP_KERNEL); if (!srv) return NULL; srv->service = service; srv->instance = instance; srv->node = node_id; srv->port = port; node = node_get(node_id); if (!node) goto err; /* Delete the old server on the same port */ old = xa_store(&node->servers, port, srv, GFP_KERNEL); if (old) { if (xa_is_err(old)) { pr_err("failed to add server [0x%x:0x%x] ret:%d\n", srv->service, srv->instance, xa_err(old)); goto err; } else { kfree(old); } } trace_qrtr_ns_server_add(srv->service, srv->instance, srv->node, srv->port); return srv; err: kfree(srv); return NULL; } static int server_del(struct qrtr_node *node, unsigned int port, bool bcast) { struct qrtr_lookup *lookup; struct qrtr_server *srv; struct list_head *li; srv = xa_load(&node->servers, port); if (!srv) return -ENOENT; xa_erase(&node->servers, port); /* Broadcast the removal of local servers */ if (srv->node == qrtr_ns.local_node && bcast) service_announce_del(&qrtr_ns.bcast_sq, srv); /* Announce the service's disappearance to observers */ list_for_each(li, &qrtr_ns.lookups) { lookup = container_of(li, struct qrtr_lookup, li); if (lookup->service && lookup->service != srv->service) continue; if (lookup->instance && lookup->instance != srv->instance) continue; lookup_notify(&lookup->sq, srv, false); } kfree(srv); return 0; } static int say_hello(struct sockaddr_qrtr *dest) { struct qrtr_ctrl_pkt pkt; struct msghdr msg = { }; struct kvec iv; int ret; iv.iov_base = &pkt; iv.iov_len = sizeof(pkt); memset(&pkt, 0, sizeof(pkt)); pkt.cmd = cpu_to_le32(QRTR_TYPE_HELLO); msg.msg_name = (struct sockaddr *)dest; msg.msg_namelen = sizeof(*dest); ret = kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt)); if (ret < 0) pr_err("failed to send hello msg\n"); return ret; } /* Announce the list of servers registered on the local node */ static int ctrl_cmd_hello(struct sockaddr_qrtr *sq) { int ret; ret = say_hello(sq); if (ret < 0) return ret; return announce_servers(sq); } static int ctrl_cmd_bye(struct sockaddr_qrtr *from) { struct qrtr_node *local_node; struct qrtr_ctrl_pkt pkt; struct qrtr_server *srv; struct sockaddr_qrtr sq; struct msghdr msg = { }; struct qrtr_node *node; unsigned long index; struct kvec iv; int ret; iv.iov_base = &pkt; iv.iov_len = sizeof(pkt); node = node_get(from->sq_node); if (!node) 
return 0; /* Advertise removal of this client to all servers of remote node */ xa_for_each(&node->servers, index, srv) server_del(node, srv->port, true); /* Advertise the removal of this client to all local servers */ local_node = node_get(qrtr_ns.local_node); if (!local_node) return 0; memset(&pkt, 0, sizeof(pkt)); pkt.cmd = cpu_to_le32(QRTR_TYPE_BYE); pkt.client.node = cpu_to_le32(from->sq_node); xa_for_each(&local_node->servers, index, srv) { sq.sq_family = AF_QIPCRTR; sq.sq_node = srv->node; sq.sq_port = srv->port; msg.msg_name = (struct sockaddr *)&sq; msg.msg_namelen = sizeof(sq); ret = kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt)); if (ret < 0 && ret != -ENODEV) { pr_err("failed to send bye cmd\n"); return ret; } } return 0; } static int ctrl_cmd_del_client(struct sockaddr_qrtr *from, unsigned int node_id, unsigned int port) { struct qrtr_node *local_node; struct qrtr_lookup *lookup; struct qrtr_ctrl_pkt pkt; struct msghdr msg = { }; struct qrtr_server *srv; struct sockaddr_qrtr sq; struct qrtr_node *node; struct list_head *tmp; struct list_head *li; unsigned long index; struct kvec iv; int ret; iv.iov_base = &pkt; iv.iov_len = sizeof(pkt); /* Don't accept spoofed messages */ if (from->sq_node != node_id) return -EINVAL; /* Local DEL_CLIENT messages comes from the port being closed */ if (from->sq_node == qrtr_ns.local_node && from->sq_port != port) return -EINVAL; /* Remove any lookups by this client */ list_for_each_safe(li, tmp, &qrtr_ns.lookups) { lookup = container_of(li, struct qrtr_lookup, li); if (lookup->sq.sq_node != node_id) continue; if (lookup->sq.sq_port != port) continue; list_del(&lookup->li); kfree(lookup); } /* Remove the server belonging to this port but don't broadcast * DEL_SERVER. Neighbours would've already removed the server belonging * to this port due to the DEL_CLIENT broadcast from qrtr_port_remove(). 
*/ node = node_get(node_id); if (node) server_del(node, port, false); /* Advertise the removal of this client to all local servers */ local_node = node_get(qrtr_ns.local_node); if (!local_node) return 0; memset(&pkt, 0, sizeof(pkt)); pkt.cmd = cpu_to_le32(QRTR_TYPE_DEL_CLIENT); pkt.client.node = cpu_to_le32(node_id); pkt.client.port = cpu_to_le32(port); xa_for_each(&local_node->servers, index, srv) { sq.sq_family = AF_QIPCRTR; sq.sq_node = srv->node; sq.sq_port = srv->port; msg.msg_name = (struct sockaddr *)&sq; msg.msg_namelen = sizeof(sq); ret = kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt)); if (ret < 0 && ret != -ENODEV) { pr_err("failed to send del client cmd\n"); return ret; } } return 0; } static int ctrl_cmd_new_server(struct sockaddr_qrtr *from, unsigned int service, unsigned int instance, unsigned int node_id, unsigned int port) { struct qrtr_lookup *lookup; struct qrtr_server *srv; struct list_head *li; int ret = 0; /* Ignore specified node and port for local servers */ if (from->sq_node == qrtr_ns.local_node) { node_id = from->sq_node; port = from->sq_port; } srv = server_add(service, instance, node_id, port); if (!srv) return -EINVAL; if (srv->node == qrtr_ns.local_node) { ret = service_announce_new(&qrtr_ns.bcast_sq, srv); if (ret < 0) { pr_err("failed to announce new service\n"); return ret; } } /* Notify any potential lookups about the new server */ list_for_each(li, &qrtr_ns.lookups) { lookup = container_of(li, struct qrtr_lookup, li); if (lookup->service && lookup->service != service) continue; if (lookup->instance && lookup->instance != instance) continue; lookup_notify(&lookup->sq, srv, true); } return ret; } static int ctrl_cmd_del_server(struct sockaddr_qrtr *from, unsigned int service, unsigned int instance, unsigned int node_id, unsigned int port) { struct qrtr_node *node; /* Ignore specified node and port for local servers*/ if (from->sq_node == qrtr_ns.local_node) { node_id = from->sq_node; port = from->sq_port; } /* Local servers may only unregister themselves */ if (from->sq_node == qrtr_ns.local_node && from->sq_port != port) return -EINVAL; node = node_get(node_id); if (!node) return -ENOENT; server_del(node, port, true); return 0; } static int ctrl_cmd_new_lookup(struct sockaddr_qrtr *from, unsigned int service, unsigned int instance) { struct qrtr_server_filter filter; struct qrtr_lookup *lookup; struct qrtr_server *srv; struct qrtr_node *node; unsigned long node_idx; unsigned long srv_idx; /* Accept only local observers */ if (from->sq_node != qrtr_ns.local_node) return -EINVAL; lookup = kzalloc(sizeof(*lookup), GFP_KERNEL); if (!lookup) return -ENOMEM; lookup->sq = *from; lookup->service = service; lookup->instance = instance; list_add_tail(&lookup->li, &qrtr_ns.lookups); memset(&filter, 0, sizeof(filter)); filter.service = service; filter.instance = instance; xa_for_each(&nodes, node_idx, node) { xa_for_each(&node->servers, srv_idx, srv) { if (!server_match(srv, &filter)) continue; lookup_notify(from, srv, true); } } /* Empty notification, to indicate end of listing */ lookup_notify(from, NULL, true); return 0; } static void ctrl_cmd_del_lookup(struct sockaddr_qrtr *from, unsigned int service, unsigned int instance) { struct qrtr_lookup *lookup; struct list_head *tmp; struct list_head *li; list_for_each_safe(li, tmp, &qrtr_ns.lookups) { lookup = container_of(li, struct qrtr_lookup, li); if (lookup->sq.sq_node != from->sq_node) continue; if (lookup->sq.sq_port != from->sq_port) continue; if (lookup->service != service) continue; if 
(lookup->instance && lookup->instance != instance) continue; list_del(&lookup->li); kfree(lookup); } } static void qrtr_ns_worker(struct work_struct *work) { const struct qrtr_ctrl_pkt *pkt; size_t recv_buf_size = 4096; struct sockaddr_qrtr sq; struct msghdr msg = { }; unsigned int cmd; ssize_t msglen; void *recv_buf; struct kvec iv; int ret; msg.msg_name = (struct sockaddr *)&sq; msg.msg_namelen = sizeof(sq); recv_buf = kzalloc(recv_buf_size, GFP_KERNEL); if (!recv_buf) return; for (;;) { iv.iov_base = recv_buf; iv.iov_len = recv_buf_size; msglen = kernel_recvmsg(qrtr_ns.sock, &msg, &iv, 1, iv.iov_len, MSG_DONTWAIT); if (msglen == -EAGAIN) break; if (msglen < 0) { pr_err("error receiving packet: %zd\n", msglen); break; } pkt = recv_buf; cmd = le32_to_cpu(pkt->cmd); if (cmd < ARRAY_SIZE(qrtr_ctrl_pkt_strings) && qrtr_ctrl_pkt_strings[cmd]) trace_qrtr_ns_message(qrtr_ctrl_pkt_strings[cmd], sq.sq_node, sq.sq_port); ret = 0; switch (cmd) { case QRTR_TYPE_HELLO: ret = ctrl_cmd_hello(&sq); break; case QRTR_TYPE_BYE: ret = ctrl_cmd_bye(&sq); break; case QRTR_TYPE_DEL_CLIENT: ret = ctrl_cmd_del_client(&sq, le32_to_cpu(pkt->client.node), le32_to_cpu(pkt->client.port)); break; case QRTR_TYPE_NEW_SERVER: ret = ctrl_cmd_new_server(&sq, le32_to_cpu(pkt->server.service), le32_to_cpu(pkt->server.instance), le32_to_cpu(pkt->server.node), le32_to_cpu(pkt->server.port)); break; case QRTR_TYPE_DEL_SERVER: ret = ctrl_cmd_del_server(&sq, le32_to_cpu(pkt->server.service), le32_to_cpu(pkt->server.instance), le32_to_cpu(pkt->server.node), le32_to_cpu(pkt->server.port)); break; case QRTR_TYPE_EXIT: case QRTR_TYPE_PING: case QRTR_TYPE_RESUME_TX: break; case QRTR_TYPE_NEW_LOOKUP: ret = ctrl_cmd_new_lookup(&sq, le32_to_cpu(pkt->server.service), le32_to_cpu(pkt->server.instance)); break; case QRTR_TYPE_DEL_LOOKUP: ctrl_cmd_del_lookup(&sq, le32_to_cpu(pkt->server.service), le32_to_cpu(pkt->server.instance)); break; } if (ret < 0) pr_err("failed while handling packet from %d:%d", sq.sq_node, sq.sq_port); } kfree(recv_buf); } static void qrtr_ns_data_ready(struct sock *sk) { trace_sk_data_ready(sk); queue_work(qrtr_ns.workqueue, &qrtr_ns.work); } int qrtr_ns_init(void) { struct sockaddr_qrtr sq; int ret; INIT_LIST_HEAD(&qrtr_ns.lookups); INIT_WORK(&qrtr_ns.work, qrtr_ns_worker); ret = sock_create_kern(&init_net, AF_QIPCRTR, SOCK_DGRAM, PF_QIPCRTR, &qrtr_ns.sock); if (ret < 0) return ret; ret = kernel_getsockname(qrtr_ns.sock, (struct sockaddr *)&sq); if (ret < 0) { pr_err("failed to get socket name\n"); goto err_sock; } qrtr_ns.workqueue = alloc_ordered_workqueue("qrtr_ns_handler", 0); if (!qrtr_ns.workqueue) { ret = -ENOMEM; goto err_sock; } qrtr_ns.sock->sk->sk_data_ready = qrtr_ns_data_ready; sq.sq_port = QRTR_PORT_CTRL; qrtr_ns.local_node = sq.sq_node; ret = kernel_bind(qrtr_ns.sock, (struct sockaddr *)&sq, sizeof(sq)); if (ret < 0) { pr_err("failed to bind to socket\n"); goto err_wq; } qrtr_ns.bcast_sq.sq_family = AF_QIPCRTR; qrtr_ns.bcast_sq.sq_node = QRTR_NODE_BCAST; qrtr_ns.bcast_sq.sq_port = QRTR_PORT_CTRL; ret = say_hello(&qrtr_ns.bcast_sq); if (ret < 0) goto err_wq; /* As the qrtr ns socket owner and creator is the same module, we have * to decrease the qrtr module reference count to guarantee that it * remains zero after the ns socket is created, otherwise, executing * "rmmod" command is unable to make the qrtr module deleted after the * qrtr module is inserted successfully. 
* * However, the reference count is increased twice in * sock_create_kern(): one is to increase the reference count of owner * of qrtr socket's proto_ops struct; another is to increment the * reference count of owner of qrtr proto struct. Therefore, we must * decrement the module reference count twice to ensure that it keeps * zero after server's listening socket is created. Of course, we * must bump the module reference count twice as well before the socket * is closed. */ module_put(qrtr_ns.sock->ops->owner); module_put(qrtr_ns.sock->sk->sk_prot_creator->owner); return 0; err_wq: destroy_workqueue(qrtr_ns.workqueue); err_sock: sock_release(qrtr_ns.sock); return ret; } EXPORT_SYMBOL_GPL(qrtr_ns_init); void qrtr_ns_remove(void) { cancel_work_sync(&qrtr_ns.work); destroy_workqueue(qrtr_ns.workqueue); /* sock_release() expects the two references that were put during * qrtr_ns_init(). This function is only called during module remove, * so try_stop_module() has already set the refcnt to 0. Use * __module_get() instead of try_module_get() to successfully take two * references. */ __module_get(qrtr_ns.sock->ops->owner); __module_get(qrtr_ns.sock->sk->sk_prot_creator->owner); sock_release(qrtr_ns.sock); } EXPORT_SYMBOL_GPL(qrtr_ns_remove); MODULE_AUTHOR("Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>"); MODULE_DESCRIPTION("Qualcomm IPC Router Nameservice"); MODULE_LICENSE("Dual BSD/GPL"); |
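/*
 * Illustrative sketch only (not part of ns.c): a minimal userspace client
 * exercising the ctrl_cmd_new_server() path handled above. It opens an
 * AF_QIPCRTR datagram socket, binds to an ephemeral port, and announces a
 * service to the name server listening on QRTR_PORT_CTRL of the local node.
 * The function name qrtr_publish_example(), the AF_QIPCRTR fallback define
 * and the bind-to-port-0 step are assumptions of this sketch, not taken from
 * this file; only the packet layout and command value come from <linux/qrtr.h>.
 */
#include <endian.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>
#include <linux/qrtr.h>

#ifndef AF_QIPCRTR
#define AF_QIPCRTR 42		/* assumed fallback; matches the kernel's address family number */
#endif

static int qrtr_publish_example(unsigned int service, unsigned int instance)
{
	struct qrtr_ctrl_pkt pkt;
	struct sockaddr_qrtr sq;
	socklen_t sl = sizeof(sq);
	int sock;

	sock = socket(AF_QIPCRTR, SOCK_DGRAM, 0);
	if (sock < 0)
		return -1;

	/* Learn the local node id, then bind to get a real source port */
	if (getsockname(sock, (struct sockaddr *)&sq, &sl) < 0)
		goto err;
	sq.sq_port = 0;			/* 0 requests an ephemeral port */
	if (bind(sock, (struct sockaddr *)&sq, sizeof(sq)) < 0)
		goto err;
	sl = sizeof(sq);
	if (getsockname(sock, (struct sockaddr *)&sq, &sl) < 0)
		goto err;

	/* NEW_SERVER control packet, all fields little-endian on the wire */
	memset(&pkt, 0, sizeof(pkt));
	pkt.cmd = htole32(QRTR_TYPE_NEW_SERVER);
	pkt.server.service = htole32(service);
	pkt.server.instance = htole32(instance);
	pkt.server.node = htole32(sq.sq_node);
	pkt.server.port = htole32(sq.sq_port);

	/* The name server listens on QRTR_PORT_CTRL of the local node */
	sq.sq_port = QRTR_PORT_CTRL;
	if (sendto(sock, &pkt, sizeof(pkt), 0,
		   (struct sockaddr *)&sq, sizeof(sq)) < 0)
		goto err;

	return sock;	/* closing it later triggers the DEL_CLIENT cleanup above */

err:
	close(sock);
	return -1;
}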
// SPDX-License-Identifier: GPL-2.0-or-later /* Driver for Philips webcam Functions that send various control messages to the webcam, including video modes. (C) 1999-2003 Nemosoft Unv. (C) 2004-2006 Luc Saillard (luc@saillard.org) (C) 2011 Hans de Goede <hdegoede@redhat.com> NOTE: this version of pwc is an unofficial (modified) release of pwc & pcwx driver and thus may have bugs that are not present in the original version. Please send bug reports and support requests to <luc@saillard.org>. The decompression routines have been implemented by reverse-engineering the Nemosoft binary pwcx module. Caveat emptor. */ /* Changes 2001/08/03 Alvarado Added methods for changing white balance and red/green gains */ /* Control functions for the cam; brightness, contrast, video mode, etc.
*/ #ifdef __KERNEL__ #include <linux/uaccess.h> #endif #include <asm/errno.h> #include "pwc.h" #include "pwc-kiara.h" #include "pwc-timon.h" #include "pwc-dec1.h" #include "pwc-dec23.h" /* Selectors for status controls used only in this file */ #define GET_STATUS_B00 0x0B00 #define SENSOR_TYPE_FORMATTER1 0x0C00 #define GET_STATUS_3000 0x3000 #define READ_RAW_Y_MEAN_FORMATTER 0x3100 #define SET_POWER_SAVE_MODE_FORMATTER 0x3200 #define MIRROR_IMAGE_FORMATTER 0x3300 #define LED_FORMATTER 0x3400 #define LOWLIGHT 0x3500 #define GET_STATUS_3600 0x3600 #define SENSOR_TYPE_FORMATTER2 0x3700 #define GET_STATUS_3800 0x3800 #define GET_STATUS_4000 0x4000 #define GET_STATUS_4100 0x4100 /* Get */ #define CTL_STATUS_4200 0x4200 /* [GS] 1 */ /* Formatters for the Video Endpoint controls [GS]ET_EP_STREAM_CTL */ #define VIDEO_OUTPUT_CONTROL_FORMATTER 0x0100 static const char *size2name[PSZ_MAX] = { "subQCIF", "QSIF", "QCIF", "SIF", "CIF", "VGA", }; /********/ /* Entries for the Nala (645/646) camera; the Nala doesn't have compression preferences, so you either get compressed or non-compressed streams. An alternate value of 0 means this mode is not available at all. */ #define PWC_FPS_MAX_NALA 8 struct Nala_table_entry { char alternate; /* USB alternate setting */ int compressed; /* Compressed yes/no */ unsigned char mode[3]; /* precomputed mode table */ }; static unsigned int Nala_fps_vector[PWC_FPS_MAX_NALA] = { 4, 5, 7, 10, 12, 15, 20, 24 }; static struct Nala_table_entry Nala_table[PSZ_MAX][PWC_FPS_MAX_NALA] = { #include "pwc-nala.h" }; /****************************************************************************/ static int recv_control_msg(struct pwc_device *pdev, u8 request, u16 value, int recv_count) { int rc; rc = usb_control_msg(pdev->udev, usb_rcvctrlpipe(pdev->udev, 0), request, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, pdev->vcinterface, pdev->ctrl_buf, recv_count, USB_CTRL_GET_TIMEOUT); if (rc < 0) PWC_ERROR("recv_control_msg error %d req %02x val %04x\n", rc, request, value); return rc; } static inline int send_video_command(struct pwc_device *pdev, int index, const unsigned char *buf, int buflen) { int rc; memcpy(pdev->ctrl_buf, buf, buflen); rc = usb_control_msg(pdev->udev, usb_sndctrlpipe(pdev->udev, 0), SET_EP_STREAM_CTL, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, VIDEO_OUTPUT_CONTROL_FORMATTER, index, pdev->ctrl_buf, buflen, USB_CTRL_SET_TIMEOUT); if (rc >= 0) memcpy(pdev->cmd_buf, buf, buflen); else PWC_ERROR("send_video_command error %d\n", rc); return rc; } int send_control_msg(struct pwc_device *pdev, u8 request, u16 value, void *buf, int buflen) { return usb_control_msg(pdev->udev, usb_sndctrlpipe(pdev->udev, 0), request, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, pdev->vcinterface, buf, buflen, USB_CTRL_SET_TIMEOUT); } static int set_video_mode_Nala(struct pwc_device *pdev, int size, int pixfmt, int frames, int *compression, int send_to_cam) { int fps, ret = 0; struct Nala_table_entry *pEntry; int frames2frames[31] = { /* closest match of framerate */ 0, 0, 0, 0, 4, /* 0-4 */ 5, 5, 7, 7, 10, /* 5-9 */ 10, 10, 12, 12, 15, /* 10-14 */ 15, 15, 15, 20, 20, /* 15-19 */ 20, 20, 20, 24, 24, /* 20-24 */ 24, 24, 24, 24, 24, /* 25-29 */ 24 /* 30 */ }; int frames2table[31] = { 0, 0, 0, 0, 0, /* 0-4 */ 1, 1, 1, 2, 2, /* 5-9 */ 3, 3, 4, 4, 4, /* 10-14 */ 5, 5, 5, 5, 5, /* 15-19 */ 6, 6, 6, 6, 7, /* 20-24 */ 7, 7, 7, 7, 7, /* 25-29 */ 7 /* 30 */ }; if (size < 0 || size > PSZ_CIF) return -EINVAL; if (frames < 4) frames = 4; else if (size > PSZ_QCIF && frames > 
15) frames = 15; else if (frames > 25) frames = 25; frames = frames2frames[frames]; fps = frames2table[frames]; pEntry = &Nala_table[size][fps]; if (pEntry->alternate == 0) return -EINVAL; if (send_to_cam) ret = send_video_command(pdev, pdev->vendpoint, pEntry->mode, 3); if (ret < 0) return ret; if (pEntry->compressed && pixfmt == V4L2_PIX_FMT_YUV420) pwc_dec1_init(pdev, pEntry->mode); /* Set various parameters */ pdev->pixfmt = pixfmt; pdev->vframes = frames; pdev->valternate = pEntry->alternate; pdev->width = pwc_image_sizes[size][0]; pdev->height = pwc_image_sizes[size][1]; pdev->frame_size = (pdev->width * pdev->height * 3) / 2; if (pEntry->compressed) { if (pdev->release < 5) { /* 4 fold compression */ pdev->vbandlength = 528; pdev->frame_size /= 4; } else { pdev->vbandlength = 704; pdev->frame_size /= 3; } } else pdev->vbandlength = 0; /* Let pwc-if.c:isoc_init know we don't support higher compression */ *compression = 3; return 0; } static int set_video_mode_Timon(struct pwc_device *pdev, int size, int pixfmt, int frames, int *compression, int send_to_cam) { const struct Timon_table_entry *pChoose; int fps, ret = 0; if (size >= PSZ_MAX || *compression < 0 || *compression > 3) return -EINVAL; if (frames < 5) frames = 5; else if (size == PSZ_VGA && frames > 15) frames = 15; else if (frames > 30) frames = 30; fps = (frames / 5) - 1; /* Find a supported framerate with progressively higher compression */ do { pChoose = &Timon_table[size][fps][*compression]; if (pChoose->alternate != 0) break; (*compression)++; } while (*compression <= 3); if (pChoose->alternate == 0) return -ENOENT; /* Not supported. */ if (send_to_cam) ret = send_video_command(pdev, pdev->vendpoint, pChoose->mode, 13); if (ret < 0) return ret; if (pChoose->bandlength > 0 && pixfmt == V4L2_PIX_FMT_YUV420) pwc_dec23_init(pdev, pChoose->mode); /* Set various parameters */ pdev->pixfmt = pixfmt; pdev->vframes = (fps + 1) * 5; pdev->valternate = pChoose->alternate; pdev->width = pwc_image_sizes[size][0]; pdev->height = pwc_image_sizes[size][1]; pdev->vbandlength = pChoose->bandlength; if (pChoose->bandlength > 0) pdev->frame_size = (pChoose->bandlength * pdev->height) / 4; else pdev->frame_size = (pdev->width * pdev->height * 12) / 8; return 0; } static int set_video_mode_Kiara(struct pwc_device *pdev, int size, int pixfmt, int frames, int *compression, int send_to_cam) { const struct Kiara_table_entry *pChoose; int fps, ret = 0; if (size >= PSZ_MAX || *compression < 0 || *compression > 3) return -EINVAL; if (frames < 5) frames = 5; else if (size == PSZ_VGA && frames > 15) frames = 15; else if (frames > 30) frames = 30; fps = (frames / 5) - 1; /* Find a supported framerate with progressively higher compression */ do { pChoose = &Kiara_table[size][fps][*compression]; if (pChoose->alternate != 0) break; (*compression)++; } while (*compression <= 3); if (pChoose->alternate == 0) return -ENOENT; /* Not supported. 
*/ /* Firmware bug: video endpoint is 5, but commands are sent to endpoint 4 */ if (send_to_cam) ret = send_video_command(pdev, 4, pChoose->mode, 12); if (ret < 0) return ret; if (pChoose->bandlength > 0 && pixfmt == V4L2_PIX_FMT_YUV420) pwc_dec23_init(pdev, pChoose->mode); /* All set and go */ pdev->pixfmt = pixfmt; pdev->vframes = (fps + 1) * 5; pdev->valternate = pChoose->alternate; pdev->width = pwc_image_sizes[size][0]; pdev->height = pwc_image_sizes[size][1]; pdev->vbandlength = pChoose->bandlength; if (pdev->vbandlength > 0) pdev->frame_size = (pdev->vbandlength * pdev->height) / 4; else pdev->frame_size = (pdev->width * pdev->height * 12) / 8; PWC_TRACE("frame_size=%d, vframes=%d, vsize=%d, vbandlength=%d\n", pdev->frame_size, pdev->vframes, size, pdev->vbandlength); return 0; } int pwc_set_video_mode(struct pwc_device *pdev, int width, int height, int pixfmt, int frames, int *compression, int send_to_cam) { int ret, size; PWC_DEBUG_FLOW("set_video_mode(%dx%d @ %d, pixfmt %08x).\n", width, height, frames, pixfmt); size = pwc_get_size(pdev, width, height); PWC_TRACE("decode_size = %d.\n", size); if (DEVICE_USE_CODEC1(pdev->type)) { ret = set_video_mode_Nala(pdev, size, pixfmt, frames, compression, send_to_cam); } else if (DEVICE_USE_CODEC3(pdev->type)) { ret = set_video_mode_Kiara(pdev, size, pixfmt, frames, compression, send_to_cam); } else { ret = set_video_mode_Timon(pdev, size, pixfmt, frames, compression, send_to_cam); } if (ret < 0) { PWC_ERROR("Failed to set video mode %s@%d fps; return code = %d\n", size2name[size], frames, ret); return ret; } pdev->frame_total_size = pdev->frame_size + pdev->frame_header_size + pdev->frame_trailer_size; PWC_DEBUG_SIZE("Set resolution to %dx%d\n", pdev->width, pdev->height); return 0; } static unsigned int pwc_get_fps_Nala(struct pwc_device *pdev, unsigned int index, unsigned int size) { unsigned int i; for (i = 0; i < PWC_FPS_MAX_NALA; i++) { if (Nala_table[size][i].alternate) { if (index--==0) return Nala_fps_vector[i]; } } return 0; } static unsigned int pwc_get_fps_Kiara(struct pwc_device *pdev, unsigned int index, unsigned int size) { unsigned int i; for (i = 0; i < PWC_FPS_MAX_KIARA; i++) { if (Kiara_table[size][i][3].alternate) { if (index--==0) return Kiara_fps_vector[i]; } } return 0; } static unsigned int pwc_get_fps_Timon(struct pwc_device *pdev, unsigned int index, unsigned int size) { unsigned int i; for (i=0; i < PWC_FPS_MAX_TIMON; i++) { if (Timon_table[size][i][3].alternate) { if (index--==0) return Timon_fps_vector[i]; } } return 0; } unsigned int pwc_get_fps(struct pwc_device *pdev, unsigned int index, unsigned int size) { unsigned int ret; if (DEVICE_USE_CODEC1(pdev->type)) { ret = pwc_get_fps_Nala(pdev, index, size); } else if (DEVICE_USE_CODEC3(pdev->type)) { ret = pwc_get_fps_Kiara(pdev, index, size); } else { ret = pwc_get_fps_Timon(pdev, index, size); } return ret; } int pwc_get_u8_ctrl(struct pwc_device *pdev, u8 request, u16 value, int *data) { int ret; ret = recv_control_msg(pdev, request, value, 1); if (ret < 0) return ret; *data = pdev->ctrl_buf[0]; return 0; } int pwc_set_u8_ctrl(struct pwc_device *pdev, u8 request, u16 value, u8 data) { int ret; pdev->ctrl_buf[0] = data; ret = send_control_msg(pdev, request, value, pdev->ctrl_buf, 1); if (ret < 0) return ret; return 0; } int pwc_get_s8_ctrl(struct pwc_device *pdev, u8 request, u16 value, int *data) { int ret; ret = recv_control_msg(pdev, request, value, 1); if (ret < 0) return ret; *data = ((s8 *)pdev->ctrl_buf)[0]; return 0; } int pwc_get_u16_ctrl(struct 
pwc_device *pdev, u8 request, u16 value, int *data) { int ret; ret = recv_control_msg(pdev, request, value, 2); if (ret < 0) return ret; *data = (pdev->ctrl_buf[1] << 8) | pdev->ctrl_buf[0]; return 0; } int pwc_set_u16_ctrl(struct pwc_device *pdev, u8 request, u16 value, u16 data) { int ret; pdev->ctrl_buf[0] = data & 0xff; pdev->ctrl_buf[1] = data >> 8; ret = send_control_msg(pdev, request, value, pdev->ctrl_buf, 2); if (ret < 0) return ret; return 0; } int pwc_button_ctrl(struct pwc_device *pdev, u16 value) { int ret; ret = send_control_msg(pdev, SET_STATUS_CTL, value, NULL, 0); if (ret < 0) return ret; return 0; } /* POWER */ void pwc_camera_power(struct pwc_device *pdev, int power) { int r; if (!pdev->power_save) return; if (pdev->type < 675 || (pdev->type < 730 && pdev->release < 6)) return; /* Not supported by Nala or Timon < release 6 */ if (power) pdev->ctrl_buf[0] = 0x00; /* active */ else pdev->ctrl_buf[0] = 0xFF; /* power save */ r = send_control_msg(pdev, SET_STATUS_CTL, SET_POWER_SAVE_MODE_FORMATTER, pdev->ctrl_buf, 1); if (r < 0) PWC_ERROR("Failed to power %s camera (%d)\n", power ? "on" : "off", r); } int pwc_set_leds(struct pwc_device *pdev, int on_value, int off_value) { int r; if (pdev->type < 730) return 0; on_value /= 100; off_value /= 100; if (on_value < 0) on_value = 0; if (on_value > 0xff) on_value = 0xff; if (off_value < 0) off_value = 0; if (off_value > 0xff) off_value = 0xff; pdev->ctrl_buf[0] = on_value; pdev->ctrl_buf[1] = off_value; r = send_control_msg(pdev, SET_STATUS_CTL, LED_FORMATTER, pdev->ctrl_buf, 2); if (r < 0) PWC_ERROR("Failed to set LED on/off time (%d)\n", r); return r; } #ifdef CONFIG_USB_PWC_DEBUG int pwc_get_cmos_sensor(struct pwc_device *pdev, int *sensor) { int ret, request; if (pdev->type < 675) request = SENSOR_TYPE_FORMATTER1; else if (pdev->type < 730) return -1; /* The Vesta series doesn't have this call */ else request = SENSOR_TYPE_FORMATTER2; ret = recv_control_msg(pdev, GET_STATUS_CTL, request, 1); if (ret < 0) return ret; if (pdev->type < 675) *sensor = pdev->ctrl_buf[0] | 0x100; else *sensor = pdev->ctrl_buf[0]; return 0; } #endif |
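/*
 * A minimal sketch, not part of the driver: the same 16-bit control accessors
 * expressed with the kernel's unaligned little-endian helpers, to make explicit
 * that pwc_get_u16_ctrl()/pwc_set_u16_ctrl() above simply exchange values
 * through pdev->ctrl_buf in little-endian byte order. The _alt names are made
 * up for illustration; the helper header is <asm/unaligned.h> on older trees
 * and <linux/unaligned.h> on newer ones.
 */
#include <asm/unaligned.h>

static inline int pwc_get_u16_ctrl_alt(struct pwc_device *pdev, u8 request,
				       u16 value, int *data)
{
	int ret = recv_control_msg(pdev, request, value, 2);

	if (ret < 0)
		return ret;
	*data = get_unaligned_le16(pdev->ctrl_buf);	/* buf[0] = LSB, buf[1] = MSB */
	return 0;
}

static inline int pwc_set_u16_ctrl_alt(struct pwc_device *pdev, u8 request,
				       u16 value, u16 data)
{
	int ret;

	put_unaligned_le16(data, pdev->ctrl_buf);	/* store LSB first */
	ret = send_control_msg(pdev, request, value, pdev->ctrl_buf, 2);
	return ret < 0 ? ret : 0;
}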
// SPDX-License-Identifier: GPL-2.0-or-later /* * Device management routines * Copyright (c) by Jaroslav Kysela <perex@perex.cz> */ #include <linux/slab.h> #include <linux/time.h> #include <linux/export.h> #include <linux/errno.h> #include <sound/core.h> /** * snd_device_new - create an ALSA device component * @card: the card instance * @type: the device type, SNDRV_DEV_XXX * @device_data: the data pointer of this device * @ops: the operator table * * Creates a new device component for the given data pointer. * The device will be assigned to the card and managed together * by the card. * * The data pointer plays a role as the identifier, too, so the * pointer address must be unique and unchanged. * * Return: Zero if successful, or a negative error code on failure. */ int snd_device_new(struct snd_card *card, enum snd_device_type type, void *device_data, const struct snd_device_ops *ops) { struct snd_device *dev; struct list_head *p; if (snd_BUG_ON(!card || !device_data || !ops)) return -ENXIO; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return -ENOMEM; INIT_LIST_HEAD(&dev->list); dev->card = card; dev->type = type; dev->state = SNDRV_DEV_BUILD; dev->device_data = device_data; dev->ops = ops; /* insert the entry in an incrementally sorted list */ list_for_each_prev(p, &card->devices) { struct snd_device *pdev = list_entry(p, struct snd_device, list); if ((unsigned int)pdev->type <= (unsigned int)type) break; } list_add(&dev->list, p); return 0; } EXPORT_SYMBOL(snd_device_new); static void __snd_device_disconnect(struct snd_device *dev) { if (dev->state == SNDRV_DEV_REGISTERED) { if (dev->ops->dev_disconnect && dev->ops->dev_disconnect(dev)) dev_err(dev->card->dev, "device disconnect failure\n"); dev->state = SNDRV_DEV_DISCONNECTED; } } static void __snd_device_free(struct snd_device *dev) { /* unlink */ list_del(&dev->list); __snd_device_disconnect(dev); if (dev->ops->dev_free) { if (dev->ops->dev_free(dev)) dev_err(dev->card->dev, "device free failure\n"); } kfree(dev); } static struct snd_device *look_for_dev(struct snd_card *card, void *device_data) { struct snd_device *dev; list_for_each_entry(dev, &card->devices, list) if (dev->device_data == device_data) return dev; return NULL; } /** * snd_device_disconnect - disconnect the device * @card: the card instance * @device_data: the data pointer to disconnect * * Turns the device into the disconnection state, invoking * dev_disconnect callback, if the device was already registered.
* * Usually called from snd_card_disconnect(). */ void snd_device_disconnect(struct snd_card *card, void *device_data) { struct snd_device *dev; if (snd_BUG_ON(!card || !device_data)) return; dev = look_for_dev(card, device_data); if (dev) __snd_device_disconnect(dev); else dev_dbg(card->dev, "device disconnect %p (from %pS), not found\n", device_data, __builtin_return_address(0)); } EXPORT_SYMBOL_GPL(snd_device_disconnect); /** * snd_device_free - release the device from the card * @card: the card instance * @device_data: the data pointer to release * * Removes the device from the list on the card and invokes the * callbacks, dev_disconnect and dev_free, corresponding to the state. * Then releases the device. */ void snd_device_free(struct snd_card *card, void *device_data) { struct snd_device *dev; if (snd_BUG_ON(!card || !device_data)) return; dev = look_for_dev(card, device_data); if (dev) __snd_device_free(dev); else dev_dbg(card->dev, "device free %p (from %pS), not found\n", device_data, __builtin_return_address(0)); } EXPORT_SYMBOL(snd_device_free); static int __snd_device_register(struct snd_device *dev) { if (dev->state == SNDRV_DEV_BUILD) { if (dev->ops->dev_register) { int err = dev->ops->dev_register(dev); if (err < 0) return err; } dev->state = SNDRV_DEV_REGISTERED; } return 0; } /** * snd_device_register - register the device * @card: the card instance * @device_data: the data pointer to register * * Registers the device which was already created via * snd_device_new(). Usually this is called from snd_card_register(), * but it can be called later if any new devices are created after * invocation of snd_card_register(). * * Return: Zero if successful, or a negative error code on failure or if the * device is not found. */ int snd_device_register(struct snd_card *card, void *device_data) { struct snd_device *dev; if (snd_BUG_ON(!card || !device_data)) return -ENXIO; dev = look_for_dev(card, device_data); if (dev) return __snd_device_register(dev); snd_BUG(); return -ENXIO; } EXPORT_SYMBOL(snd_device_register); /* * register all the devices on the card. * called from init.c */ int snd_device_register_all(struct snd_card *card) { struct snd_device *dev; int err; if (snd_BUG_ON(!card)) return -ENXIO; list_for_each_entry(dev, &card->devices, list) { err = __snd_device_register(dev); if (err < 0) return err; } return 0; } /* * disconnect all the devices on the card. * called from init.c */ void snd_device_disconnect_all(struct snd_card *card) { struct snd_device *dev; if (snd_BUG_ON(!card)) return; list_for_each_entry_reverse(dev, &card->devices, list) __snd_device_disconnect(dev); } /* * release all the devices on the card. * called from init.c */ void snd_device_free_all(struct snd_card *card) { struct snd_device *dev, *next; if (snd_BUG_ON(!card)) return; list_for_each_entry_safe_reverse(dev, next, &card->devices, list) { /* exception: free ctl and lowlevel stuff later */ if (dev->type == SNDRV_DEV_CONTROL || dev->type == SNDRV_DEV_LOWLEVEL) continue; __snd_device_free(dev); } /* free all */ list_for_each_entry_safe_reverse(dev, next, &card->devices, list) __snd_device_free(dev); } /** * snd_device_get_state - Get the current state of the given device * @card: the card instance * @device_data: the data pointer of the device to query * * Returns the current state of the given device object.
For the valid * device, either @SNDRV_DEV_BUILD, @SNDRV_DEV_REGISTERED or * @SNDRV_DEV_DISCONNECTED is returned. * Or for a non-existing device, -1 is returned as an error. * * Return: the current state, or -1 if not found */ int snd_device_get_state(struct snd_card *card, void *device_data) { struct snd_device *dev; dev = look_for_dev(card, device_data); if (dev) return dev->state; return -1; } EXPORT_SYMBOL_GPL(snd_device_get_state); |
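/*
 * Illustrative sketch (not part of device.c): the typical way a card driver
 * hands a private object to the component framework above. "my_chip",
 * my_chip_dev_free() and my_chip_create() are made-up names; the pattern is
 * snd_device_new() with an ops table, after which snd_card_register() ends up
 * in __snd_device_register() and card teardown runs dev_disconnect()/dev_free()
 * through snd_device_disconnect_all()/snd_device_free_all().
 */
#include <linux/slab.h>
#include <sound/core.h>

struct my_chip {
	struct snd_card *card;
	/* ... hardware state ... */
};

/* dev_free callback: invoked from __snd_device_free() */
static int my_chip_dev_free(struct snd_device *device)
{
	struct my_chip *chip = device->device_data;

	/* release hardware resources here, then the object itself */
	kfree(chip);
	return 0;
}

static int my_chip_create(struct snd_card *card, struct my_chip **rchip)
{
	static const struct snd_device_ops ops = {
		.dev_free = my_chip_dev_free,
		/* .dev_register and .dev_disconnect are optional */
	};
	struct my_chip *chip;
	int err;

	chip = kzalloc(sizeof(*chip), GFP_KERNEL);
	if (!chip)
		return -ENOMEM;
	chip->card = card;

	/* chip is the unique device_data key later used by look_for_dev() */
	err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops);
	if (err < 0) {
		kfree(chip);
		return err;
	}
	*rchip = chip;
	return 0;
}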
// SPDX-License-Identifier: GPL-2.0-only /* * Copyright 1993 by Theodore Ts'o. */ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/sched.h> #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/file.h> #include <linux/stat.h> #include <linux/errno.h> #include <linux/major.h> #include <linux/wait.h> #include <linux/blkpg.h> #include <linux/init.h> #include <linux/swap.h> #include <linux/slab.h> #include <linux/compat.h> #include <linux/suspend.h> #include <linux/freezer.h> #include <linux/mutex.h> #include <linux/writeback.h> #include <linux/completion.h> #include <linux/highmem.h> #include <linux/splice.h> #include <linux/sysfs.h> #include <linux/miscdevice.h> #include <linux/falloc.h> #include <linux/uio.h> #include <linux/ioprio.h> #include <linux/blk-cgroup.h> #include <linux/sched/mm.h> #include <linux/statfs.h> #include <linux/uaccess.h> #include <linux/blk-mq.h> #include <linux/spinlock.h> #include <uapi/linux/loop.h> /* Possible states of device */ enum { Lo_unbound, Lo_bound, Lo_rundown, Lo_deleting, }; struct loop_device { int lo_number; loff_t lo_offset; loff_t lo_sizelimit; int lo_flags; char lo_file_name[LO_NAME_SIZE]; struct file *lo_backing_file; unsigned int lo_min_dio_size; struct block_device *lo_device; gfp_t old_gfp_mask; spinlock_t lo_lock; int lo_state; spinlock_t lo_work_lock; struct workqueue_struct *workqueue; struct work_struct rootcg_work; struct list_head rootcg_cmd_list; struct list_head idle_worker_list; struct rb_root worker_tree; struct timer_list timer; bool sysfs_inited; struct request_queue *lo_queue; struct blk_mq_tag_set tag_set; struct gendisk *lo_disk; struct mutex lo_mutex; bool idr_visible; }; struct loop_cmd { struct list_head list_entry; bool use_aio; /* use AIO interface to handle I/O */ atomic_t ref; /* only for aio */ long ret; struct kiocb iocb; struct bio_vec *bvec; struct cgroup_subsys_state *blkcg_css; struct cgroup_subsys_state *memcg_css; }; #define LOOP_IDLE_WORKER_TIMEOUT (60 * HZ) #define LOOP_DEFAULT_HW_Q_DEPTH 128 static DEFINE_IDR(loop_index_idr); static DEFINE_MUTEX(loop_ctl_mutex); static DEFINE_MUTEX(loop_validate_mutex); /** * loop_global_lock_killable() - take locks for safe loop_validate_file() test * * @lo: struct loop_device * @global: true if @lo is about to bind another "struct loop_device", false otherwise * * Returns 0 on success, -EINTR otherwise. * * Since loop_validate_file() traverses on other "struct loop_device" if * is_loop_device() is true, we need a global lock for serializing concurrent * loop_configure()/loop_change_fd()/__loop_clr_fd() calls.
*/ static int loop_global_lock_killable(struct loop_device *lo, bool global) { int err; if (global) { err = mutex_lock_killable(&loop_validate_mutex); if (err) return err; } err = mutex_lock_killable(&lo->lo_mutex); if (err && global) mutex_unlock(&loop_validate_mutex); return err; } /** * loop_global_unlock() - release locks taken by loop_global_lock_killable() * * @lo: struct loop_device * @global: true if @lo was about to bind another "struct loop_device", false otherwise */ static void loop_global_unlock(struct loop_device *lo, bool global) { mutex_unlock(&lo->lo_mutex); if (global) mutex_unlock(&loop_validate_mutex); } static int max_part; static int part_shift; static loff_t get_size(loff_t offset, loff_t sizelimit, struct file *file) { loff_t loopsize; /* Compute loopsize in bytes */ loopsize = i_size_read(file->f_mapping->host); if (offset > 0) loopsize -= offset; /* offset is beyond i_size, weird but possible */ if (loopsize < 0) return 0; if (sizelimit > 0 && sizelimit < loopsize) loopsize = sizelimit; /* * Unfortunately, if we want to do I/O on the device, * the number of 512-byte sectors has to fit into a sector_t. */ return loopsize >> 9; } static loff_t get_loop_size(struct loop_device *lo, struct file *file) { return get_size(lo->lo_offset, lo->lo_sizelimit, file); } /* * We support direct I/O only if lo_offset is aligned with the logical I/O size * of backing device, and the logical block size of loop is bigger than that of * the backing device. */ static bool lo_can_use_dio(struct loop_device *lo) { if (!(lo->lo_backing_file->f_mode & FMODE_CAN_ODIRECT)) return false; if (queue_logical_block_size(lo->lo_queue) < lo->lo_min_dio_size) return false; if (lo->lo_offset & (lo->lo_min_dio_size - 1)) return false; return true; } /* * Direct I/O can be enabled either by using an O_DIRECT file descriptor, or by * passing in the LO_FLAGS_DIRECT_IO flag from userspace. It will be silently * disabled when the device block size is too small or the offset is unaligned. * * loop_get_status will always report the effective LO_FLAGS_DIRECT_IO flag and * not the originally passed in one. 
*/ static inline void loop_update_dio(struct loop_device *lo) { lockdep_assert_held(&lo->lo_mutex); WARN_ON_ONCE(lo->lo_state == Lo_bound && lo->lo_queue->mq_freeze_depth == 0); if ((lo->lo_flags & LO_FLAGS_DIRECT_IO) && !lo_can_use_dio(lo)) lo->lo_flags &= ~LO_FLAGS_DIRECT_IO; } /** * loop_set_size() - sets device size and notifies userspace * @lo: struct loop_device to set the size for * @size: new size of the loop device * * Callers must validate that the size passed into this function fits into * a sector_t, eg using loop_validate_size() */ static void loop_set_size(struct loop_device *lo, loff_t size) { if (!set_capacity_and_notify(lo->lo_disk, size)) kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE); } static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos) { struct iov_iter i; ssize_t bw; iov_iter_bvec(&i, ITER_SOURCE, bvec, 1, bvec->bv_len); bw = vfs_iter_write(file, &i, ppos, 0); if (likely(bw == bvec->bv_len)) return 0; printk_ratelimited(KERN_ERR "loop: Write error at byte offset %llu, length %i.\n", (unsigned long long)*ppos, bvec->bv_len); if (bw >= 0) bw = -EIO; return bw; } static int lo_write_simple(struct loop_device *lo, struct request *rq, loff_t pos) { struct bio_vec bvec; struct req_iterator iter; int ret = 0; rq_for_each_segment(bvec, rq, iter) { ret = lo_write_bvec(lo->lo_backing_file, &bvec, &pos); if (ret < 0) break; cond_resched(); } return ret; } static int lo_read_simple(struct loop_device *lo, struct request *rq, loff_t pos) { struct bio_vec bvec; struct req_iterator iter; struct iov_iter i; ssize_t len; rq_for_each_segment(bvec, rq, iter) { iov_iter_bvec(&i, ITER_DEST, &bvec, 1, bvec.bv_len); len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0); if (len < 0) return len; flush_dcache_page(bvec.bv_page); if (len != bvec.bv_len) { struct bio *bio; __rq_for_each_bio(bio, rq) zero_fill_bio(bio); break; } cond_resched(); } return 0; } static void loop_clear_limits(struct loop_device *lo, int mode) { struct queue_limits lim = queue_limits_start_update(lo->lo_queue); if (mode & FALLOC_FL_ZERO_RANGE) lim.max_write_zeroes_sectors = 0; if (mode & FALLOC_FL_PUNCH_HOLE) { lim.max_hw_discard_sectors = 0; lim.discard_granularity = 0; } /* * XXX: this updates the queue limits without freezing the queue, which * is against the locking protocol and dangerous. But we can't just * freeze the queue as we're inside the ->queue_rq method here. So this * should move out into a workqueue unless we get the file operations to * advertise if they support specific fallocate operations. */ queue_limits_commit_update(lo->lo_queue, &lim); } static int lo_fallocate(struct loop_device *lo, struct request *rq, loff_t pos, int mode) { /* * We use fallocate to manipulate the space mappings used by the image * a.k.a. discard/zerorange. */ struct file *file = lo->lo_backing_file; int ret; mode |= FALLOC_FL_KEEP_SIZE; if (!bdev_max_discard_sectors(lo->lo_device)) return -EOPNOTSUPP; ret = file->f_op->fallocate(file, mode, pos, blk_rq_bytes(rq)); if (unlikely(ret && ret != -EINVAL && ret != -EOPNOTSUPP)) return -EIO; /* * We initially configure the limits in a hope that fallocate is * supported and clear them here if that turns out not to be true. 
*/ if (unlikely(ret == -EOPNOTSUPP)) loop_clear_limits(lo, mode); return ret; } static int lo_req_flush(struct loop_device *lo, struct request *rq) { int ret = vfs_fsync(lo->lo_backing_file, 0); if (unlikely(ret && ret != -EINVAL)) ret = -EIO; return ret; } static void lo_complete_rq(struct request *rq) { struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq); blk_status_t ret = BLK_STS_OK; if (!cmd->use_aio || cmd->ret < 0 || cmd->ret == blk_rq_bytes(rq) || req_op(rq) != REQ_OP_READ) { if (cmd->ret < 0) ret = errno_to_blk_status(cmd->ret); goto end_io; } /* * Short READ - if we got some data, advance our request and * retry it. If we got no data, end the rest with EIO. */ if (cmd->ret) { blk_update_request(rq, BLK_STS_OK, cmd->ret); cmd->ret = 0; blk_mq_requeue_request(rq, true); } else { if (cmd->use_aio) { struct bio *bio = rq->bio; while (bio) { zero_fill_bio(bio); bio = bio->bi_next; } } ret = BLK_STS_IOERR; end_io: blk_mq_end_request(rq, ret); } } static void lo_rw_aio_do_completion(struct loop_cmd *cmd) { struct request *rq = blk_mq_rq_from_pdu(cmd); if (!atomic_dec_and_test(&cmd->ref)) return; kfree(cmd->bvec); cmd->bvec = NULL; if (likely(!blk_should_fake_timeout(rq->q))) blk_mq_complete_request(rq); } static void lo_rw_aio_complete(struct kiocb *iocb, long ret) { struct loop_cmd *cmd = container_of(iocb, struct loop_cmd, iocb); cmd->ret = ret; lo_rw_aio_do_completion(cmd); } static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd, loff_t pos, int rw) { struct iov_iter iter; struct req_iterator rq_iter; struct bio_vec *bvec; struct request *rq = blk_mq_rq_from_pdu(cmd); struct bio *bio = rq->bio; struct file *file = lo->lo_backing_file; struct bio_vec tmp; unsigned int offset; int nr_bvec = 0; int ret; rq_for_each_bvec(tmp, rq, rq_iter) nr_bvec++; if (rq->bio != rq->biotail) { bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec), GFP_NOIO); if (!bvec) return -EIO; cmd->bvec = bvec; /* * The bios of the request may be started from the middle of * the 'bvec' because of bio splitting, so we can't directly * copy bio->bi_iov_vec to new bvec. The rq_for_each_bvec * API will take care of all details for us. */ rq_for_each_bvec(tmp, rq, rq_iter) { *bvec = tmp; bvec++; } bvec = cmd->bvec; offset = 0; } else { /* * Same here, this bio may be started from the middle of the * 'bvec' because of bio splitting, so offset from the bvec * must be passed to iov iterator */ offset = bio->bi_iter.bi_bvec_done; bvec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter); } atomic_set(&cmd->ref, 2); iov_iter_bvec(&iter, rw, bvec, nr_bvec, blk_rq_bytes(rq)); iter.iov_offset = offset; cmd->iocb.ki_pos = pos; cmd->iocb.ki_filp = file; cmd->iocb.ki_complete = lo_rw_aio_complete; cmd->iocb.ki_flags = IOCB_DIRECT; cmd->iocb.ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0); if (rw == ITER_SOURCE) ret = file->f_op->write_iter(&cmd->iocb, &iter); else ret = file->f_op->read_iter(&cmd->iocb, &iter); lo_rw_aio_do_completion(cmd); if (ret != -EIOCBQUEUED) lo_rw_aio_complete(&cmd->iocb, ret); return 0; } static int do_req_filebacked(struct loop_device *lo, struct request *rq) { struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq); loff_t pos = ((loff_t) blk_rq_pos(rq) << 9) + lo->lo_offset; /* * lo_write_simple and lo_read_simple should have been covered * by io submit style function like lo_rw_aio(), one blocker * is that lo_read_simple() need to call flush_dcache_page after * the page is written from kernel, and it isn't easy to handle * this in io submit style function which submits all segments * of the req at one time. 
And direct read IO doesn't need to * run flush_dcache_page(). */ switch (req_op(rq)) { case REQ_OP_FLUSH: return lo_req_flush(lo, rq); case REQ_OP_WRITE_ZEROES: /* * If the caller doesn't want deallocation, call zeroout to * write zeroes the range. Otherwise, punch them out. */ return lo_fallocate(lo, rq, pos, (rq->cmd_flags & REQ_NOUNMAP) ? FALLOC_FL_ZERO_RANGE : FALLOC_FL_PUNCH_HOLE); case REQ_OP_DISCARD: return lo_fallocate(lo, rq, pos, FALLOC_FL_PUNCH_HOLE); case REQ_OP_WRITE: if (cmd->use_aio) return lo_rw_aio(lo, cmd, pos, ITER_SOURCE); else return lo_write_simple(lo, rq, pos); case REQ_OP_READ: if (cmd->use_aio) return lo_rw_aio(lo, cmd, pos, ITER_DEST); else return lo_read_simple(lo, rq, pos); default: WARN_ON_ONCE(1); return -EIO; } } static void loop_reread_partitions(struct loop_device *lo) { int rc; mutex_lock(&lo->lo_disk->open_mutex); rc = bdev_disk_changed(lo->lo_disk, false); mutex_unlock(&lo->lo_disk->open_mutex); if (rc) pr_warn("%s: partition scan of loop%d (%s) failed (rc=%d)\n", __func__, lo->lo_number, lo->lo_file_name, rc); } static unsigned int loop_query_min_dio_size(struct loop_device *lo) { struct file *file = lo->lo_backing_file; struct block_device *sb_bdev = file->f_mapping->host->i_sb->s_bdev; struct kstat st; /* * Use the minimal dio alignment of the file system if provided. */ if (!vfs_getattr(&file->f_path, &st, STATX_DIOALIGN, 0) && (st.result_mask & STATX_DIOALIGN)) return st.dio_offset_align; /* * In a perfect world this wouldn't be needed, but as of Linux 6.13 only * a handful of file systems support the STATX_DIOALIGN flag. */ if (sb_bdev) return bdev_logical_block_size(sb_bdev); return SECTOR_SIZE; } static inline int is_loop_device(struct file *file) { struct inode *i = file->f_mapping->host; return i && S_ISBLK(i->i_mode) && imajor(i) == LOOP_MAJOR; } static int loop_validate_file(struct file *file, struct block_device *bdev) { struct inode *inode = file->f_mapping->host; struct file *f = file; /* Avoid recursion */ while (is_loop_device(f)) { struct loop_device *l; lockdep_assert_held(&loop_validate_mutex); if (f->f_mapping->host->i_rdev == bdev->bd_dev) return -EBADF; l = I_BDEV(f->f_mapping->host)->bd_disk->private_data; if (l->lo_state != Lo_bound) return -EINVAL; /* Order wrt setting lo->lo_backing_file in loop_configure(). */ rmb(); f = l->lo_backing_file; } if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) return -EINVAL; return 0; } static void loop_assign_backing_file(struct loop_device *lo, struct file *file) { lo->lo_backing_file = file; lo->old_gfp_mask = mapping_gfp_mask(file->f_mapping); mapping_set_gfp_mask(file->f_mapping, lo->old_gfp_mask & ~(__GFP_IO | __GFP_FS)); if (lo->lo_backing_file->f_flags & O_DIRECT) lo->lo_flags |= LO_FLAGS_DIRECT_IO; lo->lo_min_dio_size = loop_query_min_dio_size(lo); } /* * loop_change_fd switched the backing store of a loopback device to * a new file. This is useful for operating system installers to free up * the original file and in High Availability environments to switch to * an alternative location for the content in case of server meltdown. * This can only work if the loop device is used read-only, and if the * new backing store is the same size and type as the old backing store. 
*/ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, unsigned int arg) { struct file *file = fget(arg); struct file *old_file; unsigned int memflags; int error; bool partscan; bool is_loop; if (!file) return -EBADF; /* suppress uevents while reconfiguring the device */ dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1); is_loop = is_loop_device(file); error = loop_global_lock_killable(lo, is_loop); if (error) goto out_putf; error = -ENXIO; if (lo->lo_state != Lo_bound) goto out_err; /* the loop device has to be read-only */ error = -EINVAL; if (!(lo->lo_flags & LO_FLAGS_READ_ONLY)) goto out_err; error = loop_validate_file(file, bdev); if (error) goto out_err; old_file = lo->lo_backing_file; error = -EINVAL; /* size of the new backing store needs to be the same */ if (get_loop_size(lo, file) != get_loop_size(lo, old_file)) goto out_err; /* * We might switch to direct I/O mode for the loop device, write back * all dirty data the page cache now that so that the individual I/O * operations don't have to do that. */ vfs_fsync(file, 0); /* and ... switch */ disk_force_media_change(lo->lo_disk); memflags = blk_mq_freeze_queue(lo->lo_queue); mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask); loop_assign_backing_file(lo, file); loop_update_dio(lo); blk_mq_unfreeze_queue(lo->lo_queue, memflags); partscan = lo->lo_flags & LO_FLAGS_PARTSCAN; loop_global_unlock(lo, is_loop); /* * Flush loop_validate_file() before fput(), for l->lo_backing_file * might be pointing at old_file which might be the last reference. */ if (!is_loop) { mutex_lock(&loop_validate_mutex); mutex_unlock(&loop_validate_mutex); } /* * We must drop file reference outside of lo_mutex as dropping * the file ref can take open_mutex which creates circular locking * dependency. 
*/ fput(old_file); if (partscan) loop_reread_partitions(lo); error = 0; done: /* enable and uncork uevent now that we are done */ dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0); return error; out_err: loop_global_unlock(lo, is_loop); out_putf: fput(file); goto done; } /* loop sysfs attributes */ static ssize_t loop_attr_show(struct device *dev, char *page, ssize_t (*callback)(struct loop_device *, char *)) { struct gendisk *disk = dev_to_disk(dev); struct loop_device *lo = disk->private_data; return callback(lo, page); } #define LOOP_ATTR_RO(_name) \ static ssize_t loop_attr_##_name##_show(struct loop_device *, char *); \ static ssize_t loop_attr_do_show_##_name(struct device *d, \ struct device_attribute *attr, char *b) \ { \ return loop_attr_show(d, b, loop_attr_##_name##_show); \ } \ static struct device_attribute loop_attr_##_name = \ __ATTR(_name, 0444, loop_attr_do_show_##_name, NULL); static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf) { ssize_t ret; char *p = NULL; spin_lock_irq(&lo->lo_lock); if (lo->lo_backing_file) p = file_path(lo->lo_backing_file, buf, PAGE_SIZE - 1); spin_unlock_irq(&lo->lo_lock); if (IS_ERR_OR_NULL(p)) ret = PTR_ERR(p); else { ret = strlen(p); memmove(buf, p, ret); buf[ret++] = '\n'; buf[ret] = 0; } return ret; } static ssize_t loop_attr_offset_show(struct loop_device *lo, char *buf) { return sysfs_emit(buf, "%llu\n", (unsigned long long)lo->lo_offset); } static ssize_t loop_attr_sizelimit_show(struct loop_device *lo, char *buf) { return sysfs_emit(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit); } static ssize_t loop_attr_autoclear_show(struct loop_device *lo, char *buf) { int autoclear = (lo->lo_flags & LO_FLAGS_AUTOCLEAR); return sysfs_emit(buf, "%s\n", autoclear ? "1" : "0"); } static ssize_t loop_attr_partscan_show(struct loop_device *lo, char *buf) { int partscan = (lo->lo_flags & LO_FLAGS_PARTSCAN); return sysfs_emit(buf, "%s\n", partscan ? "1" : "0"); } static ssize_t loop_attr_dio_show(struct loop_device *lo, char *buf) { int dio = (lo->lo_flags & LO_FLAGS_DIRECT_IO); return sysfs_emit(buf, "%s\n", dio ? "1" : "0"); } LOOP_ATTR_RO(backing_file); LOOP_ATTR_RO(offset); LOOP_ATTR_RO(sizelimit); LOOP_ATTR_RO(autoclear); LOOP_ATTR_RO(partscan); LOOP_ATTR_RO(dio); static struct attribute *loop_attrs[] = { &loop_attr_backing_file.attr, &loop_attr_offset.attr, &loop_attr_sizelimit.attr, &loop_attr_autoclear.attr, &loop_attr_partscan.attr, &loop_attr_dio.attr, NULL, }; static struct attribute_group loop_attribute_group = { .name = "loop", .attrs= loop_attrs, }; static void loop_sysfs_init(struct loop_device *lo) { lo->sysfs_inited = !sysfs_create_group(&disk_to_dev(lo->lo_disk)->kobj, &loop_attribute_group); } static void loop_sysfs_exit(struct loop_device *lo) { if (lo->sysfs_inited) sysfs_remove_group(&disk_to_dev(lo->lo_disk)->kobj, &loop_attribute_group); } static void loop_get_discard_config(struct loop_device *lo, u32 *granularity, u32 *max_discard_sectors) { struct file *file = lo->lo_backing_file; struct inode *inode = file->f_mapping->host; struct kstatfs sbuf; /* * If the backing device is a block device, mirror its zeroing * capability. Set the discard sectors to the block device's zeroing * capabilities because loop discards result in blkdev_issue_zeroout(), * not blkdev_issue_discard(). This maintains consistent behavior with * file-backed loop devices: discarded regions read back as zero. 
*/ if (S_ISBLK(inode->i_mode)) { struct block_device *bdev = I_BDEV(inode); *max_discard_sectors = bdev_write_zeroes_sectors(bdev); *granularity = bdev_discard_granularity(bdev); /* * We use punch hole to reclaim the free space used by the * image a.k.a. discard. */ } else if (file->f_op->fallocate && !vfs_statfs(&file->f_path, &sbuf)) { *max_discard_sectors = UINT_MAX >> 9; *granularity = sbuf.f_bsize; } } struct loop_worker { struct rb_node rb_node; struct work_struct work; struct list_head cmd_list; struct list_head idle_list; struct loop_device *lo; struct cgroup_subsys_state *blkcg_css; unsigned long last_ran_at; }; static void loop_workfn(struct work_struct *work); #ifdef CONFIG_BLK_CGROUP static inline int queue_on_root_worker(struct cgroup_subsys_state *css) { return !css || css == blkcg_root_css; } #else static inline int queue_on_root_worker(struct cgroup_subsys_state *css) { return !css; } #endif static void loop_queue_work(struct loop_device *lo, struct loop_cmd *cmd) { struct rb_node **node, *parent = NULL; struct loop_worker *cur_worker, *worker = NULL; struct work_struct *work; struct list_head *cmd_list; spin_lock_irq(&lo->lo_work_lock); if (queue_on_root_worker(cmd->blkcg_css)) goto queue_work; node = &lo->worker_tree.rb_node; while (*node) { parent = *node; cur_worker = container_of(*node, struct loop_worker, rb_node); if (cur_worker->blkcg_css == cmd->blkcg_css) { worker = cur_worker; break; } else if ((long)cur_worker->blkcg_css < (long)cmd->blkcg_css) { node = &(*node)->rb_left; } else { node = &(*node)->rb_right; } } if (worker) goto queue_work; worker = kzalloc(sizeof(struct loop_worker), GFP_NOWAIT | __GFP_NOWARN); /* * In the event we cannot allocate a worker, just queue on the * rootcg worker and issue the I/O as the rootcg */ if (!worker) { cmd->blkcg_css = NULL; if (cmd->memcg_css) css_put(cmd->memcg_css); cmd->memcg_css = NULL; goto queue_work; } worker->blkcg_css = cmd->blkcg_css; css_get(worker->blkcg_css); INIT_WORK(&worker->work, loop_workfn); INIT_LIST_HEAD(&worker->cmd_list); INIT_LIST_HEAD(&worker->idle_list); worker->lo = lo; rb_link_node(&worker->rb_node, parent, node); rb_insert_color(&worker->rb_node, &lo->worker_tree); queue_work: if (worker) { /* * We need to remove from the idle list here while * holding the lock so that the idle timer doesn't * free the worker */ if (!list_empty(&worker->idle_list)) list_del_init(&worker->idle_list); work = &worker->work; cmd_list = &worker->cmd_list; } else { work = &lo->rootcg_work; cmd_list = &lo->rootcg_cmd_list; } list_add_tail(&cmd->list_entry, cmd_list); queue_work(lo->workqueue, work); spin_unlock_irq(&lo->lo_work_lock); } static void loop_set_timer(struct loop_device *lo) { timer_reduce(&lo->timer, jiffies + LOOP_IDLE_WORKER_TIMEOUT); } static void loop_free_idle_workers(struct loop_device *lo, bool delete_all) { struct loop_worker *pos, *worker; spin_lock_irq(&lo->lo_work_lock); list_for_each_entry_safe(worker, pos, &lo->idle_worker_list, idle_list) { if (!delete_all && time_is_after_jiffies(worker->last_ran_at + LOOP_IDLE_WORKER_TIMEOUT)) break; list_del(&worker->idle_list); rb_erase(&worker->rb_node, &lo->worker_tree); css_put(worker->blkcg_css); kfree(worker); } if (!list_empty(&lo->idle_worker_list)) loop_set_timer(lo); spin_unlock_irq(&lo->lo_work_lock); } static void loop_free_idle_workers_timer(struct timer_list *timer) { struct loop_device *lo = container_of(timer, struct loop_device, timer); return loop_free_idle_workers(lo, false); } /** * loop_set_status_from_info - configure device from 
loop_info * @lo: struct loop_device to configure * @info: struct loop_info64 to configure the device with * * Configures the loop device parameters according to the passed * in loop_info64 configuration. */ static int loop_set_status_from_info(struct loop_device *lo, const struct loop_info64 *info) { if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) return -EINVAL; switch (info->lo_encrypt_type) { case LO_CRYPT_NONE: break; case LO_CRYPT_XOR: pr_warn("support for the xor transformation has been removed.\n"); return -EINVAL; case LO_CRYPT_CRYPTOAPI: pr_warn("support for cryptoloop has been removed. Use dm-crypt instead.\n"); return -EINVAL; default: return -EINVAL; } /* Avoid assigning overflow values */ if (info->lo_offset > LLONG_MAX || info->lo_sizelimit > LLONG_MAX) return -EOVERFLOW; lo->lo_offset = info->lo_offset; lo->lo_sizelimit = info->lo_sizelimit; memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE); lo->lo_file_name[LO_NAME_SIZE-1] = 0; return 0; } static unsigned int loop_default_blocksize(struct loop_device *lo) { /* In case of direct I/O, match underlying minimum I/O size */ if (lo->lo_flags & LO_FLAGS_DIRECT_IO) return lo->lo_min_dio_size; return SECTOR_SIZE; } static void loop_update_limits(struct loop_device *lo, struct queue_limits *lim, unsigned int bsize) { struct file *file = lo->lo_backing_file; struct inode *inode = file->f_mapping->host; struct block_device *backing_bdev = NULL; u32 granularity = 0, max_discard_sectors = 0; if (S_ISBLK(inode->i_mode)) backing_bdev = I_BDEV(inode); else if (inode->i_sb->s_bdev) backing_bdev = inode->i_sb->s_bdev; if (!bsize) bsize = loop_default_blocksize(lo); loop_get_discard_config(lo, &granularity, &max_discard_sectors); lim->logical_block_size = bsize; lim->physical_block_size = bsize; lim->io_min = bsize; lim->features &= ~(BLK_FEAT_WRITE_CACHE | BLK_FEAT_ROTATIONAL); if (file->f_op->fsync && !(lo->lo_flags & LO_FLAGS_READ_ONLY)) lim->features |= BLK_FEAT_WRITE_CACHE; if (backing_bdev && !bdev_nonrot(backing_bdev)) lim->features |= BLK_FEAT_ROTATIONAL; lim->max_hw_discard_sectors = max_discard_sectors; lim->max_write_zeroes_sectors = max_discard_sectors; if (max_discard_sectors) lim->discard_granularity = granularity; else lim->discard_granularity = 0; } static int loop_configure(struct loop_device *lo, blk_mode_t mode, struct block_device *bdev, const struct loop_config *config) { struct file *file = fget(config->fd); struct queue_limits lim; int error; loff_t size; bool partscan; bool is_loop; if (!file) return -EBADF; is_loop = is_loop_device(file); /* This is safe, since we have a reference from open(). */ __module_get(THIS_MODULE); /* * If we don't hold exclusive handle for the device, upgrade to it * here to avoid changing device under exclusive owner. 
*/ if (!(mode & BLK_OPEN_EXCL)) { error = bd_prepare_to_claim(bdev, loop_configure, NULL); if (error) goto out_putf; } error = loop_global_lock_killable(lo, is_loop); if (error) goto out_bdev; error = -EBUSY; if (lo->lo_state != Lo_unbound) goto out_unlock; error = loop_validate_file(file, bdev); if (error) goto out_unlock; if ((config->info.lo_flags & ~LOOP_CONFIGURE_SETTABLE_FLAGS) != 0) { error = -EINVAL; goto out_unlock; } error = loop_set_status_from_info(lo, &config->info); if (error) goto out_unlock; lo->lo_flags = config->info.lo_flags; if (!(file->f_mode & FMODE_WRITE) || !(mode & BLK_OPEN_WRITE) || !file->f_op->write_iter) lo->lo_flags |= LO_FLAGS_READ_ONLY; if (!lo->workqueue) { lo->workqueue = alloc_workqueue("loop%d", WQ_UNBOUND | WQ_FREEZABLE, 0, lo->lo_number); if (!lo->workqueue) { error = -ENOMEM; goto out_unlock; } } /* suppress uevents while reconfiguring the device */ dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1); disk_force_media_change(lo->lo_disk); set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0); lo->lo_device = bdev; loop_assign_backing_file(lo, file); lim = queue_limits_start_update(lo->lo_queue); loop_update_limits(lo, &lim, config->block_size); /* No need to freeze the queue as the device isn't bound yet. */ error = queue_limits_commit_update(lo->lo_queue, &lim); if (error) goto out_unlock; /* * We might switch to direct I/O mode for the loop device, write back * all dirty data the page cache now that so that the individual I/O * operations don't have to do that. */ vfs_fsync(file, 0); loop_update_dio(lo); loop_sysfs_init(lo); size = get_loop_size(lo, file); loop_set_size(lo, size); /* Order wrt reading lo_state in loop_validate_file(). */ wmb(); lo->lo_state = Lo_bound; if (part_shift) lo->lo_flags |= LO_FLAGS_PARTSCAN; partscan = lo->lo_flags & LO_FLAGS_PARTSCAN; if (partscan) clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state); /* enable and uncork uevent now that we are done */ dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 0); loop_global_unlock(lo, is_loop); if (partscan) loop_reread_partitions(lo); if (!(mode & BLK_OPEN_EXCL)) bd_abort_claiming(bdev, loop_configure); return 0; out_unlock: loop_global_unlock(lo, is_loop); out_bdev: if (!(mode & BLK_OPEN_EXCL)) bd_abort_claiming(bdev, loop_configure); out_putf: fput(file); /* This is safe: open() is still holding a reference. */ module_put(THIS_MODULE); return error; } static void __loop_clr_fd(struct loop_device *lo) { struct queue_limits lim; struct file *filp; gfp_t gfp = lo->old_gfp_mask; spin_lock_irq(&lo->lo_lock); filp = lo->lo_backing_file; lo->lo_backing_file = NULL; spin_unlock_irq(&lo->lo_lock); lo->lo_device = NULL; lo->lo_offset = 0; lo->lo_sizelimit = 0; memset(lo->lo_file_name, 0, LO_NAME_SIZE); /* * Reset the block size to the default. * * No queue freezing needed because this is called from the final * ->release call only, so there can't be any outstanding I/O. */ lim = queue_limits_start_update(lo->lo_queue); lim.logical_block_size = SECTOR_SIZE; lim.physical_block_size = SECTOR_SIZE; lim.io_min = SECTOR_SIZE; queue_limits_commit_update(lo->lo_queue, &lim); invalidate_disk(lo->lo_disk); loop_sysfs_exit(lo); /* let user-space know about this change */ kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE); mapping_set_gfp_mask(filp->f_mapping, gfp); /* This is safe: open() is still holding a reference. 
*/ module_put(THIS_MODULE); disk_force_media_change(lo->lo_disk); if (lo->lo_flags & LO_FLAGS_PARTSCAN) { int err; /* * open_mutex has been held already in release path, so don't * acquire it if this function is called in such case. * * If the reread partition isn't from release path, lo_refcnt * must be at least one and it can only become zero when the * current holder is released. */ err = bdev_disk_changed(lo->lo_disk, false); if (err) pr_warn("%s: partition scan of loop%d failed (rc=%d)\n", __func__, lo->lo_number, err); /* Device is gone, no point in returning error */ } /* * lo->lo_state is set to Lo_unbound here after above partscan has * finished. There cannot be anybody else entering __loop_clr_fd() as * Lo_rundown state protects us from all the other places trying to * change the 'lo' device. */ lo->lo_flags = 0; if (!part_shift) set_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state); mutex_lock(&lo->lo_mutex); lo->lo_state = Lo_unbound; mutex_unlock(&lo->lo_mutex); /* * Need not hold lo_mutex to fput backing file. Calling fput holding * lo_mutex triggers a circular lock dependency possibility warning as * fput can take open_mutex which is usually taken before lo_mutex. */ fput(filp); } static int loop_clr_fd(struct loop_device *lo) { int err; /* * Since lo_ioctl() is called without locks held, it is possible that * loop_configure()/loop_change_fd() and loop_clr_fd() run in parallel. * * Therefore, use global lock when setting Lo_rundown state in order to * make sure that loop_validate_file() will fail if the "struct file" * which loop_configure()/loop_change_fd() found via fget() was this * loop device. */ err = loop_global_lock_killable(lo, true); if (err) return err; if (lo->lo_state != Lo_bound) { loop_global_unlock(lo, true); return -ENXIO; } /* * Mark the device for removing the backing device on last close. * If we are the only opener, also switch the state to roundown here to * prevent new openers from coming in. 
*/ lo->lo_flags |= LO_FLAGS_AUTOCLEAR; if (disk_openers(lo->lo_disk) == 1) lo->lo_state = Lo_rundown; loop_global_unlock(lo, true); return 0; } static int loop_set_status(struct loop_device *lo, const struct loop_info64 *info) { int err; bool partscan = false; bool size_changed = false; unsigned int memflags; err = mutex_lock_killable(&lo->lo_mutex); if (err) return err; if (lo->lo_state != Lo_bound) { err = -ENXIO; goto out_unlock; } if (lo->lo_offset != info->lo_offset || lo->lo_sizelimit != info->lo_sizelimit) { size_changed = true; sync_blockdev(lo->lo_device); invalidate_bdev(lo->lo_device); } /* I/O needs to be drained before changing lo_offset or lo_sizelimit */ memflags = blk_mq_freeze_queue(lo->lo_queue); err = loop_set_status_from_info(lo, info); if (err) goto out_unfreeze; partscan = !(lo->lo_flags & LO_FLAGS_PARTSCAN) && (info->lo_flags & LO_FLAGS_PARTSCAN); lo->lo_flags &= ~LOOP_SET_STATUS_CLEARABLE_FLAGS; lo->lo_flags |= (info->lo_flags & LOOP_SET_STATUS_SETTABLE_FLAGS); if (size_changed) { loff_t new_size = get_size(lo->lo_offset, lo->lo_sizelimit, lo->lo_backing_file); loop_set_size(lo, new_size); } /* update the direct I/O flag if lo_offset changed */ loop_update_dio(lo); out_unfreeze: blk_mq_unfreeze_queue(lo->lo_queue, memflags); if (partscan) clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state); out_unlock: mutex_unlock(&lo->lo_mutex); if (partscan) loop_reread_partitions(lo); return err; } static int loop_get_status(struct loop_device *lo, struct loop_info64 *info) { struct path path; struct kstat stat; int ret; ret = mutex_lock_killable(&lo->lo_mutex); if (ret) return ret; if (lo->lo_state != Lo_bound) { mutex_unlock(&lo->lo_mutex); return -ENXIO; } memset(info, 0, sizeof(*info)); info->lo_number = lo->lo_number; info->lo_offset = lo->lo_offset; info->lo_sizelimit = lo->lo_sizelimit; info->lo_flags = lo->lo_flags; memcpy(info->lo_file_name, lo->lo_file_name, LO_NAME_SIZE); /* Drop lo_mutex while we call into the filesystem. 
*/ path = lo->lo_backing_file->f_path; path_get(&path); mutex_unlock(&lo->lo_mutex); ret = vfs_getattr(&path, &stat, STATX_INO, AT_STATX_SYNC_AS_STAT); if (!ret) { info->lo_device = huge_encode_dev(stat.dev); info->lo_inode = stat.ino; info->lo_rdevice = huge_encode_dev(stat.rdev); } path_put(&path); return ret; } static void loop_info64_from_old(const struct loop_info *info, struct loop_info64 *info64) { memset(info64, 0, sizeof(*info64)); info64->lo_number = info->lo_number; info64->lo_device = info->lo_device; info64->lo_inode = info->lo_inode; info64->lo_rdevice = info->lo_rdevice; info64->lo_offset = info->lo_offset; info64->lo_sizelimit = 0; info64->lo_flags = info->lo_flags; memcpy(info64->lo_file_name, info->lo_name, LO_NAME_SIZE); } static int loop_info64_to_old(const struct loop_info64 *info64, struct loop_info *info) { memset(info, 0, sizeof(*info)); info->lo_number = info64->lo_number; info->lo_device = info64->lo_device; info->lo_inode = info64->lo_inode; info->lo_rdevice = info64->lo_rdevice; info->lo_offset = info64->lo_offset; info->lo_flags = info64->lo_flags; memcpy(info->lo_name, info64->lo_file_name, LO_NAME_SIZE); /* error in case values were truncated */ if (info->lo_device != info64->lo_device || info->lo_rdevice != info64->lo_rdevice || info->lo_inode != info64->lo_inode || info->lo_offset != info64->lo_offset) return -EOVERFLOW; return 0; } static int loop_set_status_old(struct loop_device *lo, const struct loop_info __user *arg) { struct loop_info info; struct loop_info64 info64; if (copy_from_user(&info, arg, sizeof (struct loop_info))) return -EFAULT; loop_info64_from_old(&info, &info64); return loop_set_status(lo, &info64); } static int loop_set_status64(struct loop_device *lo, const struct loop_info64 __user *arg) { struct loop_info64 info64; if (copy_from_user(&info64, arg, sizeof (struct loop_info64))) return -EFAULT; return loop_set_status(lo, &info64); } static int loop_get_status_old(struct loop_device *lo, struct loop_info __user *arg) { struct loop_info info; struct loop_info64 info64; int err; if (!arg) return -EINVAL; err = loop_get_status(lo, &info64); if (!err) err = loop_info64_to_old(&info64, &info); if (!err && copy_to_user(arg, &info, sizeof(info))) err = -EFAULT; return err; } static int loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) { struct loop_info64 info64; int err; if (!arg) return -EINVAL; err = loop_get_status(lo, &info64); if (!err && copy_to_user(arg, &info64, sizeof(info64))) err = -EFAULT; return err; } static int loop_set_capacity(struct loop_device *lo) { loff_t size; if (unlikely(lo->lo_state != Lo_bound)) return -ENXIO; size = get_loop_size(lo, lo->lo_backing_file); loop_set_size(lo, size); return 0; } static int loop_set_dio(struct loop_device *lo, unsigned long arg) { bool use_dio = !!arg; unsigned int memflags; if (lo->lo_state != Lo_bound) return -ENXIO; if (use_dio == !!(lo->lo_flags & LO_FLAGS_DIRECT_IO)) return 0; if (use_dio) { if (!lo_can_use_dio(lo)) return -EINVAL; /* flush dirty pages before starting to use direct I/O */ vfs_fsync(lo->lo_backing_file, 0); } memflags = blk_mq_freeze_queue(lo->lo_queue); if (use_dio) lo->lo_flags |= LO_FLAGS_DIRECT_IO; else lo->lo_flags &= ~LO_FLAGS_DIRECT_IO; blk_mq_unfreeze_queue(lo->lo_queue, memflags); return 0; } static int loop_set_block_size(struct loop_device *lo, unsigned long arg) { struct queue_limits lim; unsigned int memflags; int err = 0; if (lo->lo_state != Lo_bound) return -ENXIO; if (lo->lo_queue->limits.logical_block_size == arg) return 0; 
sync_blockdev(lo->lo_device); invalidate_bdev(lo->lo_device); lim = queue_limits_start_update(lo->lo_queue); loop_update_limits(lo, &lim, arg); memflags = blk_mq_freeze_queue(lo->lo_queue); err = queue_limits_commit_update(lo->lo_queue, &lim); loop_update_dio(lo); blk_mq_unfreeze_queue(lo->lo_queue, memflags); return err; } static int lo_simple_ioctl(struct loop_device *lo, unsigned int cmd, unsigned long arg) { int err; err = mutex_lock_killable(&lo->lo_mutex); if (err) return err; switch (cmd) { case LOOP_SET_CAPACITY: err = loop_set_capacity(lo); break; case LOOP_SET_DIRECT_IO: err = loop_set_dio(lo, arg); break; case LOOP_SET_BLOCK_SIZE: err = loop_set_block_size(lo, arg); break; default: err = -EINVAL; } mutex_unlock(&lo->lo_mutex); return err; } static int lo_ioctl(struct block_device *bdev, blk_mode_t mode, unsigned int cmd, unsigned long arg) { struct loop_device *lo = bdev->bd_disk->private_data; void __user *argp = (void __user *) arg; int err; switch (cmd) { case LOOP_SET_FD: { /* * Legacy case - pass in a zeroed out struct loop_config with * only the file descriptor set , which corresponds with the * default parameters we'd have used otherwise. */ struct loop_config config; memset(&config, 0, sizeof(config)); config.fd = arg; return loop_configure(lo, mode, bdev, &config); } case LOOP_CONFIGURE: { struct loop_config config; if (copy_from_user(&config, argp, sizeof(config))) return -EFAULT; return loop_configure(lo, mode, bdev, &config); } case LOOP_CHANGE_FD: return loop_change_fd(lo, bdev, arg); case LOOP_CLR_FD: return loop_clr_fd(lo); case LOOP_SET_STATUS: err = -EPERM; if ((mode & BLK_OPEN_WRITE) || capable(CAP_SYS_ADMIN)) err = loop_set_status_old(lo, argp); break; case LOOP_GET_STATUS: return loop_get_status_old(lo, argp); case LOOP_SET_STATUS64: err = -EPERM; if ((mode & BLK_OPEN_WRITE) || capable(CAP_SYS_ADMIN)) err = loop_set_status64(lo, argp); break; case LOOP_GET_STATUS64: return loop_get_status64(lo, argp); case LOOP_SET_CAPACITY: case LOOP_SET_DIRECT_IO: case LOOP_SET_BLOCK_SIZE: if (!(mode & BLK_OPEN_WRITE) && !capable(CAP_SYS_ADMIN)) return -EPERM; fallthrough; default: err = lo_simple_ioctl(lo, cmd, arg); break; } return err; } #ifdef CONFIG_COMPAT struct compat_loop_info { compat_int_t lo_number; /* ioctl r/o */ compat_dev_t lo_device; /* ioctl r/o */ compat_ulong_t lo_inode; /* ioctl r/o */ compat_dev_t lo_rdevice; /* ioctl r/o */ compat_int_t lo_offset; compat_int_t lo_encrypt_type; /* obsolete, ignored */ compat_int_t lo_encrypt_key_size; /* ioctl w/o */ compat_int_t lo_flags; /* ioctl r/o */ char lo_name[LO_NAME_SIZE]; unsigned char lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */ compat_ulong_t lo_init[2]; char reserved[4]; }; /* * Transfer 32-bit compatibility structure in userspace to 64-bit loop info * - noinlined to reduce stack space usage in main part of driver */ static noinline int loop_info64_from_compat(const struct compat_loop_info __user *arg, struct loop_info64 *info64) { struct compat_loop_info info; if (copy_from_user(&info, arg, sizeof(info))) return -EFAULT; memset(info64, 0, sizeof(*info64)); info64->lo_number = info.lo_number; info64->lo_device = info.lo_device; info64->lo_inode = info.lo_inode; info64->lo_rdevice = info.lo_rdevice; info64->lo_offset = info.lo_offset; info64->lo_sizelimit = 0; info64->lo_flags = info.lo_flags; memcpy(info64->lo_file_name, info.lo_name, LO_NAME_SIZE); return 0; } /* * Transfer 64-bit loop info to 32-bit compatibility structure in userspace * - noinlined to reduce stack space usage in main part of driver */ 
static noinline int loop_info64_to_compat(const struct loop_info64 *info64, struct compat_loop_info __user *arg) { struct compat_loop_info info; memset(&info, 0, sizeof(info)); info.lo_number = info64->lo_number; info.lo_device = info64->lo_device; info.lo_inode = info64->lo_inode; info.lo_rdevice = info64->lo_rdevice; info.lo_offset = info64->lo_offset; info.lo_flags = info64->lo_flags; memcpy(info.lo_name, info64->lo_file_name, LO_NAME_SIZE); /* error in case values were truncated */ if (info.lo_device != info64->lo_device || info.lo_rdevice != info64->lo_rdevice || info.lo_inode != info64->lo_inode || info.lo_offset != info64->lo_offset) return -EOVERFLOW; if (copy_to_user(arg, &info, sizeof(info))) return -EFAULT; return 0; } static int loop_set_status_compat(struct loop_device *lo, const struct compat_loop_info __user *arg) { struct loop_info64 info64; int ret; ret = loop_info64_from_compat(arg, &info64); if (ret < 0) return ret; return loop_set_status(lo, &info64); } static int loop_get_status_compat(struct loop_device *lo, struct compat_loop_info __user *arg) { struct loop_info64 info64; int err; if (!arg) return -EINVAL; err = loop_get_status(lo, &info64); if (!err) err = loop_info64_to_compat(&info64, arg); return err; } static int lo_compat_ioctl(struct block_device *bdev, blk_mode_t mode, unsigned int cmd, unsigned long arg) { struct loop_device *lo = bdev->bd_disk->private_data; int err; switch(cmd) { case LOOP_SET_STATUS: err = loop_set_status_compat(lo, (const struct compat_loop_info __user *)arg); break; case LOOP_GET_STATUS: err = loop_get_status_compat(lo, (struct compat_loop_info __user *)arg); break; case LOOP_SET_CAPACITY: case LOOP_CLR_FD: case LOOP_GET_STATUS64: case LOOP_SET_STATUS64: case LOOP_CONFIGURE: arg = (unsigned long) compat_ptr(arg); fallthrough; case LOOP_SET_FD: case LOOP_CHANGE_FD: case LOOP_SET_BLOCK_SIZE: case LOOP_SET_DIRECT_IO: err = lo_ioctl(bdev, mode, cmd, arg); break; default: err = -ENOIOCTLCMD; break; } return err; } #endif static int lo_open(struct gendisk *disk, blk_mode_t mode) { struct loop_device *lo = disk->private_data; int err; err = mutex_lock_killable(&lo->lo_mutex); if (err) return err; if (lo->lo_state == Lo_deleting || lo->lo_state == Lo_rundown) err = -ENXIO; mutex_unlock(&lo->lo_mutex); return err; } static void lo_release(struct gendisk *disk) { struct loop_device *lo = disk->private_data; bool need_clear = false; if (disk_openers(disk) > 0) return; /* * Clear the backing device information if this is the last close of * a device that's been marked for auto clear, or on which LOOP_CLR_FD * has been called. */ mutex_lock(&lo->lo_mutex); if (lo->lo_state == Lo_bound && (lo->lo_flags & LO_FLAGS_AUTOCLEAR)) lo->lo_state = Lo_rundown; need_clear = (lo->lo_state == Lo_rundown); mutex_unlock(&lo->lo_mutex); if (need_clear) __loop_clr_fd(lo); } static void lo_free_disk(struct gendisk *disk) { struct loop_device *lo = disk->private_data; if (lo->workqueue) destroy_workqueue(lo->workqueue); loop_free_idle_workers(lo, true); timer_shutdown_sync(&lo->timer); mutex_destroy(&lo->lo_mutex); kfree(lo); } static const struct block_device_operations lo_fops = { .owner = THIS_MODULE, .open = lo_open, .release = lo_release, .ioctl = lo_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = lo_compat_ioctl, #endif .free_disk = lo_free_disk, }; /* * And now the modules code and kernel interface. */ /* * If max_loop is specified, create that many devices upfront. * This also becomes a hard limit. 
If max_loop is not specified, * the default isn't a hard limit (as before commit 85c50197716c * changed the default value from 0 for max_loop=0 reasons), just * create CONFIG_BLK_DEV_LOOP_MIN_COUNT loop devices at module * init time. Loop devices can be requested on-demand with the * /dev/loop-control interface, or be instantiated by accessing * a 'dead' device node. */ static int max_loop = CONFIG_BLK_DEV_LOOP_MIN_COUNT; #ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD static bool max_loop_specified; static int max_loop_param_set_int(const char *val, const struct kernel_param *kp) { int ret; ret = param_set_int(val, kp); if (ret < 0) return ret; max_loop_specified = true; return 0; } static const struct kernel_param_ops max_loop_param_ops = { .set = max_loop_param_set_int, .get = param_get_int, }; module_param_cb(max_loop, &max_loop_param_ops, &max_loop, 0444); MODULE_PARM_DESC(max_loop, "Maximum number of loop devices"); #else module_param(max_loop, int, 0444); MODULE_PARM_DESC(max_loop, "Initial number of loop devices"); #endif module_param(max_part, int, 0444); MODULE_PARM_DESC(max_part, "Maximum number of partitions per loop device"); static int hw_queue_depth = LOOP_DEFAULT_HW_Q_DEPTH; static int loop_set_hw_queue_depth(const char *s, const struct kernel_param *p) { int qd, ret; ret = kstrtoint(s, 0, &qd); if (ret < 0) return ret; if (qd < 1) return -EINVAL; hw_queue_depth = qd; return 0; } static const struct kernel_param_ops loop_hw_qdepth_param_ops = { .set = loop_set_hw_queue_depth, .get = param_get_int, }; device_param_cb(hw_queue_depth, &loop_hw_qdepth_param_ops, &hw_queue_depth, 0444); MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: " __stringify(LOOP_DEFAULT_HW_Q_DEPTH)); MODULE_DESCRIPTION("Loopback device support"); MODULE_LICENSE("GPL"); MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR); static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { struct request *rq = bd->rq; struct loop_cmd *cmd = blk_mq_rq_to_pdu(rq); struct loop_device *lo = rq->q->queuedata; blk_mq_start_request(rq); if (lo->lo_state != Lo_bound) return BLK_STS_IOERR; switch (req_op(rq)) { case REQ_OP_FLUSH: case REQ_OP_DISCARD: case REQ_OP_WRITE_ZEROES: cmd->use_aio = false; break; default: cmd->use_aio = lo->lo_flags & LO_FLAGS_DIRECT_IO; break; } /* always use the first bio's css */ cmd->blkcg_css = NULL; cmd->memcg_css = NULL; #ifdef CONFIG_BLK_CGROUP if (rq->bio) { cmd->blkcg_css = bio_blkcg_css(rq->bio); #ifdef CONFIG_MEMCG if (cmd->blkcg_css) { cmd->memcg_css = cgroup_get_e_css(cmd->blkcg_css->cgroup, &memory_cgrp_subsys); } #endif } #endif loop_queue_work(lo, cmd); return BLK_STS_OK; } static void loop_handle_cmd(struct loop_cmd *cmd) { struct cgroup_subsys_state *cmd_blkcg_css = cmd->blkcg_css; struct cgroup_subsys_state *cmd_memcg_css = cmd->memcg_css; struct request *rq = blk_mq_rq_from_pdu(cmd); const bool write = op_is_write(req_op(rq)); struct loop_device *lo = rq->q->queuedata; int ret = 0; struct mem_cgroup *old_memcg = NULL; const bool use_aio = cmd->use_aio; if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY)) { ret = -EIO; goto failed; } if (cmd_blkcg_css) kthread_associate_blkcg(cmd_blkcg_css); if (cmd_memcg_css) old_memcg = set_active_memcg( mem_cgroup_from_css(cmd_memcg_css)); /* * do_req_filebacked() may call blk_mq_complete_request() synchronously * or asynchronously if using aio. Hence, do not touch 'cmd' after * do_req_filebacked() has returned unless we are sure that 'cmd' has * not yet been completed. 
*/ ret = do_req_filebacked(lo, rq); if (cmd_blkcg_css) kthread_associate_blkcg(NULL); if (cmd_memcg_css) { set_active_memcg(old_memcg); css_put(cmd_memcg_css); } failed: /* complete non-aio request */ if (!use_aio || ret) { if (ret == -EOPNOTSUPP) cmd->ret = ret; else cmd->ret = ret ? -EIO : 0; if (likely(!blk_should_fake_timeout(rq->q))) blk_mq_complete_request(rq); } } static void loop_process_work(struct loop_worker *worker, struct list_head *cmd_list, struct loop_device *lo) { int orig_flags = current->flags; struct loop_cmd *cmd; current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO; spin_lock_irq(&lo->lo_work_lock); while (!list_empty(cmd_list)) { cmd = container_of( cmd_list->next, struct loop_cmd, list_entry); list_del(cmd_list->next); spin_unlock_irq(&lo->lo_work_lock); loop_handle_cmd(cmd); cond_resched(); spin_lock_irq(&lo->lo_work_lock); } /* * We only add to the idle list if there are no pending cmds * *and* the worker will not run again which ensures that it * is safe to free any worker on the idle list */ if (worker && !work_pending(&worker->work)) { worker->last_ran_at = jiffies; list_add_tail(&worker->idle_list, &lo->idle_worker_list); loop_set_timer(lo); } spin_unlock_irq(&lo->lo_work_lock); current->flags = orig_flags; } static void loop_workfn(struct work_struct *work) { struct loop_worker *worker = container_of(work, struct loop_worker, work); loop_process_work(worker, &worker->cmd_list, worker->lo); } static void loop_rootcg_workfn(struct work_struct *work) { struct loop_device *lo = container_of(work, struct loop_device, rootcg_work); loop_process_work(NULL, &lo->rootcg_cmd_list, lo); } static const struct blk_mq_ops loop_mq_ops = { .queue_rq = loop_queue_rq, .complete = lo_complete_rq, }; static int loop_add(int i) { struct queue_limits lim = { /* * Random number picked from the historic block max_sectors cap. */ .max_hw_sectors = 2560u, }; struct loop_device *lo; struct gendisk *disk; int err; err = -ENOMEM; lo = kzalloc(sizeof(*lo), GFP_KERNEL); if (!lo) goto out; lo->worker_tree = RB_ROOT; INIT_LIST_HEAD(&lo->idle_worker_list); timer_setup(&lo->timer, loop_free_idle_workers_timer, TIMER_DEFERRABLE); lo->lo_state = Lo_unbound; err = mutex_lock_killable(&loop_ctl_mutex); if (err) goto out_free_dev; /* allocate id, if @id >= 0, we're requesting that specific id */ if (i >= 0) { err = idr_alloc(&loop_index_idr, lo, i, i + 1, GFP_KERNEL); if (err == -ENOSPC) err = -EEXIST; } else { err = idr_alloc(&loop_index_idr, lo, 0, 0, GFP_KERNEL); } mutex_unlock(&loop_ctl_mutex); if (err < 0) goto out_free_dev; i = err; lo->tag_set.ops = &loop_mq_ops; lo->tag_set.nr_hw_queues = 1; lo->tag_set.queue_depth = hw_queue_depth; lo->tag_set.numa_node = NUMA_NO_NODE; lo->tag_set.cmd_size = sizeof(struct loop_cmd); lo->tag_set.flags = BLK_MQ_F_STACKING | BLK_MQ_F_NO_SCHED_BY_DEFAULT; lo->tag_set.driver_data = lo; err = blk_mq_alloc_tag_set(&lo->tag_set); if (err) goto out_free_idr; disk = lo->lo_disk = blk_mq_alloc_disk(&lo->tag_set, &lim, lo); if (IS_ERR(disk)) { err = PTR_ERR(disk); goto out_cleanup_tags; } lo->lo_queue = lo->lo_disk->queue; /* * Disable partition scanning by default. The in-kernel partition * scanning can be requested individually per-device during its * setup. Userspace can always add and remove partitions from all * devices. The needed partition minors are allocated from the * extended minor space, the main loop device numbers will continue * to match the loop minors, regardless of the number of partitions * used. 
* * If max_part is given, partition scanning is globally enabled for * all loop devices. The minors for the main loop devices will be * multiples of max_part. * * Note: Global-for-all-devices, set-only-at-init, read-only module * parameteters like 'max_loop' and 'max_part' make things needlessly * complicated, are too static, inflexible and may surprise * userspace tools. Parameters like this in general should be avoided. */ if (!part_shift) set_bit(GD_SUPPRESS_PART_SCAN, &disk->state); mutex_init(&lo->lo_mutex); lo->lo_number = i; spin_lock_init(&lo->lo_lock); spin_lock_init(&lo->lo_work_lock); INIT_WORK(&lo->rootcg_work, loop_rootcg_workfn); INIT_LIST_HEAD(&lo->rootcg_cmd_list); disk->major = LOOP_MAJOR; disk->first_minor = i << part_shift; disk->minors = 1 << part_shift; disk->fops = &lo_fops; disk->private_data = lo; disk->queue = lo->lo_queue; disk->events = DISK_EVENT_MEDIA_CHANGE; disk->event_flags = DISK_EVENT_FLAG_UEVENT; sprintf(disk->disk_name, "loop%d", i); /* Make this loop device reachable from pathname. */ err = add_disk(disk); if (err) goto out_cleanup_disk; /* Show this loop device. */ mutex_lock(&loop_ctl_mutex); lo->idr_visible = true; mutex_unlock(&loop_ctl_mutex); return i; out_cleanup_disk: put_disk(disk); out_cleanup_tags: blk_mq_free_tag_set(&lo->tag_set); out_free_idr: mutex_lock(&loop_ctl_mutex); idr_remove(&loop_index_idr, i); mutex_unlock(&loop_ctl_mutex); out_free_dev: kfree(lo); out: return err; } static void loop_remove(struct loop_device *lo) { /* Make this loop device unreachable from pathname. */ del_gendisk(lo->lo_disk); blk_mq_free_tag_set(&lo->tag_set); mutex_lock(&loop_ctl_mutex); idr_remove(&loop_index_idr, lo->lo_number); mutex_unlock(&loop_ctl_mutex); put_disk(lo->lo_disk); } #ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD static void loop_probe(dev_t dev) { int idx = MINOR(dev) >> part_shift; if (max_loop_specified && max_loop && idx >= max_loop) return; loop_add(idx); } #else #define loop_probe NULL #endif /* !CONFIG_BLOCK_LEGACY_AUTOLOAD */ static int loop_control_remove(int idx) { struct loop_device *lo; int ret; if (idx < 0) { pr_warn_once("deleting an unspecified loop device is not supported.\n"); return -EINVAL; } /* Hide this loop device for serialization. */ ret = mutex_lock_killable(&loop_ctl_mutex); if (ret) return ret; lo = idr_find(&loop_index_idr, idx); if (!lo || !lo->idr_visible) ret = -ENODEV; else lo->idr_visible = false; mutex_unlock(&loop_ctl_mutex); if (ret) return ret; /* Check whether this loop device can be removed. */ ret = mutex_lock_killable(&lo->lo_mutex); if (ret) goto mark_visible; if (lo->lo_state != Lo_unbound || disk_openers(lo->lo_disk) > 0) { mutex_unlock(&lo->lo_mutex); ret = -EBUSY; goto mark_visible; } /* Mark this loop device as no more bound, but not quite unbound yet */ lo->lo_state = Lo_deleting; mutex_unlock(&lo->lo_mutex); loop_remove(lo); return 0; mark_visible: /* Show this loop device again. */ mutex_lock(&loop_ctl_mutex); lo->idr_visible = true; mutex_unlock(&loop_ctl_mutex); return ret; } static int loop_control_get_free(int idx) { struct loop_device *lo; int id, ret; ret = mutex_lock_killable(&loop_ctl_mutex); if (ret) return ret; idr_for_each_entry(&loop_index_idr, lo, id) { /* Hitting a race results in creating a new loop device which is harmless. 
*/ if (lo->idr_visible && data_race(lo->lo_state) == Lo_unbound) goto found; } mutex_unlock(&loop_ctl_mutex); return loop_add(-1); found: mutex_unlock(&loop_ctl_mutex); return id; } static long loop_control_ioctl(struct file *file, unsigned int cmd, unsigned long parm) { switch (cmd) { case LOOP_CTL_ADD: return loop_add(parm); case LOOP_CTL_REMOVE: return loop_control_remove(parm); case LOOP_CTL_GET_FREE: return loop_control_get_free(parm); default: return -ENOSYS; } } static const struct file_operations loop_ctl_fops = { .open = nonseekable_open, .unlocked_ioctl = loop_control_ioctl, .compat_ioctl = loop_control_ioctl, .owner = THIS_MODULE, .llseek = noop_llseek, }; static struct miscdevice loop_misc = { .minor = LOOP_CTRL_MINOR, .name = "loop-control", .fops = &loop_ctl_fops, }; MODULE_ALIAS_MISCDEV(LOOP_CTRL_MINOR); MODULE_ALIAS("devname:loop-control"); static int __init loop_init(void) { int i; int err; part_shift = 0; if (max_part > 0) { part_shift = fls(max_part); /* * Adjust max_part according to part_shift as it is exported * to user space so that user can decide correct minor number * if [s]he want to create more devices. * * Note that -1 is required because partition 0 is reserved * for the whole disk. */ max_part = (1UL << part_shift) - 1; } if ((1UL << part_shift) > DISK_MAX_PARTS) { err = -EINVAL; goto err_out; } if (max_loop > 1UL << (MINORBITS - part_shift)) { err = -EINVAL; goto err_out; } err = misc_register(&loop_misc); if (err < 0) goto err_out; if (__register_blkdev(LOOP_MAJOR, "loop", loop_probe)) { err = -EIO; goto misc_out; } /* pre-create number of devices given by config or max_loop */ for (i = 0; i < max_loop; i++) loop_add(i); printk(KERN_INFO "loop: module loaded\n"); return 0; misc_out: misc_deregister(&loop_misc); err_out: return err; } static void __exit loop_exit(void) { struct loop_device *lo; int id; unregister_blkdev(LOOP_MAJOR, "loop"); misc_deregister(&loop_misc); /* * There is no need to use loop_ctl_mutex here, for nobody else can * access loop_index_idr when this module is unloading (unless forced * module unloading is requested). If this is not a clean unloading, * we have no means to avoid kernel crash. */ idr_for_each_entry(&loop_index_idr, lo, id) loop_remove(lo); idr_destroy(&loop_index_idr); } module_init(loop_init); module_exit(loop_exit); #ifndef MODULE static int __init max_loop_setup(char *str) { max_loop = simple_strtol(str, NULL, 0); #ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD max_loop_specified = true; #endif return 1; } __setup("max_loop=", max_loop_setup); #endif |
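/*
 * Illustrative userspace sketch (an assumption for this write-up, not part of
 * the driver): one plausible way to exercise the LOOP_CTL_GET_FREE and
 * LOOP_CONFIGURE paths implemented above. The device paths, the chosen flags
 * and the minimal error handling are example choices; running it typically
 * requires CAP_SYS_ADMIN.
 */
#include <fcntl.h>
#include <linux/loop.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	struct loop_config cfg;
	char loopdev[32];
	int ctl_fd, loop_fd, backing_fd, idx;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <backing-file>\n", argv[0]);
		return 1;
	}

	/* Ask loop-control for a free minor; this may create a new device. */
	ctl_fd = open("/dev/loop-control", O_RDWR);
	if (ctl_fd < 0) {
		perror("open /dev/loop-control");
		return 1;
	}
	idx = ioctl(ctl_fd, LOOP_CTL_GET_FREE);
	close(ctl_fd);
	if (idx < 0) {
		perror("LOOP_CTL_GET_FREE");
		return 1;
	}
	snprintf(loopdev, sizeof(loopdev), "/dev/loop%d", idx);

	backing_fd = open(argv[1], O_RDWR);
	loop_fd = open(loopdev, O_RDWR);
	if (backing_fd < 0 || loop_fd < 0) {
		perror("open");
		return 1;
	}

	/*
	 * Bind the backing file in a single step. A zero block_size keeps the
	 * driver's default; LO_FLAGS_AUTOCLEAR detaches the file again on the
	 * last close of the loop device.
	 */
	memset(&cfg, 0, sizeof(cfg));
	cfg.fd = backing_fd;
	cfg.block_size = 0;
	cfg.info.lo_flags = LO_FLAGS_AUTOCLEAR;
	if (ioctl(loop_fd, LOOP_CONFIGURE, &cfg) < 0) {
		perror("LOOP_CONFIGURE");
		return 1;
	}

	printf("%s is now backed by %s\n", loopdev, argv[1]);
	close(backing_fd);
	close(loop_fd);
	return 0;
}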
// SPDX-License-Identifier: GPL-2.0-only /* * scsi_error.c Copyright (C) 1997 Eric Youngdale * * SCSI error/timeout handling * Initial versions: Eric Youngdale. Based upon conversations with * Leonard Zubkoff and David Miller at Linux Expo, * ideas originating from all over the place. * * Restructured scsi_unjam_host and associated functions. * September 04, 2002 Mike Anderson (andmike@us.ibm.com) * * Forward port of Russell King's (rmk@arm.linux.org.uk) changes and * minor cleanups. * September 30, 2002 Mike Anderson (andmike@us.ibm.com) */ #include <linux/module.h> #include <linux/sched.h> #include <linux/gfp.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/kernel.h> #include <linux/freezer.h> #include <linux/kthread.h> #include <linux/interrupt.h> #include <linux/blkdev.h> #include <linux/delay.h> #include <linux/jiffies.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> #include <scsi/scsi_dbg.h> #include <scsi/scsi_device.h> #include <scsi/scsi_driver.h> #include <scsi/scsi_eh.h> #include <scsi/scsi_common.h> #include <scsi/scsi_transport.h> #include <scsi/scsi_host.h> #include <scsi/scsi_ioctl.h> #include <scsi/scsi_dh.h> #include <scsi/scsi_devinfo.h> #include <scsi/sg.h> #include "scsi_priv.h" #include "scsi_logging.h" #include "scsi_transport_api.h" #include <trace/events/scsi.h> #include <linux/unaligned.h> /* * These should *probably* be handled by the host itself. * Since it is allowed to sleep, it probably should. */ #define BUS_RESET_SETTLE_TIME (10) #define HOST_RESET_SETTLE_TIME (10) static int scsi_eh_try_stu(struct scsi_cmnd *scmd); static enum scsi_disposition scsi_try_to_abort_cmd(const struct scsi_host_template *, struct scsi_cmnd *); void scsi_eh_wakeup(struct Scsi_Host *shost, unsigned int busy) { lockdep_assert_held(shost->host_lock); if (busy == shost->host_failed) { trace_scsi_eh_wakeup(shost); wake_up_process(shost->ehandler); SCSI_LOG_ERROR_RECOVERY(5, shost_printk(KERN_INFO, shost, "Waking error handler thread\n")); } } /** * scsi_schedule_eh - schedule EH for SCSI host * @shost: SCSI host to invoke error handling on. * * Schedule SCSI EH without scmd.
*/ void scsi_schedule_eh(struct Scsi_Host *shost) { unsigned long flags; spin_lock_irqsave(shost->host_lock, flags); if (scsi_host_set_state(shost, SHOST_RECOVERY) == 0 || scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY) == 0) { shost->host_eh_scheduled++; scsi_eh_wakeup(shost, scsi_host_busy(shost)); } spin_unlock_irqrestore(shost->host_lock, flags); } EXPORT_SYMBOL_GPL(scsi_schedule_eh); static int scsi_host_eh_past_deadline(struct Scsi_Host *shost) { if (!shost->last_reset || shost->eh_deadline == -1) return 0; /* * 32bit accesses are guaranteed to be atomic * (on all supported architectures), so instead * of using a spinlock we can as well double check * if eh_deadline has been set to 'off' during the * time_before call. */ if (time_before(jiffies, shost->last_reset + shost->eh_deadline) && shost->eh_deadline > -1) return 0; return 1; } static bool scsi_cmd_retry_allowed(struct scsi_cmnd *cmd) { if (cmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT) return true; return ++cmd->retries <= cmd->allowed; } static bool scsi_eh_should_retry_cmd(struct scsi_cmnd *cmd) { struct scsi_device *sdev = cmd->device; struct Scsi_Host *host = sdev->host; if (host->hostt->eh_should_retry_cmd) return host->hostt->eh_should_retry_cmd(cmd); return true; } /** * scmd_eh_abort_handler - Handle command aborts * @work: command to be aborted. * * Note: this function must be called only for a command that has timed out. * Because the block layer marks a request as complete before it calls * scsi_timeout(), a .scsi_done() call from the LLD for a command that has * timed out do not have any effect. Hence it is safe to call * scsi_finish_command() from this function. */ void scmd_eh_abort_handler(struct work_struct *work) { struct scsi_cmnd *scmd = container_of(work, struct scsi_cmnd, abort_work.work); struct scsi_device *sdev = scmd->device; struct Scsi_Host *shost = sdev->host; enum scsi_disposition rtn; unsigned long flags; if (scsi_host_eh_past_deadline(shost)) { SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd, "eh timeout, not aborting\n")); goto out; } SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd, "aborting command\n")); rtn = scsi_try_to_abort_cmd(shost->hostt, scmd); if (rtn != SUCCESS) { SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd, "cmd abort %s\n", (rtn == FAST_IO_FAIL) ? "not send" : "failed")); goto out; } set_host_byte(scmd, DID_TIME_OUT); if (scsi_host_eh_past_deadline(shost)) { SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd, "eh timeout, not retrying " "aborted command\n")); goto out; } spin_lock_irqsave(shost->host_lock, flags); list_del_init(&scmd->eh_entry); /* * If the abort succeeds, and there is no further * EH action, clear the ->last_reset time. */ if (list_empty(&shost->eh_abort_list) && list_empty(&shost->eh_cmd_q)) if (shost->eh_deadline != -1) shost->last_reset = 0; spin_unlock_irqrestore(shost->host_lock, flags); if (!scsi_noretry_cmd(scmd) && scsi_cmd_retry_allowed(scmd) && scsi_eh_should_retry_cmd(scmd)) { SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_WARNING, scmd, "retry aborted command\n")); scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY); } else { SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_WARNING, scmd, "finish aborted command\n")); scsi_finish_command(scmd); } return; out: spin_lock_irqsave(shost->host_lock, flags); list_del_init(&scmd->eh_entry); spin_unlock_irqrestore(shost->host_lock, flags); scsi_eh_scmd_add(scmd); } /** * scsi_abort_command - schedule a command abort * @scmd: scmd to abort. 
* * We only need to abort commands after a command timeout */ static int scsi_abort_command(struct scsi_cmnd *scmd) { struct scsi_device *sdev = scmd->device; struct Scsi_Host *shost = sdev->host; unsigned long flags; if (!shost->hostt->eh_abort_handler) { /* No abort handler, fail command directly */ return FAILED; } if (scmd->eh_eflags & SCSI_EH_ABORT_SCHEDULED) { /* * Retry after abort failed, escalate to next level. */ SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd, "previous abort failed\n")); BUG_ON(delayed_work_pending(&scmd->abort_work)); return FAILED; } spin_lock_irqsave(shost->host_lock, flags); if (shost->eh_deadline != -1 && !shost->last_reset) shost->last_reset = jiffies; BUG_ON(!list_empty(&scmd->eh_entry)); list_add_tail(&scmd->eh_entry, &shost->eh_abort_list); spin_unlock_irqrestore(shost->host_lock, flags); scmd->eh_eflags |= SCSI_EH_ABORT_SCHEDULED; SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd, "abort scheduled\n")); queue_delayed_work(shost->tmf_work_q, &scmd->abort_work, HZ / 100); return SUCCESS; } /** * scsi_eh_reset - call into ->eh_action to reset internal counters * @scmd: scmd to run eh on. * * The scsi driver might be carrying internal state about the * devices, so we need to call into the driver to reset the * internal state once the error handler is started. */ static void scsi_eh_reset(struct scsi_cmnd *scmd) { if (!blk_rq_is_passthrough(scsi_cmd_to_rq(scmd))) { struct scsi_driver *sdrv = scsi_cmd_to_driver(scmd); if (sdrv->eh_reset) sdrv->eh_reset(scmd); } } static void scsi_eh_inc_host_failed(struct rcu_head *head) { struct scsi_cmnd *scmd = container_of(head, typeof(*scmd), rcu); struct Scsi_Host *shost = scmd->device->host; unsigned int busy = scsi_host_busy(shost); unsigned long flags; spin_lock_irqsave(shost->host_lock, flags); shost->host_failed++; scsi_eh_wakeup(shost, busy); spin_unlock_irqrestore(shost->host_lock, flags); } /** * scsi_eh_scmd_add - add scsi cmd to error handling. * @scmd: scmd to run eh on. */ void scsi_eh_scmd_add(struct scsi_cmnd *scmd) { struct Scsi_Host *shost = scmd->device->host; unsigned long flags; int ret; WARN_ON_ONCE(!shost->ehandler); WARN_ON_ONCE(!test_bit(SCMD_STATE_INFLIGHT, &scmd->state)); spin_lock_irqsave(shost->host_lock, flags); if (scsi_host_set_state(shost, SHOST_RECOVERY)) { ret = scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY); WARN_ON_ONCE(ret); } if (shost->eh_deadline != -1 && !shost->last_reset) shost->last_reset = jiffies; scsi_eh_reset(scmd); list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q); spin_unlock_irqrestore(shost->host_lock, flags); /* * Ensure that all tasks observe the host state change before the * host_failed change. */ call_rcu_hurry(&scmd->rcu, scsi_eh_inc_host_failed); } /** * scsi_timeout - Timeout function for normal scsi commands. * @req: request that is timing out. * * Notes: * We do not need to lock this. There is the potential for a race * only in that the normal completion handling might run, but if the * normal completion function determines that the timer has already * fired, then it mustn't do anything. 
*/ enum blk_eh_timer_return scsi_timeout(struct request *req) { struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req); struct Scsi_Host *host = scmd->device->host; trace_scsi_dispatch_cmd_timeout(scmd); scsi_log_completion(scmd, TIMEOUT_ERROR); atomic_inc(&scmd->device->iotmo_cnt); if (host->eh_deadline != -1 && !host->last_reset) host->last_reset = jiffies; if (host->hostt->eh_timed_out) { switch (host->hostt->eh_timed_out(scmd)) { case SCSI_EH_DONE: return BLK_EH_DONE; case SCSI_EH_RESET_TIMER: return BLK_EH_RESET_TIMER; case SCSI_EH_NOT_HANDLED: break; } } /* * If scsi_done() has already set SCMD_STATE_COMPLETE, do not modify * *scmd. */ if (test_and_set_bit(SCMD_STATE_COMPLETE, &scmd->state)) return BLK_EH_DONE; atomic_inc(&scmd->device->iodone_cnt); if (scsi_abort_command(scmd) != SUCCESS) { set_host_byte(scmd, DID_TIME_OUT); scsi_eh_scmd_add(scmd); } return BLK_EH_DONE; } /** * scsi_block_when_processing_errors - Prevent cmds from being queued. * @sdev: Device on which we are performing recovery. * * Description: * We block until the host is out of error recovery, and then check to * see whether the host or the device is offline. * * Return value: * 0 when dev was taken offline by error recovery. 1 OK to proceed. */ int scsi_block_when_processing_errors(struct scsi_device *sdev) { int online; wait_event(sdev->host->host_wait, !scsi_host_in_recovery(sdev->host)); online = scsi_device_online(sdev); return online; } EXPORT_SYMBOL(scsi_block_when_processing_errors); #ifdef CONFIG_SCSI_LOGGING /** * scsi_eh_prt_fail_stats - Log info on failures. * @shost: scsi host being recovered. * @work_q: Queue of scsi cmds to process. */ static inline void scsi_eh_prt_fail_stats(struct Scsi_Host *shost, struct list_head *work_q) { struct scsi_cmnd *scmd; struct scsi_device *sdev; int total_failures = 0; int cmd_failed = 0; int cmd_cancel = 0; int devices_failed = 0; shost_for_each_device(sdev, shost) { list_for_each_entry(scmd, work_q, eh_entry) { if (scmd->device == sdev) { ++total_failures; if (scmd->eh_eflags & SCSI_EH_ABORT_SCHEDULED) ++cmd_cancel; else ++cmd_failed; } } if (cmd_cancel || cmd_failed) { SCSI_LOG_ERROR_RECOVERY(3, shost_printk(KERN_INFO, shost, "%s: cmds failed: %d, cancel: %d\n", __func__, cmd_failed, cmd_cancel)); cmd_cancel = 0; cmd_failed = 0; ++devices_failed; } } SCSI_LOG_ERROR_RECOVERY(2, shost_printk(KERN_INFO, shost, "Total of %d commands on %d" " devices require eh work\n", total_failures, devices_failed)); } #endif /** * scsi_report_lun_change - Set flag on all *other* devices on the same target * to indicate that a UNIT ATTENTION is expected. * @sdev: Device reporting the UNIT ATTENTION */ static void scsi_report_lun_change(struct scsi_device *sdev) { sdev->sdev_target->expecting_lun_change = 1; } /** * scsi_report_sense - Examine scsi sense information and log messages for * certain conditions, also issue uevents for some of them. * @sdev: Device reporting the sense code * @sshdr: sshdr to be examined */ static void scsi_report_sense(struct scsi_device *sdev, struct scsi_sense_hdr *sshdr) { enum scsi_device_event evt_type = SDEV_EVT_MAXBITS; /* i.e. none */ if (sshdr->sense_key == UNIT_ATTENTION) { if (sshdr->asc == 0x3f && sshdr->ascq == 0x03) { evt_type = SDEV_EVT_INQUIRY_CHANGE_REPORTED; sdev_printk(KERN_WARNING, sdev, "Inquiry data has changed"); } else if (sshdr->asc == 0x3f && sshdr->ascq == 0x0e) { evt_type = SDEV_EVT_LUN_CHANGE_REPORTED; scsi_report_lun_change(sdev); sdev_printk(KERN_WARNING, sdev, "LUN assignments on this target have " "changed. 
The Linux SCSI layer does not " "automatically remap LUN assignments.\n"); } else if (sshdr->asc == 0x3f) sdev_printk(KERN_WARNING, sdev, "Operating parameters on this target have " "changed. The Linux SCSI layer does not " "automatically adjust these parameters.\n"); if (sshdr->asc == 0x38 && sshdr->ascq == 0x07) { evt_type = SDEV_EVT_SOFT_THRESHOLD_REACHED_REPORTED; sdev_printk(KERN_WARNING, sdev, "Warning! Received an indication that the " "LUN reached a thin provisioning soft " "threshold.\n"); } if (sshdr->asc == 0x29) { evt_type = SDEV_EVT_POWER_ON_RESET_OCCURRED; /* * Do not print message if it is an expected side-effect * of runtime PM. */ if (!sdev->silence_suspend) sdev_printk(KERN_WARNING, sdev, "Power-on or device reset occurred\n"); } if (sshdr->asc == 0x2a && sshdr->ascq == 0x01) { evt_type = SDEV_EVT_MODE_PARAMETER_CHANGE_REPORTED; sdev_printk(KERN_WARNING, sdev, "Mode parameters changed"); } else if (sshdr->asc == 0x2a && sshdr->ascq == 0x06) { evt_type = SDEV_EVT_ALUA_STATE_CHANGE_REPORTED; sdev_printk(KERN_WARNING, sdev, "Asymmetric access state changed"); } else if (sshdr->asc == 0x2a && sshdr->ascq == 0x09) { evt_type = SDEV_EVT_CAPACITY_CHANGE_REPORTED; sdev_printk(KERN_WARNING, sdev, "Capacity data has changed"); } else if (sshdr->asc == 0x2a) sdev_printk(KERN_WARNING, sdev, "Parameters changed"); } if (evt_type != SDEV_EVT_MAXBITS) { set_bit(evt_type, sdev->pending_events); schedule_work(&sdev->event_work); } } static inline void set_scsi_ml_byte(struct scsi_cmnd *cmd, u8 status) { cmd->result = (cmd->result & 0xffff00ff) | (status << 8); } /** * scsi_check_sense - Examine scsi cmd sense * @scmd: Cmd to have sense checked. * * Return value: * SUCCESS or FAILED or NEEDS_RETRY or ADD_TO_MLQUEUE * * Notes: * When a deferred error is detected the current command has * not been executed and needs retrying. */ enum scsi_disposition scsi_check_sense(struct scsi_cmnd *scmd) { struct request *req = scsi_cmd_to_rq(scmd); struct scsi_device *sdev = scmd->device; struct scsi_sense_hdr sshdr; if (! scsi_command_normalize_sense(scmd, &sshdr)) return FAILED; /* no valid sense data */ scsi_report_sense(sdev, &sshdr); if (sshdr.sense_key == UNIT_ATTENTION) { /* * Increment the counters for Power on/Reset or New Media so * that all ULDs interested in these can see that those have * happened, even if someone else gets the sense data. */ if (sshdr.asc == 0x28) scmd->device->ua_new_media_ctr++; else if (sshdr.asc == 0x29) scmd->device->ua_por_ctr++; } if (scsi_sense_is_deferred(&sshdr)) return NEEDS_RETRY; if (sdev->handler && sdev->handler->check_sense) { enum scsi_disposition rc; rc = sdev->handler->check_sense(sdev, &sshdr); if (rc != SCSI_RETURN_NOT_HANDLED) return rc; /* handler does not care. Drop down to default handling */ } if (scmd->cmnd[0] == TEST_UNIT_READY && scmd->submitter != SUBMITTED_BY_SCSI_ERROR_HANDLER) /* * nasty: for mid-layer issued TURs, we need to return the * actual sense data without any recovery attempt. For eh * issued ones, we need to try to recover and interpret */ return SUCCESS; /* * Previous logic looked for FILEMARK, EOM or ILI which are * mainly associated with tapes and returned SUCCESS. */ if (sshdr.response_code == 0x70) { /* fixed format */ if (scmd->sense_buffer[2] & 0xe0) return SUCCESS; } else { /* * descriptor format: look for "stream commands sense data * descriptor" (see SSC-3). Assume single sense data * descriptor. Ignore ILI from SBC-2 READ LONG and WRITE LONG. 
*/ if ((sshdr.additional_length > 3) && (scmd->sense_buffer[8] == 0x4) && (scmd->sense_buffer[11] & 0xe0)) return SUCCESS; } switch (sshdr.sense_key) { case NO_SENSE: return SUCCESS; case RECOVERED_ERROR: return /* soft_error */ SUCCESS; case ABORTED_COMMAND: if (sshdr.asc == 0x10) /* DIF */ return SUCCESS; /* * Check aborts due to command duration limit policy: * ABORTED COMMAND additional sense code with the * COMMAND TIMEOUT BEFORE PROCESSING or * COMMAND TIMEOUT DURING PROCESSING or * COMMAND TIMEOUT DURING PROCESSING DUE TO ERROR RECOVERY * additional sense code qualifiers. */ if (sshdr.asc == 0x2e && sshdr.ascq >= 0x01 && sshdr.ascq <= 0x03) { set_scsi_ml_byte(scmd, SCSIML_STAT_DL_TIMEOUT); req->cmd_flags |= REQ_FAILFAST_DEV; req->rq_flags |= RQF_QUIET; return SUCCESS; } if (sshdr.asc == 0x44 && sdev->sdev_bflags & BLIST_RETRY_ITF) return ADD_TO_MLQUEUE; if (sshdr.asc == 0xc1 && sshdr.ascq == 0x01 && sdev->sdev_bflags & BLIST_RETRY_ASC_C1) return ADD_TO_MLQUEUE; return NEEDS_RETRY; case NOT_READY: case UNIT_ATTENTION: /* * if we are expecting a cc/ua because of a bus reset that we * performed, treat this just as a retry. otherwise this is * information that we should pass up to the upper-level driver * so that we can deal with it there. */ if (scmd->device->expecting_cc_ua) { /* * Because some device does not queue unit * attentions correctly, we carefully check * additional sense code and qualifier so as * not to squash media change unit attention. */ if (sshdr.asc != 0x28 || sshdr.ascq != 0x00) { scmd->device->expecting_cc_ua = 0; return NEEDS_RETRY; } } /* * we might also expect a cc/ua if another LUN on the target * reported a UA with an ASC/ASCQ of 3F 0E - * REPORTED LUNS DATA HAS CHANGED. */ if (scmd->device->sdev_target->expecting_lun_change && sshdr.asc == 0x3f && sshdr.ascq == 0x0e) return NEEDS_RETRY; /* * if the device is in the process of becoming ready, we * should retry. */ if ((sshdr.asc == 0x04) && (sshdr.ascq == 0x01)) return NEEDS_RETRY; /* * if the device is not started, we need to wake * the error handler to start the motor */ if (scmd->device->allow_restart && (sshdr.asc == 0x04) && (sshdr.ascq == 0x02)) return FAILED; /* * Pass the UA upwards for a determination in the completion * functions. 
*/ return SUCCESS; /* these are not supported */ case DATA_PROTECT: if (sshdr.asc == 0x27 && sshdr.ascq == 0x07) { /* Thin provisioning hard threshold reached */ set_scsi_ml_byte(scmd, SCSIML_STAT_NOSPC); return SUCCESS; } fallthrough; case COPY_ABORTED: case VOLUME_OVERFLOW: case MISCOMPARE: case BLANK_CHECK: set_scsi_ml_byte(scmd, SCSIML_STAT_TGT_FAILURE); return SUCCESS; case MEDIUM_ERROR: if (sshdr.asc == 0x11 || /* UNRECOVERED READ ERR */ sshdr.asc == 0x13 || /* AMNF DATA FIELD */ sshdr.asc == 0x14) { /* RECORD NOT FOUND */ set_scsi_ml_byte(scmd, SCSIML_STAT_MED_ERROR); return SUCCESS; } return NEEDS_RETRY; case HARDWARE_ERROR: if (scmd->device->retry_hwerror) return ADD_TO_MLQUEUE; else set_scsi_ml_byte(scmd, SCSIML_STAT_TGT_FAILURE); fallthrough; case ILLEGAL_REQUEST: if (sshdr.asc == 0x20 || /* Invalid command operation code */ sshdr.asc == 0x21 || /* Logical block address out of range */ sshdr.asc == 0x22 || /* Invalid function */ sshdr.asc == 0x24 || /* Invalid field in cdb */ sshdr.asc == 0x26 || /* Parameter value invalid */ sshdr.asc == 0x27) { /* Write protected */ set_scsi_ml_byte(scmd, SCSIML_STAT_TGT_FAILURE); } return SUCCESS; case COMPLETED: /* * A command using command duration limits (CDL) with a * descriptor set with policy 0xD may be completed with success * and the sense data DATA CURRENTLY UNAVAILABLE, indicating * that the command was in fact aborted because it exceeded its * duration limit. Never retry these commands. */ if (sshdr.asc == 0x55 && sshdr.ascq == 0x0a) { set_scsi_ml_byte(scmd, SCSIML_STAT_DL_TIMEOUT); req->cmd_flags |= REQ_FAILFAST_DEV; req->rq_flags |= RQF_QUIET; } return SUCCESS; default: return SUCCESS; } } EXPORT_SYMBOL_GPL(scsi_check_sense); static void scsi_handle_queue_ramp_up(struct scsi_device *sdev) { const struct scsi_host_template *sht = sdev->host->hostt; struct scsi_device *tmp_sdev; if (!sht->track_queue_depth || sdev->queue_depth >= sdev->max_queue_depth) return; if (time_before(jiffies, sdev->last_queue_ramp_up + sdev->queue_ramp_up_period)) return; if (time_before(jiffies, sdev->last_queue_full_time + sdev->queue_ramp_up_period)) return; /* * Walk all devices of a target and do * ramp up on them. */ shost_for_each_device(tmp_sdev, sdev->host) { if (tmp_sdev->channel != sdev->channel || tmp_sdev->id != sdev->id || tmp_sdev->queue_depth == sdev->max_queue_depth) continue; scsi_change_queue_depth(tmp_sdev, tmp_sdev->queue_depth + 1); sdev->last_queue_ramp_up = jiffies; } } static void scsi_handle_queue_full(struct scsi_device *sdev) { const struct scsi_host_template *sht = sdev->host->hostt; struct scsi_device *tmp_sdev; if (!sht->track_queue_depth) return; shost_for_each_device(tmp_sdev, sdev->host) { if (tmp_sdev->channel != sdev->channel || tmp_sdev->id != sdev->id) continue; /* * We do not know the number of commands that were at * the device when we got the queue full so we start * from the highest possible value and work our way down. */ scsi_track_queue_full(tmp_sdev, tmp_sdev->queue_depth - 1); } } /** * scsi_eh_completed_normally - Disposition a eh cmd on return from LLD. * @scmd: SCSI cmd to examine. * * Notes: * This is *only* called when we are examining the status of commands * queued during error recovery. the main difference here is that we * don't allow for the possibility of retries here, and we are a lot * more restrictive about what we consider acceptable. 
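 *
 * For reference, the checks below decompose scmd->result with the
 * standard helpers, roughly as in this sketch (the local variable
 * names are illustrative only):
 *
 *	int host = host_byte(scmd->result);
 *	int status = get_status_byte(scmd);
 *
 * where host carries the DID_* value set by the LLD and status carries
 * the SAM_STAT_* value returned by the device.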
*/ static enum scsi_disposition scsi_eh_completed_normally(struct scsi_cmnd *scmd) { /* * first check the host byte, to see if there is anything in there * that would indicate what we need to do. */ if (host_byte(scmd->result) == DID_RESET) { /* * rats. we are already in the error handler, so we now * get to try and figure out what to do next. if the sense * is valid, we have a pretty good idea of what to do. * if not, we mark it as FAILED. */ return scsi_check_sense(scmd); } if (host_byte(scmd->result) != DID_OK) return FAILED; /* * now, check the status byte to see if this indicates * anything special. */ switch (get_status_byte(scmd)) { case SAM_STAT_GOOD: scsi_handle_queue_ramp_up(scmd->device); if (scmd->sense_buffer && SCSI_SENSE_VALID(scmd)) /* * If we have sense data, call scsi_check_sense() in * order to set the correct SCSI ML byte (if any). * No point in checking the return value, since the * command has already completed successfully. */ scsi_check_sense(scmd); fallthrough; case SAM_STAT_COMMAND_TERMINATED: return SUCCESS; case SAM_STAT_CHECK_CONDITION: return scsi_check_sense(scmd); case SAM_STAT_CONDITION_MET: case SAM_STAT_INTERMEDIATE: case SAM_STAT_INTERMEDIATE_CONDITION_MET: /* * who knows? FIXME(eric) */ return SUCCESS; case SAM_STAT_RESERVATION_CONFLICT: if (scmd->cmnd[0] == TEST_UNIT_READY) /* it is a success, we probed the device and * found it */ return SUCCESS; /* otherwise, we failed to send the command */ return FAILED; case SAM_STAT_TASK_SET_FULL: scsi_handle_queue_full(scmd->device); fallthrough; case SAM_STAT_BUSY: return NEEDS_RETRY; default: return FAILED; } return FAILED; } /** * scsi_eh_done - Completion function for error handling. * @scmd: Cmd that is done. */ void scsi_eh_done(struct scsi_cmnd *scmd) { struct completion *eh_action; SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd, "%s result: %x\n", __func__, scmd->result)); eh_action = scmd->device->host->eh_action; if (eh_action) complete(eh_action); } /** * scsi_try_host_reset - ask host adapter to reset itself * @scmd: SCSI cmd to send host reset. */ static enum scsi_disposition scsi_try_host_reset(struct scsi_cmnd *scmd) { unsigned long flags; enum scsi_disposition rtn; struct Scsi_Host *host = scmd->device->host; const struct scsi_host_template *hostt = host->hostt; SCSI_LOG_ERROR_RECOVERY(3, shost_printk(KERN_INFO, host, "Snd Host RST\n")); if (!hostt->eh_host_reset_handler) return FAILED; rtn = hostt->eh_host_reset_handler(scmd); if (rtn == SUCCESS) { if (!hostt->skip_settle_delay) ssleep(HOST_RESET_SETTLE_TIME); spin_lock_irqsave(host->host_lock, flags); scsi_report_bus_reset(host, scmd_channel(scmd)); spin_unlock_irqrestore(host->host_lock, flags); } return rtn; } /** * scsi_try_bus_reset - ask host to perform a bus reset * @scmd: SCSI cmd to send bus reset. 
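 *
 * For illustration only, the eh_bus_reset_handler invoked here is
 * supplied by the low-level driver and reports whether the reset could
 * be issued. Everything named demo_* below is hypothetical;
 * shost_priv() and the SUCCESS/FAILED codes are the real API:
 *
 *	static int demo_eh_bus_reset_handler(struct scsi_cmnd *scmd)
 *	{
 *		struct demo_hba *hba = shost_priv(scmd->device->host);
 *
 *		return demo_hw_reset_bus(hba) ? FAILED : SUCCESS;
 *	}
 *
 * On SUCCESS the wrapper below sleeps for BUS_RESET_SETTLE_TIME
 * (unless the host template sets skip_settle_delay) and then calls
 * scsi_report_bus_reset() for the affected channel.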
*/ static enum scsi_disposition scsi_try_bus_reset(struct scsi_cmnd *scmd) { unsigned long flags; enum scsi_disposition rtn; struct Scsi_Host *host = scmd->device->host; const struct scsi_host_template *hostt = host->hostt; SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd, "%s: Snd Bus RST\n", __func__)); if (!hostt->eh_bus_reset_handler) return FAILED; rtn = hostt->eh_bus_reset_handler(scmd); if (rtn == SUCCESS) { if (!hostt->skip_settle_delay) ssleep(BUS_RESET_SETTLE_TIME); spin_lock_irqsave(host->host_lock, flags); scsi_report_bus_reset(host, scmd_channel(scmd)); spin_unlock_irqrestore(host->host_lock, flags); } return rtn; } static void __scsi_report_device_reset(struct scsi_device *sdev, void *data) { sdev->was_reset = 1; sdev->expecting_cc_ua = 1; } /** * scsi_try_target_reset - Ask host to perform a target reset * @scmd: SCSI cmd used to send a target reset * * Notes: * There is no timeout for this operation. if this operation is * unreliable for a given host, then the host itself needs to put a * timer on it, and set the host back to a consistent state prior to * returning. */ static enum scsi_disposition scsi_try_target_reset(struct scsi_cmnd *scmd) { unsigned long flags; enum scsi_disposition rtn; struct Scsi_Host *host = scmd->device->host; const struct scsi_host_template *hostt = host->hostt; if (!hostt->eh_target_reset_handler) return FAILED; rtn = hostt->eh_target_reset_handler(scmd); if (rtn == SUCCESS) { spin_lock_irqsave(host->host_lock, flags); __starget_for_each_device(scsi_target(scmd->device), NULL, __scsi_report_device_reset); spin_unlock_irqrestore(host->host_lock, flags); } return rtn; } /** * scsi_try_bus_device_reset - Ask host to perform a BDR on a dev * @scmd: SCSI cmd used to send BDR * * Notes: * There is no timeout for this operation. if this operation is * unreliable for a given host, then the host itself needs to put a * timer on it, and set the host back to a consistent state prior to * returning. */ static enum scsi_disposition scsi_try_bus_device_reset(struct scsi_cmnd *scmd) { enum scsi_disposition rtn; const struct scsi_host_template *hostt = scmd->device->host->hostt; if (!hostt->eh_device_reset_handler) return FAILED; rtn = hostt->eh_device_reset_handler(scmd); if (rtn == SUCCESS) __scsi_report_device_reset(scmd->device, NULL); return rtn; } /** * scsi_try_to_abort_cmd - Ask host to abort a SCSI command * @hostt: SCSI driver host template * @scmd: SCSI cmd used to send a target reset * * Return value: * SUCCESS, FAILED, or FAST_IO_FAIL * * Notes: * SUCCESS does not necessarily indicate that the command * has been aborted; it only indicates that the LLDDs * has cleared all references to that command. * LLDDs should return FAILED only if an abort was required * but could not be executed. 
LLDDs should return FAST_IO_FAIL * if the device is temporarily unavailable (eg due to a * link down on FibreChannel) */ static enum scsi_disposition scsi_try_to_abort_cmd(const struct scsi_host_template *hostt, struct scsi_cmnd *scmd) { if (!hostt->eh_abort_handler) return FAILED; return hostt->eh_abort_handler(scmd); } static void scsi_abort_eh_cmnd(struct scsi_cmnd *scmd) { if (scsi_try_to_abort_cmd(scmd->device->host->hostt, scmd) != SUCCESS) if (scsi_try_bus_device_reset(scmd) != SUCCESS) if (scsi_try_target_reset(scmd) != SUCCESS) if (scsi_try_bus_reset(scmd) != SUCCESS) scsi_try_host_reset(scmd); } /** * scsi_eh_prep_cmnd - Save a scsi command info as part of error recovery * @scmd: SCSI command structure to hijack * @ses: structure to save restore information * @cmnd: CDB to send. Can be NULL if no new cmnd is needed * @cmnd_size: size in bytes of @cmnd (must be <= MAX_COMMAND_SIZE) * @sense_bytes: size of sense data to copy. or 0 (if != 0 @cmnd is ignored) * * This function is used to save a scsi command information before re-execution * as part of the error recovery process. If @sense_bytes is 0 the command * sent must be one that does not transfer any data. If @sense_bytes != 0 * @cmnd is ignored and this functions sets up a REQUEST_SENSE command * and cmnd buffers to read @sense_bytes into @scmd->sense_buffer. */ void scsi_eh_prep_cmnd(struct scsi_cmnd *scmd, struct scsi_eh_save *ses, unsigned char *cmnd, int cmnd_size, unsigned sense_bytes) { struct scsi_device *sdev = scmd->device; /* * We need saved copies of a number of fields - this is because * error handling may need to overwrite these with different values * to run different commands, and once error handling is complete, * we will need to restore these values prior to running the actual * command. */ ses->cmd_len = scmd->cmd_len; ses->data_direction = scmd->sc_data_direction; ses->sdb = scmd->sdb; ses->result = scmd->result; ses->resid_len = scmd->resid_len; ses->underflow = scmd->underflow; ses->prot_op = scmd->prot_op; ses->eh_eflags = scmd->eh_eflags; scmd->prot_op = SCSI_PROT_NORMAL; scmd->eh_eflags = 0; memcpy(ses->cmnd, scmd->cmnd, sizeof(ses->cmnd)); memset(scmd->cmnd, 0, sizeof(scmd->cmnd)); memset(&scmd->sdb, 0, sizeof(scmd->sdb)); scmd->result = 0; scmd->resid_len = 0; if (sense_bytes) { scmd->sdb.length = min_t(unsigned, SCSI_SENSE_BUFFERSIZE, sense_bytes); sg_init_one(&ses->sense_sgl, scmd->sense_buffer, scmd->sdb.length); scmd->sdb.table.sgl = &ses->sense_sgl; scmd->sc_data_direction = DMA_FROM_DEVICE; scmd->sdb.table.nents = scmd->sdb.table.orig_nents = 1; scmd->cmnd[0] = REQUEST_SENSE; scmd->cmnd[4] = scmd->sdb.length; scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]); } else { scmd->sc_data_direction = DMA_NONE; if (cmnd) { BUG_ON(cmnd_size > sizeof(scmd->cmnd)); memcpy(scmd->cmnd, cmnd, cmnd_size); scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]); } } scmd->underflow = 0; if (sdev->scsi_level <= SCSI_2 && sdev->scsi_level != SCSI_UNKNOWN) scmd->cmnd[1] = (scmd->cmnd[1] & 0x1f) | (sdev->lun << 5 & 0xe0); /* * Zero the sense buffer. The scsi spec mandates that any * untransferred sense data should be interpreted as being zero. */ memset(scmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE); } EXPORT_SYMBOL(scsi_eh_prep_cmnd); /** * scsi_eh_restore_cmnd - Restore a scsi command info as part of error recovery * @scmd: SCSI command structure to restore * @ses: saved information from a coresponding call to scsi_eh_prep_cmnd * * Undo any damage done by above scsi_eh_prep_cmnd(). 
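 *
 * A typical use of the prep/restore pair from a driver's recovery path
 * looks roughly like the sketch below; demo_issue_and_wait() is only a
 * placeholder for however the driver actually hands the command to its
 * hardware and waits for it:
 *
 *	struct scsi_eh_save ses;
 *	unsigned char tur[6] = { TEST_UNIT_READY };
 *
 *	scsi_eh_prep_cmnd(scmd, &ses, tur, sizeof(tur), 0);
 *	demo_issue_and_wait(scmd);
 *	scsi_eh_restore_cmnd(scmd, &ses);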
*/ void scsi_eh_restore_cmnd(struct scsi_cmnd* scmd, struct scsi_eh_save *ses) { /* * Restore original data */ scmd->cmd_len = ses->cmd_len; memcpy(scmd->cmnd, ses->cmnd, sizeof(ses->cmnd)); scmd->sc_data_direction = ses->data_direction; scmd->sdb = ses->sdb; scmd->result = ses->result; scmd->resid_len = ses->resid_len; scmd->underflow = ses->underflow; scmd->prot_op = ses->prot_op; scmd->eh_eflags = ses->eh_eflags; } EXPORT_SYMBOL(scsi_eh_restore_cmnd); /** * scsi_send_eh_cmnd - submit a scsi command as part of error recovery * @scmd: SCSI command structure to hijack * @cmnd: CDB to send * @cmnd_size: size in bytes of @cmnd * @timeout: timeout for this request * @sense_bytes: size of sense data to copy or 0 * * This function is used to send a scsi command down to a target device * as part of the error recovery process. See also scsi_eh_prep_cmnd() above. * * Return value: * SUCCESS or FAILED or NEEDS_RETRY */ static enum scsi_disposition scsi_send_eh_cmnd(struct scsi_cmnd *scmd, unsigned char *cmnd, int cmnd_size, int timeout, unsigned sense_bytes) { struct scsi_device *sdev = scmd->device; struct Scsi_Host *shost = sdev->host; DECLARE_COMPLETION_ONSTACK(done); unsigned long timeleft = timeout, delay; struct scsi_eh_save ses; const unsigned long stall_for = msecs_to_jiffies(100); int rtn; retry: scsi_eh_prep_cmnd(scmd, &ses, cmnd, cmnd_size, sense_bytes); shost->eh_action = &done; scsi_log_send(scmd); scmd->submitter = SUBMITTED_BY_SCSI_ERROR_HANDLER; scmd->flags |= SCMD_LAST; /* * Lock sdev->state_mutex to avoid that scsi_device_quiesce() can * change the SCSI device state after we have examined it and before * .queuecommand() is called. */ mutex_lock(&sdev->state_mutex); while (sdev->sdev_state == SDEV_BLOCK && timeleft > 0) { mutex_unlock(&sdev->state_mutex); SCSI_LOG_ERROR_RECOVERY(5, sdev_printk(KERN_DEBUG, sdev, "%s: state %d <> %d\n", __func__, sdev->sdev_state, SDEV_BLOCK)); delay = min(timeleft, stall_for); timeleft -= delay; msleep(jiffies_to_msecs(delay)); mutex_lock(&sdev->state_mutex); } if (sdev->sdev_state != SDEV_BLOCK) rtn = shost->hostt->queuecommand(shost, scmd); else rtn = FAILED; mutex_unlock(&sdev->state_mutex); if (rtn) { if (timeleft > stall_for) { scsi_eh_restore_cmnd(scmd, &ses); timeleft -= stall_for; msleep(jiffies_to_msecs(stall_for)); goto retry; } /* signal not to enter either branch of the if () below */ timeleft = 0; rtn = FAILED; } else { timeleft = wait_for_completion_timeout(&done, timeout); rtn = SUCCESS; } shost->eh_action = NULL; scsi_log_completion(scmd, rtn); SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd, "%s timeleft: %ld\n", __func__, timeleft)); /* * If there is time left scsi_eh_done got called, and we will examine * the actual status codes to see whether the command actually did * complete normally, else if we have a zero return and no time left, * the command must still be pending, so abort it and return FAILED. 
* If we never actually managed to issue the command, because * ->queuecommand() kept returning non zero, use the rtn = FAILED * value above (so don't execute either branch of the if) */ if (timeleft) { rtn = scsi_eh_completed_normally(scmd); SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd, "%s: scsi_eh_completed_normally %x\n", __func__, rtn)); switch (rtn) { case SUCCESS: case NEEDS_RETRY: case FAILED: break; case ADD_TO_MLQUEUE: rtn = NEEDS_RETRY; break; default: rtn = FAILED; break; } } else if (rtn != FAILED) { scsi_abort_eh_cmnd(scmd); rtn = FAILED; } scsi_eh_restore_cmnd(scmd, &ses); return rtn; } /** * scsi_request_sense - Request sense data from a particular target. * @scmd: SCSI cmd for request sense. * * Notes: * Some hosts automatically obtain this information, others require * that we obtain it on our own. This function will *not* return until * the command either times out, or it completes. */ static enum scsi_disposition scsi_request_sense(struct scsi_cmnd *scmd) { return scsi_send_eh_cmnd(scmd, NULL, 0, scmd->device->eh_timeout, ~0); } static enum scsi_disposition scsi_eh_action(struct scsi_cmnd *scmd, enum scsi_disposition rtn) { if (!blk_rq_is_passthrough(scsi_cmd_to_rq(scmd))) { struct scsi_driver *sdrv = scsi_cmd_to_driver(scmd); if (sdrv->eh_action) rtn = sdrv->eh_action(scmd, rtn); } return rtn; } /** * scsi_eh_finish_cmd - Handle a cmd that eh is finished with. * @scmd: Original SCSI cmd that eh has finished. * @done_q: Queue for processed commands. * * Notes: * We don't want to use the normal command completion while we are are * still handling errors - it may cause other commands to be queued, * and that would disturb what we are doing. Thus we really want to * keep a list of pending commands for final completion, and once we * are ready to leave error handling we handle completion for real. */ void scsi_eh_finish_cmd(struct scsi_cmnd *scmd, struct list_head *done_q) { list_move_tail(&scmd->eh_entry, done_q); } EXPORT_SYMBOL(scsi_eh_finish_cmd); /** * scsi_eh_get_sense - Get device sense data. * @work_q: Queue of commands to process. * @done_q: Queue of processed commands. * * Description: * See if we need to request sense information. if so, then get it * now, so we have a better idea of what to do. * * Notes: * This has the unfortunate side effect that if a shost adapter does * not automatically request sense information, we end up shutting * it down before we request it. * * All drivers should request sense information internally these days, * so for now all I have to say is tough noogies if you end up in here. * * XXX: Long term this code should go away, but that needs an audit of * all LLDDs first. */ int scsi_eh_get_sense(struct list_head *work_q, struct list_head *done_q) { struct scsi_cmnd *scmd, *next; struct Scsi_Host *shost; enum scsi_disposition rtn; /* * If SCSI_EH_ABORT_SCHEDULED has been set, it is timeout IO, * should not get sense. 
*/ list_for_each_entry_safe(scmd, next, work_q, eh_entry) { if ((scmd->eh_eflags & SCSI_EH_ABORT_SCHEDULED) || SCSI_SENSE_VALID(scmd)) continue; shost = scmd->device->host; if (scsi_host_eh_past_deadline(shost)) { SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd, "%s: skip request sense, past eh deadline\n", current->comm)); break; } if (!scsi_status_is_check_condition(scmd->result)) /* * don't request sense if there's no check condition * status because the error we're processing isn't one * that has a sense code (and some devices get * confused by sense requests out of the blue) */ continue; SCSI_LOG_ERROR_RECOVERY(2, scmd_printk(KERN_INFO, scmd, "%s: requesting sense\n", current->comm)); rtn = scsi_request_sense(scmd); if (rtn != SUCCESS) continue; SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd, "sense requested, result %x\n", scmd->result)); SCSI_LOG_ERROR_RECOVERY(3, scsi_print_sense(scmd)); rtn = scsi_decide_disposition(scmd); /* * if the result was normal, then just pass it along to the * upper level. */ if (rtn == SUCCESS) /* * We don't want this command reissued, just finished * with the sense data, so set retries to the max * allowed to ensure it won't get reissued. If the user * has requested infinite retries, we also want to * finish this command, so force completion by setting * retries and allowed to the same value. */ if (scmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT) scmd->retries = scmd->allowed = 1; else scmd->retries = scmd->allowed; else if (rtn != NEEDS_RETRY) continue; scsi_eh_finish_cmd(scmd, done_q); } return list_empty(work_q); } EXPORT_SYMBOL_GPL(scsi_eh_get_sense); /** * scsi_eh_tur - Send TUR to device. * @scmd: &scsi_cmnd to send TUR * * Return value: * 0 - Device is ready. 1 - Device NOT ready. */ static int scsi_eh_tur(struct scsi_cmnd *scmd) { static unsigned char tur_command[6] = {TEST_UNIT_READY, 0, 0, 0, 0, 0}; int retry_cnt = 1; enum scsi_disposition rtn; retry_tur: rtn = scsi_send_eh_cmnd(scmd, tur_command, 6, scmd->device->eh_timeout, 0); SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd, "%s return: %x\n", __func__, rtn)); switch (rtn) { case NEEDS_RETRY: if (retry_cnt--) goto retry_tur; fallthrough; case SUCCESS: return 0; default: return 1; } } /** * scsi_eh_test_devices - check if devices are responding from error recovery. * @cmd_list: scsi commands in error recovery. * @work_q: queue for commands which still need more error recovery * @done_q: queue for commands which are finished * @try_stu: boolean on if a STU command should be tried in addition to TUR. * * Decription: * Tests if devices are in a working state. Commands to devices now in * a working state are sent to the done_q while commands to devices which * are still failing to respond are returned to the work_q for more * processing. 
**/ static int scsi_eh_test_devices(struct list_head *cmd_list, struct list_head *work_q, struct list_head *done_q, int try_stu) { struct scsi_cmnd *scmd, *next; struct scsi_device *sdev; int finish_cmds; while (!list_empty(cmd_list)) { scmd = list_entry(cmd_list->next, struct scsi_cmnd, eh_entry); sdev = scmd->device; if (!try_stu) { if (scsi_host_eh_past_deadline(sdev->host)) { /* Push items back onto work_q */ list_splice_init(cmd_list, work_q); SCSI_LOG_ERROR_RECOVERY(3, sdev_printk(KERN_INFO, sdev, "%s: skip test device, past eh deadline", current->comm)); break; } } finish_cmds = !scsi_device_online(scmd->device) || (try_stu && !scsi_eh_try_stu(scmd) && !scsi_eh_tur(scmd)) || !scsi_eh_tur(scmd); list_for_each_entry_safe(scmd, next, cmd_list, eh_entry) if (scmd->device == sdev) { if (finish_cmds && (try_stu || scsi_eh_action(scmd, SUCCESS) == SUCCESS)) scsi_eh_finish_cmd(scmd, done_q); else list_move_tail(&scmd->eh_entry, work_q); } } return list_empty(work_q); } /** * scsi_eh_try_stu - Send START_UNIT to device. * @scmd: &scsi_cmnd to send START_UNIT * * Return value: * 0 - Device is ready. 1 - Device NOT ready. */ static int scsi_eh_try_stu(struct scsi_cmnd *scmd) { static unsigned char stu_command[6] = {START_STOP, 0, 0, 0, 1, 0}; if (scmd->device->allow_restart) { int i; enum scsi_disposition rtn = NEEDS_RETRY; for (i = 0; rtn == NEEDS_RETRY && i < 2; i++) rtn = scsi_send_eh_cmnd(scmd, stu_command, 6, scmd->device->eh_timeout, 0); if (rtn == SUCCESS) return 0; } return 1; } /** * scsi_eh_stu - send START_UNIT if needed * @shost: &scsi host being recovered. * @work_q: &list_head for pending commands. * @done_q: &list_head for processed commands. * * Notes: * If commands are failing due to not ready, initializing command required, * try revalidating the device, which will end up sending a start unit. */ static int scsi_eh_stu(struct Scsi_Host *shost, struct list_head *work_q, struct list_head *done_q) { struct scsi_cmnd *scmd, *stu_scmd, *next; struct scsi_device *sdev; shost_for_each_device(sdev, shost) { if (scsi_host_eh_past_deadline(shost)) { SCSI_LOG_ERROR_RECOVERY(3, sdev_printk(KERN_INFO, sdev, "%s: skip START_UNIT, past eh deadline\n", current->comm)); scsi_device_put(sdev); break; } stu_scmd = NULL; list_for_each_entry(scmd, work_q, eh_entry) if (scmd->device == sdev && SCSI_SENSE_VALID(scmd) && scsi_check_sense(scmd) == FAILED ) { stu_scmd = scmd; break; } if (!stu_scmd) continue; SCSI_LOG_ERROR_RECOVERY(3, sdev_printk(KERN_INFO, sdev, "%s: Sending START_UNIT\n", current->comm)); if (!scsi_eh_try_stu(stu_scmd)) { if (!scsi_device_online(sdev) || !scsi_eh_tur(stu_scmd)) { list_for_each_entry_safe(scmd, next, work_q, eh_entry) { if (scmd->device == sdev && scsi_eh_action(scmd, SUCCESS) == SUCCESS) scsi_eh_finish_cmd(scmd, done_q); } } } else { SCSI_LOG_ERROR_RECOVERY(3, sdev_printk(KERN_INFO, sdev, "%s: START_UNIT failed\n", current->comm)); } } return list_empty(work_q); } /** * scsi_eh_bus_device_reset - send bdr if needed * @shost: scsi host being recovered. * @work_q: &list_head for pending commands. * @done_q: &list_head for processed commands. * * Notes: * Try a bus device reset. Still, look to see whether we have multiple * devices that are jammed or not - if we have multiple devices, it * makes no sense to try bus_device_reset - we really would need to try * a bus_reset instead. 
*/ static int scsi_eh_bus_device_reset(struct Scsi_Host *shost, struct list_head *work_q, struct list_head *done_q) { struct scsi_cmnd *scmd, *bdr_scmd, *next; struct scsi_device *sdev; enum scsi_disposition rtn; shost_for_each_device(sdev, shost) { if (scsi_host_eh_past_deadline(shost)) { SCSI_LOG_ERROR_RECOVERY(3, sdev_printk(KERN_INFO, sdev, "%s: skip BDR, past eh deadline\n", current->comm)); scsi_device_put(sdev); break; } bdr_scmd = NULL; list_for_each_entry(scmd, work_q, eh_entry) if (scmd->device == sdev) { bdr_scmd = scmd; break; } if (!bdr_scmd) continue; SCSI_LOG_ERROR_RECOVERY(3, sdev_printk(KERN_INFO, sdev, "%s: Sending BDR\n", current->comm)); rtn = scsi_try_bus_device_reset(bdr_scmd); if (rtn == SUCCESS || rtn == FAST_IO_FAIL) { if (!scsi_device_online(sdev) || rtn == FAST_IO_FAIL || !scsi_eh_tur(bdr_scmd)) { list_for_each_entry_safe(scmd, next, work_q, eh_entry) { if (scmd->device == sdev && scsi_eh_action(scmd, rtn) != FAILED) scsi_eh_finish_cmd(scmd, done_q); } } } else { SCSI_LOG_ERROR_RECOVERY(3, sdev_printk(KERN_INFO, sdev, "%s: BDR failed\n", current->comm)); } } return list_empty(work_q); } /** * scsi_eh_target_reset - send target reset if needed * @shost: scsi host being recovered. * @work_q: &list_head for pending commands. * @done_q: &list_head for processed commands. * * Notes: * Try a target reset. */ static int scsi_eh_target_reset(struct Scsi_Host *shost, struct list_head *work_q, struct list_head *done_q) { LIST_HEAD(tmp_list); LIST_HEAD(check_list); list_splice_init(work_q, &tmp_list); while (!list_empty(&tmp_list)) { struct scsi_cmnd *next, *scmd; enum scsi_disposition rtn; unsigned int id; if (scsi_host_eh_past_deadline(shost)) { /* push back on work queue for further processing */ list_splice_init(&check_list, work_q); list_splice_init(&tmp_list, work_q); SCSI_LOG_ERROR_RECOVERY(3, shost_printk(KERN_INFO, shost, "%s: Skip target reset, past eh deadline\n", current->comm)); return list_empty(work_q); } scmd = list_entry(tmp_list.next, struct scsi_cmnd, eh_entry); id = scmd_id(scmd); SCSI_LOG_ERROR_RECOVERY(3, shost_printk(KERN_INFO, shost, "%s: Sending target reset to target %d\n", current->comm, id)); rtn = scsi_try_target_reset(scmd); if (rtn != SUCCESS && rtn != FAST_IO_FAIL) SCSI_LOG_ERROR_RECOVERY(3, shost_printk(KERN_INFO, shost, "%s: Target reset failed" " target: %d\n", current->comm, id)); list_for_each_entry_safe(scmd, next, &tmp_list, eh_entry) { if (scmd_id(scmd) != id) continue; if (rtn == SUCCESS) list_move_tail(&scmd->eh_entry, &check_list); else if (rtn == FAST_IO_FAIL) scsi_eh_finish_cmd(scmd, done_q); else /* push back on work queue for further processing */ list_move(&scmd->eh_entry, work_q); } } return scsi_eh_test_devices(&check_list, work_q, done_q, 0); } /** * scsi_eh_bus_reset - send a bus reset * @shost: &scsi host being recovered. * @work_q: &list_head for pending commands. * @done_q: &list_head for processed commands. */ static int scsi_eh_bus_reset(struct Scsi_Host *shost, struct list_head *work_q, struct list_head *done_q) { struct scsi_cmnd *scmd, *chan_scmd, *next; LIST_HEAD(check_list); unsigned int channel; enum scsi_disposition rtn; /* * we really want to loop over the various channels, and do this on * a channel by channel basis. we should also check to see if any * of the failed commands are on soft_reset devices, and if so, skip * the reset. 
*/ for (channel = 0; channel <= shost->max_channel; channel++) { if (scsi_host_eh_past_deadline(shost)) { list_splice_init(&check_list, work_q); SCSI_LOG_ERROR_RECOVERY(3, shost_printk(KERN_INFO, shost, "%s: skip BRST, past eh deadline\n", current->comm)); return list_empty(work_q); } chan_scmd = NULL; list_for_each_entry(scmd, work_q, eh_entry) { if (channel == scmd_channel(scmd)) { chan_scmd = scmd; break; /* * FIXME add back in some support for * soft_reset devices. */ } } if (!chan_scmd) continue; SCSI_LOG_ERROR_RECOVERY(3, shost_printk(KERN_INFO, shost, "%s: Sending BRST chan: %d\n", current->comm, channel)); rtn = scsi_try_bus_reset(chan_scmd); if (rtn == SUCCESS || rtn == FAST_IO_FAIL) { list_for_each_entry_safe(scmd, next, work_q, eh_entry) { if (channel == scmd_channel(scmd)) { if (rtn == FAST_IO_FAIL) scsi_eh_finish_cmd(scmd, done_q); else list_move_tail(&scmd->eh_entry, &check_list); } } } else { SCSI_LOG_ERROR_RECOVERY(3, shost_printk(KERN_INFO, shost, "%s: BRST failed chan: %d\n", current->comm, channel)); } } return scsi_eh_test_devices(&check_list, work_q, done_q, 0); } /** * scsi_eh_host_reset - send a host reset * @shost: host to be reset. * @work_q: &list_head for pending commands. * @done_q: &list_head for processed commands. */ static int scsi_eh_host_reset(struct Scsi_Host *shost, struct list_head *work_q, struct list_head *done_q) { struct scsi_cmnd *scmd, *next; LIST_HEAD(check_list); enum scsi_disposition rtn; if (!list_empty(work_q)) { scmd = list_entry(work_q->next, struct scsi_cmnd, eh_entry); SCSI_LOG_ERROR_RECOVERY(3, shost_printk(KERN_INFO, shost, "%s: Sending HRST\n", current->comm)); rtn = scsi_try_host_reset(scmd); if (rtn == SUCCESS) { list_splice_init(work_q, &check_list); } else if (rtn == FAST_IO_FAIL) { list_for_each_entry_safe(scmd, next, work_q, eh_entry) { scsi_eh_finish_cmd(scmd, done_q); } } else { SCSI_LOG_ERROR_RECOVERY(3, shost_printk(KERN_INFO, shost, "%s: HRST failed\n", current->comm)); } } return scsi_eh_test_devices(&check_list, work_q, done_q, 1); } /** * scsi_eh_offline_sdevs - offline scsi devices that fail to recover * @work_q: &list_head for pending commands. * @done_q: &list_head for processed commands. */ static void scsi_eh_offline_sdevs(struct list_head *work_q, struct list_head *done_q) { struct scsi_cmnd *scmd, *next; struct scsi_device *sdev; list_for_each_entry_safe(scmd, next, work_q, eh_entry) { sdev_printk(KERN_INFO, scmd->device, "Device offlined - " "not ready after error recovery\n"); sdev = scmd->device; mutex_lock(&sdev->state_mutex); scsi_device_set_state(sdev, SDEV_OFFLINE); mutex_unlock(&sdev->state_mutex); scsi_eh_finish_cmd(scmd, done_q); } return; } /** * scsi_noretry_cmd - determine if command should be failed fast * @scmd: SCSI cmd to examine. 
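 *
 * Returns true when the command must not be retried and should instead
 * be failed upwards right away (REQ_FAILFAST_* semantics, passthrough
 * requests, or a command duration limit timeout).
 *
 * Callers in this file pair it with scsi_cmd_retry_allowed(), along
 * the lines of this sketch:
 *
 *	if (scsi_cmd_retry_allowed(scmd) && !scsi_noretry_cmd(scmd))
 *		return NEEDS_RETRY;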
*/ bool scsi_noretry_cmd(struct scsi_cmnd *scmd) { struct request *req = scsi_cmd_to_rq(scmd); switch (host_byte(scmd->result)) { case DID_OK: break; case DID_TIME_OUT: goto check_type; case DID_BUS_BUSY: return !!(req->cmd_flags & REQ_FAILFAST_TRANSPORT); case DID_PARITY: return !!(req->cmd_flags & REQ_FAILFAST_DEV); case DID_ERROR: if (get_status_byte(scmd) == SAM_STAT_RESERVATION_CONFLICT) return false; fallthrough; case DID_SOFT_ERROR: return !!(req->cmd_flags & REQ_FAILFAST_DRIVER); } /* Never retry commands aborted due to a duration limit timeout */ if (scsi_ml_byte(scmd->result) == SCSIML_STAT_DL_TIMEOUT) return true; if (!scsi_status_is_check_condition(scmd->result)) return false; check_type: /* * assume caller has checked sense and determined * the check condition was retryable. */ if (req->cmd_flags & REQ_FAILFAST_DEV || blk_rq_is_passthrough(req)) return true; return false; } /** * scsi_decide_disposition - Disposition a cmd on return from LLD. * @scmd: SCSI cmd to examine. * * Notes: * This is *only* called when we are examining the status after sending * out the actual data command. any commands that are queued for error * recovery (e.g. test_unit_ready) do *not* come through here. * * When this routine returns failed, it means the error handler thread * is woken. In cases where the error code indicates an error that * doesn't require the error handler read (i.e. we don't need to * abort/reset), this function should return SUCCESS. */ enum scsi_disposition scsi_decide_disposition(struct scsi_cmnd *scmd) { enum scsi_disposition rtn; /* * if the device is offline, then we clearly just pass the result back * up to the top level. */ if (!scsi_device_online(scmd->device)) { SCSI_LOG_ERROR_RECOVERY(5, scmd_printk(KERN_INFO, scmd, "%s: device offline - report as SUCCESS\n", __func__)); return SUCCESS; } /* * first check the host byte, to see if there is anything in there * that would indicate what we need to do. */ switch (host_byte(scmd->result)) { case DID_PASSTHROUGH: /* * no matter what, pass this through to the upper layer. * nuke this special code so that it looks like we are saying * did_ok. */ scmd->result &= 0xff00ffff; return SUCCESS; case DID_OK: /* * looks good. drop through, and check the next byte. */ break; case DID_ABORT: if (scmd->eh_eflags & SCSI_EH_ABORT_SCHEDULED) { set_host_byte(scmd, DID_TIME_OUT); return SUCCESS; } fallthrough; case DID_NO_CONNECT: case DID_BAD_TARGET: /* * note - this means that we just report the status back * to the top level driver, not that we actually think * that it indicates SUCCESS. */ return SUCCESS; case DID_SOFT_ERROR: /* * when the low level driver returns did_soft_error, * it is responsible for keeping an internal retry counter * in order to avoid endless loops (db) */ goto maybe_retry; case DID_IMM_RETRY: return NEEDS_RETRY; case DID_REQUEUE: return ADD_TO_MLQUEUE; case DID_TRANSPORT_DISRUPTED: /* * LLD/transport was disrupted during processing of the IO. * The transport class is now blocked/blocking, * and the transport will decide what to do with the IO * based on its timers and recovery capablilities if * there are enough retries. */ goto maybe_retry; case DID_TRANSPORT_FAILFAST: /* * The transport decided to failfast the IO (most likely * the fast io fail tmo fired), so send IO directly upwards. 
*/ return SUCCESS; case DID_TRANSPORT_MARGINAL: /* * caller has decided not to do retries on * abort success, so send IO directly upwards */ return SUCCESS; case DID_ERROR: if (get_status_byte(scmd) == SAM_STAT_RESERVATION_CONFLICT) /* * execute reservation conflict processing code * lower down */ break; fallthrough; case DID_BUS_BUSY: case DID_PARITY: goto maybe_retry; case DID_TIME_OUT: /* * when we scan the bus, we get timeout messages for * these commands if there is no device available. * other hosts report did_no_connect for the same thing. */ if ((scmd->cmnd[0] == TEST_UNIT_READY || scmd->cmnd[0] == INQUIRY)) { return SUCCESS; } else { return FAILED; } case DID_RESET: return SUCCESS; default: return FAILED; } /* * check the status byte to see if this indicates anything special. */ switch (get_status_byte(scmd)) { case SAM_STAT_TASK_SET_FULL: scsi_handle_queue_full(scmd->device); /* * the case of trying to send too many commands to a * tagged queueing device. */ fallthrough; case SAM_STAT_BUSY: /* * device can't talk to us at the moment. Should only * occur (SAM-3) when the task queue is empty, so will cause * the empty queue handling to trigger a stall in the * device. */ return ADD_TO_MLQUEUE; case SAM_STAT_GOOD: if (scmd->cmnd[0] == REPORT_LUNS) scmd->device->sdev_target->expecting_lun_change = 0; scsi_handle_queue_ramp_up(scmd->device); if (scmd->sense_buffer && SCSI_SENSE_VALID(scmd)) /* * If we have sense data, call scsi_check_sense() in * order to set the correct SCSI ML byte (if any). * No point in checking the return value, since the * command has already completed successfully. */ scsi_check_sense(scmd); fallthrough; case SAM_STAT_COMMAND_TERMINATED: return SUCCESS; case SAM_STAT_TASK_ABORTED: goto maybe_retry; case SAM_STAT_CHECK_CONDITION: rtn = scsi_check_sense(scmd); if (rtn == NEEDS_RETRY) goto maybe_retry; /* if rtn == FAILED, we have no sense information; * returning FAILED will wake the error handler thread * to collect the sense and redo the decide * disposition */ return rtn; case SAM_STAT_CONDITION_MET: case SAM_STAT_INTERMEDIATE: case SAM_STAT_INTERMEDIATE_CONDITION_MET: case SAM_STAT_ACA_ACTIVE: /* * who knows? FIXME(eric) */ return SUCCESS; case SAM_STAT_RESERVATION_CONFLICT: sdev_printk(KERN_INFO, scmd->device, "reservation conflict\n"); set_scsi_ml_byte(scmd, SCSIML_STAT_RESV_CONFLICT); return SUCCESS; /* causes immediate i/o error */ } return FAILED; maybe_retry: /* we requeue for retry because the error was retryable, and * the request was not marked fast fail. Note that above, * even if the request is marked fast fail, we still requeue * for queue congestion conditions (QUEUE_FULL or BUSY) */ if (scsi_cmd_retry_allowed(scmd) && !scsi_noretry_cmd(scmd)) { return NEEDS_RETRY; } else { /* * no more retries - report this one back to upper level. */ return SUCCESS; } } static enum rq_end_io_ret eh_lock_door_done(struct request *req, blk_status_t status) { blk_mq_free_request(req); return RQ_END_IO_NONE; } /** * scsi_eh_lock_door - Prevent medium removal for the specified device * @sdev: SCSI device to prevent medium removal * * Locking: * We must be called from process context. * * Notes: * We queue up an asynchronous "ALLOW MEDIUM REMOVAL" request on the * head of the devices request queue, and continue. 
*/ static void scsi_eh_lock_door(struct scsi_device *sdev) { struct scsi_cmnd *scmd; struct request *req; req = scsi_alloc_request(sdev->request_queue, REQ_OP_DRV_IN, 0); if (IS_ERR(req)) return; scmd = blk_mq_rq_to_pdu(req); scmd->cmnd[0] = ALLOW_MEDIUM_REMOVAL; scmd->cmnd[1] = 0; scmd->cmnd[2] = 0; scmd->cmnd[3] = 0; scmd->cmnd[4] = SCSI_REMOVAL_PREVENT; scmd->cmnd[5] = 0; scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]); scmd->allowed = 5; req->rq_flags |= RQF_QUIET; req->timeout = 10 * HZ; req->end_io = eh_lock_door_done; blk_execute_rq_nowait(req, true); } /** * scsi_restart_operations - restart io operations to the specified host. * @shost: Host we are restarting. * * Notes: * When we entered the error handler, we blocked all further i/o to * this device. we need to 'reverse' this process. */ static void scsi_restart_operations(struct Scsi_Host *shost) { struct scsi_device *sdev; unsigned long flags; /* * If the door was locked, we need to insert a door lock request * onto the head of the SCSI request queue for the device. There * is no point trying to lock the door of an off-line device. */ shost_for_each_device(sdev, shost) { if (scsi_device_online(sdev) && sdev->was_reset && sdev->locked) { scsi_eh_lock_door(sdev); sdev->was_reset = 0; } } /* * next free up anything directly waiting upon the host. this * will be requests for character device operations, and also for * ioctls to queued block devices. */ SCSI_LOG_ERROR_RECOVERY(3, shost_printk(KERN_INFO, shost, "waking up host to restart\n")); spin_lock_irqsave(shost->host_lock, flags); if (scsi_host_set_state(shost, SHOST_RUNNING)) if (scsi_host_set_state(shost, SHOST_CANCEL)) BUG_ON(scsi_host_set_state(shost, SHOST_DEL)); spin_unlock_irqrestore(shost->host_lock, flags); wake_up(&shost->host_wait); /* * finally we need to re-initiate requests that may be pending. we will * have had everything blocked while error handling is taking place, and * now that error recovery is done, we will need to ensure that these * requests are started. */ scsi_run_host_queues(shost); /* * if eh is active and host_eh_scheduled is pending we need to re-run * recovery. we do this check after scsi_run_host_queues() to allow * everything pent up since the last eh run a chance to make forward * progress before we sync again. Either we'll immediately re-run * recovery or scsi_device_unbusy() will wake us again when these * pending commands complete. */ spin_lock_irqsave(shost->host_lock, flags); if (shost->host_eh_scheduled) if (scsi_host_set_state(shost, SHOST_RECOVERY)) WARN_ON(scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY)); spin_unlock_irqrestore(shost->host_lock, flags); } /** * scsi_eh_ready_devs - check device ready state and recover if not. * @shost: host to be recovered. * @work_q: &list_head for pending commands. * @done_q: &list_head for processed commands. */ void scsi_eh_ready_devs(struct Scsi_Host *shost, struct list_head *work_q, struct list_head *done_q) { if (!scsi_eh_stu(shost, work_q, done_q)) if (!scsi_eh_bus_device_reset(shost, work_q, done_q)) if (!scsi_eh_target_reset(shost, work_q, done_q)) if (!scsi_eh_bus_reset(shost, work_q, done_q)) if (!scsi_eh_host_reset(shost, work_q, done_q)) scsi_eh_offline_sdevs(work_q, done_q); } EXPORT_SYMBOL_GPL(scsi_eh_ready_devs); /** * scsi_eh_flush_done_q - finish processed commands or retry them. * @done_q: list_head of processed commands. 
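 *
 * Together with scsi_eh_get_sense() and scsi_eh_ready_devs() this is
 * the building block a transport class can use in its own
 * eh_strategy_handler. A minimal sketch, essentially what the default
 * scsi_unjam_host() below does (demo_eh_strategy is a made-up name):
 *
 *	static void demo_eh_strategy(struct Scsi_Host *shost)
 *	{
 *		unsigned long flags;
 *		LIST_HEAD(work_q);
 *		LIST_HEAD(done_q);
 *
 *		spin_lock_irqsave(shost->host_lock, flags);
 *		list_splice_init(&shost->eh_cmd_q, &work_q);
 *		spin_unlock_irqrestore(shost->host_lock, flags);
 *
 *		if (!scsi_eh_get_sense(&work_q, &done_q))
 *			scsi_eh_ready_devs(shost, &work_q, &done_q);
 *		scsi_eh_flush_done_q(&done_q);
 *	}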
*/ void scsi_eh_flush_done_q(struct list_head *done_q) { struct scsi_cmnd *scmd, *next; list_for_each_entry_safe(scmd, next, done_q, eh_entry) { struct scsi_device *sdev = scmd->device; list_del_init(&scmd->eh_entry); if (scsi_device_online(sdev) && !scsi_noretry_cmd(scmd) && scsi_cmd_retry_allowed(scmd) && scsi_eh_should_retry_cmd(scmd)) { SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd, "%s: flush retry cmd\n", current->comm)); scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY); blk_mq_kick_requeue_list(sdev->request_queue); } else { /* * If just we got sense for the device (called * scsi_eh_get_sense), scmd->result is already * set, do not set DID_TIME_OUT. */ if (!scmd->result && !(scmd->flags & SCMD_FORCE_EH_SUCCESS)) scmd->result |= (DID_TIME_OUT << 16); SCSI_LOG_ERROR_RECOVERY(3, scmd_printk(KERN_INFO, scmd, "%s: flush finish cmd\n", current->comm)); scsi_finish_command(scmd); } } } EXPORT_SYMBOL(scsi_eh_flush_done_q); /** * scsi_unjam_host - Attempt to fix a host which has a cmd that failed. * @shost: Host to unjam. * * Notes: * When we come in here, we *know* that all commands on the bus have * either completed, failed or timed out. we also know that no further * commands are being sent to the host, so things are relatively quiet * and we have freedom to fiddle with things as we wish. * * This is only the *default* implementation. it is possible for * individual drivers to supply their own version of this function, and * if the maintainer wishes to do this, it is strongly suggested that * this function be taken as a template and modified. this function * was designed to correctly handle problems for about 95% of the * different cases out there, and it should always provide at least a * reasonable amount of error recovery. * * Any command marked 'failed' or 'timeout' must eventually have * scsi_finish_cmd() called for it. we do all of the retry stuff * here, so when we restart the host after we return it should have an * empty queue. */ static void scsi_unjam_host(struct Scsi_Host *shost) { unsigned long flags; LIST_HEAD(eh_work_q); LIST_HEAD(eh_done_q); spin_lock_irqsave(shost->host_lock, flags); list_splice_init(&shost->eh_cmd_q, &eh_work_q); spin_unlock_irqrestore(shost->host_lock, flags); SCSI_LOG_ERROR_RECOVERY(1, scsi_eh_prt_fail_stats(shost, &eh_work_q)); if (!scsi_eh_get_sense(&eh_work_q, &eh_done_q)) scsi_eh_ready_devs(shost, &eh_work_q, &eh_done_q); spin_lock_irqsave(shost->host_lock, flags); if (shost->eh_deadline != -1) shost->last_reset = 0; spin_unlock_irqrestore(shost->host_lock, flags); scsi_eh_flush_done_q(&eh_done_q); } /** * scsi_error_handler - SCSI error handler thread * @data: Host for which we are running. * * Notes: * This is the main error handling loop. This is run as a kernel thread * for every SCSI host and handles all error handling activity. */ int scsi_error_handler(void *data) { struct Scsi_Host *shost = data; /* * We use TASK_INTERRUPTIBLE so that the thread is not * counted against the load average as a running process. * We never actually get interrupted because kthread_run * disables signal delivery for the created thread. */ while (true) { /* * The sequence in kthread_stop() sets the stop flag first * then wakes the process. 
To avoid missed wakeups, the task * should always be in a non running state before the stop * flag is checked */ set_current_state(TASK_INTERRUPTIBLE); if (kthread_should_stop()) break; if ((shost->host_failed == 0 && shost->host_eh_scheduled == 0) || shost->host_failed != scsi_host_busy(shost)) { SCSI_LOG_ERROR_RECOVERY(1, shost_printk(KERN_INFO, shost, "scsi_eh_%d: sleeping\n", shost->host_no)); schedule(); continue; } __set_current_state(TASK_RUNNING); SCSI_LOG_ERROR_RECOVERY(1, shost_printk(KERN_INFO, shost, "scsi_eh_%d: waking up %d/%d/%d\n", shost->host_no, shost->host_eh_scheduled, shost->host_failed, scsi_host_busy(shost))); /* * We have a host that is failing for some reason. Figure out * what we need to do to get it up and online again (if we can). * If we fail, we end up taking the thing offline. */ if (!shost->eh_noresume && scsi_autopm_get_host(shost) != 0) { SCSI_LOG_ERROR_RECOVERY(1, shost_printk(KERN_ERR, shost, "scsi_eh_%d: unable to autoresume\n", shost->host_no)); continue; } if (shost->transportt->eh_strategy_handler) shost->transportt->eh_strategy_handler(shost); else scsi_unjam_host(shost); /* All scmds have been handled */ shost->host_failed = 0; /* * Note - if the above fails completely, the action is to take * individual devices offline and flush the queue of any * outstanding requests that may have been pending. When we * restart, we restart any I/O to any other devices on the bus * which are still online. */ scsi_restart_operations(shost); if (!shost->eh_noresume) scsi_autopm_put_host(shost); } __set_current_state(TASK_RUNNING); SCSI_LOG_ERROR_RECOVERY(1, shost_printk(KERN_INFO, shost, "Error handler scsi_eh_%d exiting\n", shost->host_no)); shost->ehandler = NULL; return 0; } /** * scsi_report_bus_reset() - report bus reset observed * * Utility function used by low-level drivers to report that * they have observed a bus reset on the bus being handled. * * @shost: Host in question * @channel: channel on which reset was observed. * * Returns: Nothing * * Lock status: Host lock must be held. * * Notes: This only needs to be called if the reset is one which * originates from an unknown location. Resets originated * by the mid-level itself don't need to call this, but there * should be no harm. * * The main purpose of this is to make sure that a CHECK_CONDITION * is properly treated. */ void scsi_report_bus_reset(struct Scsi_Host *shost, int channel) { struct scsi_device *sdev; __shost_for_each_device(sdev, shost) { if (channel == sdev_channel(sdev)) __scsi_report_device_reset(sdev, NULL); } } EXPORT_SYMBOL(scsi_report_bus_reset); /** * scsi_report_device_reset() - report device reset observed * * Utility function used by low-level drivers to report that * they have observed a device reset on the device being handled. * * @shost: Host in question * @channel: channel on which reset was observed * @target: target on which reset was observed * * Returns: Nothing * * Lock status: Host lock must be held * * Notes: This only needs to be called if the reset is one which * originates from an unknown location. Resets originated * by the mid-level itself don't need to call this, but there * should be no harm. * * The main purpose of this is to make sure that a CHECK_CONDITION * is properly treated. 
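 *
 * A sketch of how a low-level driver might report a reset noticed in
 * its event handling; the demo_* name and the fixed channel are
 * illustrative only:
 *
 *	static void demo_handle_reset_event(struct Scsi_Host *shost, int target)
 *	{
 *		unsigned long flags;
 *
 *		spin_lock_irqsave(shost->host_lock, flags);
 *		scsi_report_device_reset(shost, 0, target);
 *		spin_unlock_irqrestore(shost->host_lock, flags);
 *	}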
*/ void scsi_report_device_reset(struct Scsi_Host *shost, int channel, int target) { struct scsi_device *sdev; __shost_for_each_device(sdev, shost) { if (channel == sdev_channel(sdev) && target == sdev_id(sdev)) __scsi_report_device_reset(sdev, NULL); } } EXPORT_SYMBOL(scsi_report_device_reset); /** * scsi_ioctl_reset: explicitly reset a host/bus/target/device * @dev: scsi_device to operate on * @arg: reset type (see sg.h) */ int scsi_ioctl_reset(struct scsi_device *dev, int __user *arg) { struct scsi_cmnd *scmd; struct Scsi_Host *shost = dev->host; struct request *rq; unsigned long flags; int error = 0, val; enum scsi_disposition rtn; if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) return -EACCES; error = get_user(val, arg); if (error) return error; if (scsi_autopm_get_host(shost) < 0) return -EIO; error = -EIO; rq = kzalloc(sizeof(struct request) + sizeof(struct scsi_cmnd) + shost->hostt->cmd_size, GFP_KERNEL); if (!rq) goto out_put_autopm_host; blk_rq_init(NULL, rq); scmd = (struct scsi_cmnd *)(rq + 1); scsi_init_command(dev, scmd); scmd->submitter = SUBMITTED_BY_SCSI_RESET_IOCTL; scmd->flags |= SCMD_LAST; memset(&scmd->sdb, 0, sizeof(scmd->sdb)); scmd->cmd_len = 0; scmd->sc_data_direction = DMA_BIDIRECTIONAL; spin_lock_irqsave(shost->host_lock, flags); shost->tmf_in_progress = 1; spin_unlock_irqrestore(shost->host_lock, flags); switch (val & ~SG_SCSI_RESET_NO_ESCALATE) { case SG_SCSI_RESET_NOTHING: rtn = SUCCESS; break; case SG_SCSI_RESET_DEVICE: rtn = scsi_try_bus_device_reset(scmd); if (rtn == SUCCESS || (val & SG_SCSI_RESET_NO_ESCALATE)) break; fallthrough; case SG_SCSI_RESET_TARGET: rtn = scsi_try_target_reset(scmd); if (rtn == SUCCESS || (val & SG_SCSI_RESET_NO_ESCALATE)) break; fallthrough; case SG_SCSI_RESET_BUS: rtn = scsi_try_bus_reset(scmd); if (rtn == SUCCESS || (val & SG_SCSI_RESET_NO_ESCALATE)) break; fallthrough; case SG_SCSI_RESET_HOST: rtn = scsi_try_host_reset(scmd); if (rtn == SUCCESS) break; fallthrough; default: rtn = FAILED; break; } error = (rtn == SUCCESS) ? 0 : -EIO; spin_lock_irqsave(shost->host_lock, flags); shost->tmf_in_progress = 0; spin_unlock_irqrestore(shost->host_lock, flags); /* * be sure to wake up anyone who was sleeping or had their queue * suspended while we performed the TMF. */ SCSI_LOG_ERROR_RECOVERY(3, shost_printk(KERN_INFO, shost, "waking up host to restart after TMF\n")); wake_up(&shost->host_wait); scsi_run_host_queues(shost); kfree(rq); out_put_autopm_host: scsi_autopm_put_host(shost); return error; } bool scsi_command_normalize_sense(const struct scsi_cmnd *cmd, struct scsi_sense_hdr *sshdr) { return scsi_normalize_sense(cmd->sense_buffer, SCSI_SENSE_BUFFERSIZE, sshdr); } EXPORT_SYMBOL(scsi_command_normalize_sense); /** * scsi_get_sense_info_fld - get information field from sense data (either fixed or descriptor format) * @sense_buffer: byte array of sense data * @sb_len: number of valid bytes in sense_buffer * @info_out: pointer to 64 integer where 8 or 4 byte information * field will be placed if found. * * Return value: * true if information field found, false if not found. 
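 *
 * Example (sketch) of pulling the INFORMATION field, typically the
 * failing LBA for a medium error, out of a completed command's sense
 * buffer; bad_lba is just an illustrative variable name:
 *
 *	u64 bad_lba;
 *
 *	if (scsi_get_sense_info_fld(scmd->sense_buffer,
 *				    SCSI_SENSE_BUFFERSIZE, &bad_lba))
 *		sdev_printk(KERN_INFO, scmd->device, "error at LBA %llu\n",
 *			    (unsigned long long)bad_lba);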
 */
bool scsi_get_sense_info_fld(const u8 *sense_buffer, int sb_len,
			     u64 *info_out)
{
	const u8 *ucp;

	if (sb_len < 7)
		return false;
	switch (sense_buffer[0] & 0x7f) {
	case 0x70:
	case 0x71:
		if (sense_buffer[0] & 0x80) {
			*info_out = get_unaligned_be32(&sense_buffer[3]);
			return true;
		}
		return false;
	case 0x72:
	case 0x73:
		ucp = scsi_sense_desc_find(sense_buffer, sb_len,
					   0 /* info desc */);
		if (ucp && (0xa == ucp[1])) {
			*info_out = get_unaligned_be64(&ucp[4]);
			return true;
		}
		return false;
	default:
		return false;
	}
}
EXPORT_SYMBOL(scsi_get_sense_info_fld);
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_GFP_H
#define __LINUX_GFP_H

#include <linux/gfp_types.h>

#include <linux/mmzone.h>
#include <linux/topology.h>
#include <linux/alloc_tag.h>
#include <linux/sched.h>

struct vm_area_struct;
struct mempolicy;

/* Convert GFP flags to their corresponding migrate type */
#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
#define GFP_MOVABLE_SHIFT 3

static inline int gfp_migratetype(const gfp_t gfp_flags)
{
	VM_WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);
	BUILD_BUG_ON((1UL << GFP_MOVABLE_SHIFT) != ___GFP_MOVABLE);
	BUILD_BUG_ON((___GFP_MOVABLE >> GFP_MOVABLE_SHIFT) != MIGRATE_MOVABLE);
	BUILD_BUG_ON((___GFP_RECLAIMABLE >> GFP_MOVABLE_SHIFT) != MIGRATE_RECLAIMABLE);
	BUILD_BUG_ON(((___GFP_MOVABLE | ___GFP_RECLAIMABLE) >> GFP_MOVABLE_SHIFT) != MIGRATE_HIGHATOMIC);

	if (unlikely(page_group_by_mobility_disabled))
		return MIGRATE_UNMOVABLE;

	/* Group based on mobility */
	return (__force unsigned long)(gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT;
}
#undef GFP_MOVABLE_MASK
#undef GFP_MOVABLE_SHIFT

static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
{
	return !!(gfp_flags & __GFP_DIRECT_RECLAIM);
}

static inline bool gfpflags_allow_spinning(const gfp_t gfp_flags)
{
	/*
	 * !__GFP_DIRECT_RECLAIM -> direct reclaim is not allowed.
	 * !__GFP_KSWAPD_RECLAIM -> it's not safe to wake up kswapd.
	 * All GFP_* flags including GFP_NOWAIT use one or both flags.
	 * try_alloc_pages() is the only API that doesn't specify either flag.
	 *
	 * This is stronger than GFP_NOWAIT or GFP_ATOMIC because
	 * those are guaranteed to never block on a sleeping lock.
	 * Here we are enforcing that the allocation doesn't ever spin
	 * on any locks (i.e. only trylocks).
There is no high level * GFP_$FOO flag for this use in try_alloc_pages() as the * regular page allocator doesn't fully support this * allocation mode. */ return !!(gfp_flags & __GFP_RECLAIM); } #ifdef CONFIG_HIGHMEM #define OPT_ZONE_HIGHMEM ZONE_HIGHMEM #else #define OPT_ZONE_HIGHMEM ZONE_NORMAL #endif #ifdef CONFIG_ZONE_DMA #define OPT_ZONE_DMA ZONE_DMA #else #define OPT_ZONE_DMA ZONE_NORMAL #endif #ifdef CONFIG_ZONE_DMA32 #define OPT_ZONE_DMA32 ZONE_DMA32 #else #define OPT_ZONE_DMA32 ZONE_NORMAL #endif /* * GFP_ZONE_TABLE is a word size bitstring that is used for looking up the * zone to use given the lowest 4 bits of gfp_t. Entries are GFP_ZONES_SHIFT * bits long and there are 16 of them to cover all possible combinations of * __GFP_DMA, __GFP_DMA32, __GFP_MOVABLE and __GFP_HIGHMEM. * * The zone fallback order is MOVABLE=>HIGHMEM=>NORMAL=>DMA32=>DMA. * But GFP_MOVABLE is not only a zone specifier but also an allocation * policy. Therefore __GFP_MOVABLE plus another zone selector is valid. * Only 1 bit of the lowest 3 bits (DMA,DMA32,HIGHMEM) can be set to "1". * * bit result * ================= * 0x0 => NORMAL * 0x1 => DMA or NORMAL * 0x2 => HIGHMEM or NORMAL * 0x3 => BAD (DMA+HIGHMEM) * 0x4 => DMA32 or NORMAL * 0x5 => BAD (DMA+DMA32) * 0x6 => BAD (HIGHMEM+DMA32) * 0x7 => BAD (HIGHMEM+DMA32+DMA) * 0x8 => NORMAL (MOVABLE+0) * 0x9 => DMA or NORMAL (MOVABLE+DMA) * 0xa => MOVABLE (Movable is valid only if HIGHMEM is set too) * 0xb => BAD (MOVABLE+HIGHMEM+DMA) * 0xc => DMA32 or NORMAL (MOVABLE+DMA32) * 0xd => BAD (MOVABLE+DMA32+DMA) * 0xe => BAD (MOVABLE+DMA32+HIGHMEM) * 0xf => BAD (MOVABLE+DMA32+HIGHMEM+DMA) * * GFP_ZONES_SHIFT must be <= 2 on 32 bit platforms. */ #if defined(CONFIG_ZONE_DEVICE) && (MAX_NR_ZONES-1) <= 4 /* ZONE_DEVICE is not a valid GFP zone specifier */ #define GFP_ZONES_SHIFT 2 #else #define GFP_ZONES_SHIFT ZONES_SHIFT #endif #if 16 * GFP_ZONES_SHIFT > BITS_PER_LONG #error GFP_ZONES_SHIFT too large to create GFP_ZONE_TABLE integer #endif #define GFP_ZONE_TABLE ( \ (ZONE_NORMAL << 0 * GFP_ZONES_SHIFT) \ | (OPT_ZONE_DMA << ___GFP_DMA * GFP_ZONES_SHIFT) \ | (OPT_ZONE_HIGHMEM << ___GFP_HIGHMEM * GFP_ZONES_SHIFT) \ | (OPT_ZONE_DMA32 << ___GFP_DMA32 * GFP_ZONES_SHIFT) \ | (ZONE_NORMAL << ___GFP_MOVABLE * GFP_ZONES_SHIFT) \ | (OPT_ZONE_DMA << (___GFP_MOVABLE | ___GFP_DMA) * GFP_ZONES_SHIFT) \ | (ZONE_MOVABLE << (___GFP_MOVABLE | ___GFP_HIGHMEM) * GFP_ZONES_SHIFT)\ | (OPT_ZONE_DMA32 << (___GFP_MOVABLE | ___GFP_DMA32) * GFP_ZONES_SHIFT)\ ) /* * GFP_ZONE_BAD is a bitmap for all combinations of __GFP_DMA, __GFP_DMA32 * __GFP_HIGHMEM and __GFP_MOVABLE that are not permitted. One flag per * entry starting with bit 0. Bit is set if the combination is not * allowed. */ #define GFP_ZONE_BAD ( \ 1 << (___GFP_DMA | ___GFP_HIGHMEM) \ | 1 << (___GFP_DMA | ___GFP_DMA32) \ | 1 << (___GFP_DMA32 | ___GFP_HIGHMEM) \ | 1 << (___GFP_DMA | ___GFP_DMA32 | ___GFP_HIGHMEM) \ | 1 << (___GFP_MOVABLE | ___GFP_HIGHMEM | ___GFP_DMA) \ | 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_DMA) \ | 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_HIGHMEM) \ | 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_DMA | ___GFP_HIGHMEM) \ ) static inline enum zone_type gfp_zone(gfp_t flags) { enum zone_type z; int bit = (__force int) (flags & GFP_ZONEMASK); z = (GFP_ZONE_TABLE >> (bit * GFP_ZONES_SHIFT)) & ((1 << GFP_ZONES_SHIFT) - 1); VM_BUG_ON((GFP_ZONE_BAD >> bit) & 1); return z; } /* * There is only one page-allocator function, and two main namespaces to * it. 
The alloc_page*() variants return 'struct page *' and as such * can allocate highmem pages, the *get*page*() variants return * virtual kernel addresses to the allocated page(s). */ static inline int gfp_zonelist(gfp_t flags) { #ifdef CONFIG_NUMA if (unlikely(flags & __GFP_THISNODE)) return ZONELIST_NOFALLBACK; #endif return ZONELIST_FALLBACK; } /* * gfp flag masking for nested internal allocations. * * For code that needs to do allocations inside the public allocation API (e.g. * memory allocation tracking code) the allocations need to obey the caller * allocation context constrains to prevent allocation context mismatches (e.g. * GFP_KERNEL allocations in GFP_NOFS contexts) from potential deadlock * situations. * * It is also assumed that these nested allocations are for internal kernel * object storage purposes only and are not going to be used for DMA, etc. Hence * we strip out all the zone information and leave just the context information * intact. * * Further, internal allocations must fail before the higher level allocation * can fail, so we must make them fail faster and fail silently. We also don't * want them to deplete emergency reserves. Hence nested allocations must be * prepared for these allocations to fail. */ static inline gfp_t gfp_nested_mask(gfp_t flags) { return ((flags & (GFP_KERNEL | GFP_ATOMIC | __GFP_NOLOCKDEP)) | (__GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN)); } /* * We get the zone list from the current node and the gfp_mask. * This zone list contains a maximum of MAX_NUMNODES*MAX_NR_ZONES zones. * There are two zonelists per node, one for all zones with memory and * one containing just zones from the node the zonelist belongs to. * * For the case of non-NUMA systems the NODE_DATA() gets optimized to * &contig_page_data at compile-time. */ static inline struct zonelist *node_zonelist(int nid, gfp_t flags) { return NODE_DATA(nid)->node_zonelists + gfp_zonelist(flags); } #ifndef HAVE_ARCH_FREE_PAGE static inline void arch_free_page(struct page *page, int order) { } #endif #ifndef HAVE_ARCH_ALLOC_PAGE static inline void arch_alloc_page(struct page *page, int order) { } #endif struct page *__alloc_pages_noprof(gfp_t gfp, unsigned int order, int preferred_nid, nodemask_t *nodemask); #define __alloc_pages(...) alloc_hooks(__alloc_pages_noprof(__VA_ARGS__)) struct folio *__folio_alloc_noprof(gfp_t gfp, unsigned int order, int preferred_nid, nodemask_t *nodemask); #define __folio_alloc(...) alloc_hooks(__folio_alloc_noprof(__VA_ARGS__)) unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid, nodemask_t *nodemask, int nr_pages, struct page **page_array); #define __alloc_pages_bulk(...) alloc_hooks(alloc_pages_bulk_noprof(__VA_ARGS__)) unsigned long alloc_pages_bulk_mempolicy_noprof(gfp_t gfp, unsigned long nr_pages, struct page **page_array); #define alloc_pages_bulk_mempolicy(...) \ alloc_hooks(alloc_pages_bulk_mempolicy_noprof(__VA_ARGS__)) /* Bulk allocate order-0 pages */ #define alloc_pages_bulk(_gfp, _nr_pages, _page_array) \ __alloc_pages_bulk(_gfp, numa_mem_id(), NULL, _nr_pages, _page_array) static inline unsigned long alloc_pages_bulk_node_noprof(gfp_t gfp, int nid, unsigned long nr_pages, struct page **page_array) { if (nid == NUMA_NO_NODE) nid = numa_mem_id(); return alloc_pages_bulk_noprof(gfp, nid, NULL, nr_pages, page_array); } #define alloc_pages_bulk_node(...) 
\ alloc_hooks(alloc_pages_bulk_node_noprof(__VA_ARGS__)) static inline void warn_if_node_offline(int this_node, gfp_t gfp_mask) { gfp_t warn_gfp = gfp_mask & (__GFP_THISNODE|__GFP_NOWARN); if (warn_gfp != (__GFP_THISNODE|__GFP_NOWARN)) return; if (node_online(this_node)) return; pr_warn("%pGg allocation from offline node %d\n", &gfp_mask, this_node); dump_stack(); } /* * Allocate pages, preferring the node given as nid. The node must be valid and * online. For more general interface, see alloc_pages_node(). */ static inline struct page * __alloc_pages_node_noprof(int nid, gfp_t gfp_mask, unsigned int order) { VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); warn_if_node_offline(nid, gfp_mask); return __alloc_pages_noprof(gfp_mask, order, nid, NULL); } #define __alloc_pages_node(...) alloc_hooks(__alloc_pages_node_noprof(__VA_ARGS__)) static inline struct folio *__folio_alloc_node_noprof(gfp_t gfp, unsigned int order, int nid) { VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); warn_if_node_offline(nid, gfp); return __folio_alloc_noprof(gfp, order, nid, NULL); } #define __folio_alloc_node(...) alloc_hooks(__folio_alloc_node_noprof(__VA_ARGS__)) /* * Allocate pages, preferring the node given as nid. When nid == NUMA_NO_NODE, * prefer the current CPU's closest node. Otherwise node must be valid and * online. */ static inline struct page *alloc_pages_node_noprof(int nid, gfp_t gfp_mask, unsigned int order) { if (nid == NUMA_NO_NODE) nid = numa_mem_id(); return __alloc_pages_node_noprof(nid, gfp_mask, order); } #define alloc_pages_node(...) alloc_hooks(alloc_pages_node_noprof(__VA_ARGS__)) #ifdef CONFIG_NUMA struct page *alloc_pages_noprof(gfp_t gfp, unsigned int order); struct folio *folio_alloc_noprof(gfp_t gfp, unsigned int order); struct folio *folio_alloc_mpol_noprof(gfp_t gfp, unsigned int order, struct mempolicy *mpol, pgoff_t ilx, int nid); struct folio *vma_alloc_folio_noprof(gfp_t gfp, int order, struct vm_area_struct *vma, unsigned long addr); #else static inline struct page *alloc_pages_noprof(gfp_t gfp_mask, unsigned int order) { return alloc_pages_node_noprof(numa_node_id(), gfp_mask, order); } static inline struct folio *folio_alloc_noprof(gfp_t gfp, unsigned int order) { return __folio_alloc_node_noprof(gfp, order, numa_node_id()); } static inline struct folio *folio_alloc_mpol_noprof(gfp_t gfp, unsigned int order, struct mempolicy *mpol, pgoff_t ilx, int nid) { return folio_alloc_noprof(gfp, order); } #define vma_alloc_folio_noprof(gfp, order, vma, addr) \ folio_alloc_noprof(gfp, order) #endif #define alloc_pages(...) alloc_hooks(alloc_pages_noprof(__VA_ARGS__)) #define folio_alloc(...) alloc_hooks(folio_alloc_noprof(__VA_ARGS__)) #define folio_alloc_mpol(...) alloc_hooks(folio_alloc_mpol_noprof(__VA_ARGS__)) #define vma_alloc_folio(...) alloc_hooks(vma_alloc_folio_noprof(__VA_ARGS__)) #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) static inline struct page *alloc_page_vma_noprof(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr) { struct folio *folio = vma_alloc_folio_noprof(gfp, 0, vma, addr); return &folio->page; } #define alloc_page_vma(...) alloc_hooks(alloc_page_vma_noprof(__VA_ARGS__)) struct page *try_alloc_pages_noprof(int nid, unsigned int order); #define try_alloc_pages(...) alloc_hooks(try_alloc_pages_noprof(__VA_ARGS__)) extern unsigned long get_free_pages_noprof(gfp_t gfp_mask, unsigned int order); #define __get_free_pages(...) 
alloc_hooks(get_free_pages_noprof(__VA_ARGS__)) extern unsigned long get_zeroed_page_noprof(gfp_t gfp_mask); #define get_zeroed_page(...) alloc_hooks(get_zeroed_page_noprof(__VA_ARGS__)) void *alloc_pages_exact_noprof(size_t size, gfp_t gfp_mask) __alloc_size(1); #define alloc_pages_exact(...) alloc_hooks(alloc_pages_exact_noprof(__VA_ARGS__)) void free_pages_exact(void *virt, size_t size); __meminit void *alloc_pages_exact_nid_noprof(int nid, size_t size, gfp_t gfp_mask) __alloc_size(2); #define alloc_pages_exact_nid(...) \ alloc_hooks(alloc_pages_exact_nid_noprof(__VA_ARGS__)) #define __get_free_page(gfp_mask) \ __get_free_pages((gfp_mask), 0) #define __get_dma_pages(gfp_mask, order) \ __get_free_pages((gfp_mask) | GFP_DMA, (order)) extern void __free_pages(struct page *page, unsigned int order); extern void free_pages_nolock(struct page *page, unsigned int order); extern void free_pages(unsigned long addr, unsigned int order); #define __free_page(page) __free_pages((page), 0) #define free_page(addr) free_pages((addr), 0) void page_alloc_init_cpuhp(void); int decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp); void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp); void drain_all_pages(struct zone *zone); void drain_local_pages(struct zone *zone); void page_alloc_init_late(void); void setup_pcp_cacheinfo(unsigned int cpu); /* * gfp_allowed_mask is set to GFP_BOOT_MASK during early boot to restrict what * GFP flags are used before interrupts are enabled. Once interrupts are * enabled, it is set to __GFP_BITS_MASK while the system is running. During * hibernation, it is used by PM to avoid I/O during memory allocation while * devices are suspended. */ extern gfp_t gfp_allowed_mask; /* Returns true if the gfp_mask allows use of ALLOC_NO_WATERMARK */ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask); static inline bool gfp_has_io_fs(gfp_t gfp) { return (gfp & (__GFP_IO | __GFP_FS)) == (__GFP_IO | __GFP_FS); } /* * Check if the gfp flags allow compaction - GFP_NOIO is a really * tricky context because the migration might require IO. */ static inline bool gfp_compaction_allowed(gfp_t gfp_mask) { return IS_ENABLED(CONFIG_COMPACTION) && (gfp_mask & __GFP_IO); } extern gfp_t vma_thp_gfp_mask(struct vm_area_struct *vma); #ifdef CONFIG_CONTIG_ALLOC /* The below functions must be run on a range from a single zone. */ extern int alloc_contig_range_noprof(unsigned long start, unsigned long end, unsigned migratetype, gfp_t gfp_mask); #define alloc_contig_range(...) alloc_hooks(alloc_contig_range_noprof(__VA_ARGS__)) extern struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask, int nid, nodemask_t *nodemask); #define alloc_contig_pages(...) alloc_hooks(alloc_contig_pages_noprof(__VA_ARGS__)) #endif void free_contig_range(unsigned long pfn, unsigned long nr_pages); #ifdef CONFIG_CONTIG_ALLOC static inline struct folio *folio_alloc_gigantic_noprof(int order, gfp_t gfp, int nid, nodemask_t *node) { struct page *page; if (WARN_ON(!order || !(gfp & __GFP_COMP))) return NULL; page = alloc_contig_pages_noprof(1 << order, gfp, nid, node); return page ? page_folio(page) : NULL; } #else static inline struct folio *folio_alloc_gigantic_noprof(int order, gfp_t gfp, int nid, nodemask_t *node) { return NULL; } #endif /* This should be paired with folio_put() rather than free_contig_range(). */ #define folio_alloc_gigantic(...) alloc_hooks(folio_alloc_gigantic_noprof(__VA_ARGS__)) #endif /* __LINUX_GFP_H */ |
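/*
 * Hedged usage sketch (not part of gfp.h): typical pairing of the page
 * allocation entry points declared above with their matching free
 * routines.  The function name my_alloc_demo is illustrative.
 */
#include <linux/gfp.h>

static int my_alloc_demo(void)
{
	struct page *page;
	unsigned long addr;

	/* Two physically contiguous pages (order 1), may sleep. */
	page = alloc_pages(GFP_KERNEL, 1);
	if (!page)
		return -ENOMEM;

	/* One zeroed page, returned as a kernel virtual address. */
	addr = get_zeroed_page(GFP_KERNEL);
	if (!addr) {
		__free_pages(page, 1);
		return -ENOMEM;
	}

	/* ... use the memory ... */

	free_page(addr);
	__free_pages(page, 1);
	return 0;
}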
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright 2011-2014 Autronica Fire and Security AS
 *
 * 2011-2014 Arvid Brodin, arvid.brodin@alten.se
 *
 * include file for HSR and PRP.
 */

#ifndef __HSR_SLAVE_H
#define __HSR_SLAVE_H

#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include "hsr_main.h"

int hsr_add_port(struct hsr_priv *hsr, struct net_device *dev,
		 enum hsr_port_type pt, struct netlink_ext_ack *extack);
void hsr_del_port(struct hsr_port *port);
bool hsr_port_exists(const struct net_device *dev);

static inline struct hsr_port *hsr_port_get_rtnl(const struct net_device *dev)
{
	ASSERT_RTNL();
	return hsr_port_exists(dev) ?
				rtnl_dereference(dev->rx_handler_data) : NULL;
}

static inline struct hsr_port *hsr_port_get_rcu(const struct net_device *dev)
{
	return hsr_port_exists(dev) ?
				rcu_dereference(dev->rx_handler_data) : NULL;
}

bool hsr_invalid_dan_ingress_frame(__be16 protocol);

#endif /* __HSR_SLAVE_H */
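/*
 * Hedged usage sketch (not part of hsr_slave.h): looking up the HSR port
 * behind a net_device from a context that only holds the RCU read lock,
 * using the accessor declared above.  my_dev_is_hsr_slave is an
 * illustrative helper name.
 */
#include <linux/rcupdate.h>

static bool my_dev_is_hsr_slave(const struct net_device *dev)
{
	struct hsr_port *port;
	bool ret;

	rcu_read_lock();
	port = hsr_port_get_rcu(dev);	/* NULL if dev is not an HSR slave */
	ret = port != NULL;
	rcu_read_unlock();

	return ret;
}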
876 1181 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 | /* SPDX-License-Identifier: GPL-2.0-only */ /* * Access to user system call parameters and results * * Copyright (C) 2008-2009 Red Hat, Inc. All rights reserved. * * See asm-generic/syscall.h for descriptions of what we must do here. */ #ifndef _ASM_X86_SYSCALL_H #define _ASM_X86_SYSCALL_H #include <uapi/linux/audit.h> #include <linux/sched.h> #include <linux/err.h> #include <asm/thread_info.h> /* for TS_COMPAT */ #include <asm/unistd.h> /* This is used purely for kernel/trace/trace_syscalls.c */ typedef long (*sys_call_ptr_t)(const struct pt_regs *); extern const sys_call_ptr_t sys_call_table[]; /* * These may not exist, but still put the prototypes in so we * can use IS_ENABLED(). */ extern long ia32_sys_call(const struct pt_regs *, unsigned int nr); extern long x32_sys_call(const struct pt_regs *, unsigned int nr); extern long x64_sys_call(const struct pt_regs *, unsigned int nr); /* * Only the low 32 bits of orig_ax are meaningful, so we return int. * This importantly ignores the high bits on 64-bit, so comparisons * sign-extend the low 32 bits. */ static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs) { return regs->orig_ax; } static inline void syscall_rollback(struct task_struct *task, struct pt_regs *regs) { regs->ax = regs->orig_ax; } static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { unsigned long error = regs->ax; #ifdef CONFIG_IA32_EMULATION /* * TS_COMPAT is set for 32-bit syscall entries and then * remains set until we return to user mode. */ if (task->thread_info.status & (TS_COMPAT|TS_I386_REGS_POKED)) /* * Sign-extend the value so (int)-EFOO becomes (long)-EFOO * and will match correctly in comparisons. */ error = (long) (int) error; #endif return IS_ERR_VALUE(error) ? error : 0; } static inline long syscall_get_return_value(struct task_struct *task, struct pt_regs *regs) { return regs->ax; } static inline void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs, int error, long val) { regs->ax = (long) error ?: val; } #ifdef CONFIG_X86_32 static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, unsigned long *args) { args[0] = regs->bx; args[1] = regs->cx; args[2] = regs->dx; args[3] = regs->si; args[4] = regs->di; args[5] = regs->bp; } static inline int syscall_get_arch(struct task_struct *task) { return AUDIT_ARCH_I386; } #else /* CONFIG_X86_64 */ static inline void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs, unsigned long *args) { # ifdef CONFIG_IA32_EMULATION if (task->thread_info.status & TS_COMPAT) { *args++ = regs->bx; *args++ = regs->cx; *args++ = regs->dx; *args++ = regs->si; *args++ = regs->di; *args = regs->bp; } else # endif { *args++ = regs->di; *args++ = regs->si; *args++ = regs->dx; *args++ = regs->r10; *args++ = regs->r8; *args = regs->r9; } } static inline int syscall_get_arch(struct task_struct *task) { /* x32 tasks should be considered AUDIT_ARCH_X86_64. */ return (IS_ENABLED(CONFIG_IA32_EMULATION) && task->thread_info.status & TS_COMPAT) ? 
AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; } bool do_syscall_64(struct pt_regs *regs, int nr); void do_int80_emulation(struct pt_regs *regs); #endif /* CONFIG_X86_32 */ void do_int80_syscall_32(struct pt_regs *regs); bool do_fast_syscall_32(struct pt_regs *regs); bool do_SYSENTER_32(struct pt_regs *regs); #endif /* _ASM_X86_SYSCALL_H */ |
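/*
 * Hedged usage sketch (not part of this header): how a tracer might dump
 * the number, arguments and audit arch of the system call a task is
 * currently executing, using the accessors declared above.
 * my_dump_syscall is an illustrative name.
 */
static void my_dump_syscall(struct task_struct *task, struct pt_regs *regs)
{
	unsigned long args[6];
	int nr = syscall_get_nr(task, regs);

	if (nr < 0)
		return;		/* task is not inside a system call */

	syscall_get_arguments(task, regs, args);
	pr_info("syscall %d (arch %#x): %lx %lx %lx %lx %lx %lx\n",
		nr, syscall_get_arch(task),
		args[0], args[1], args[2], args[3], args[4], args[5]);
}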
2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_UNWIND_H #define _ASM_X86_UNWIND_H #include <linux/sched.h> #include <linux/ftrace.h> #include <linux/rethook.h> #include <asm/ptrace.h> #include <asm/stacktrace.h> #define IRET_FRAME_OFFSET (offsetof(struct pt_regs, ip)) #define IRET_FRAME_SIZE (sizeof(struct pt_regs) - IRET_FRAME_OFFSET) struct unwind_state { struct stack_info stack_info; unsigned long stack_mask; struct task_struct *task; int graph_idx; #if defined(CONFIG_RETHOOK) struct llist_node *kr_cur; #endif bool error; #if defined(CONFIG_UNWINDER_ORC) bool signal, full_regs; unsigned long sp, bp, ip; struct pt_regs *regs, *prev_regs; #elif defined(CONFIG_UNWINDER_FRAME_POINTER) bool got_irq; unsigned long *bp, *orig_sp, ip; /* * If non-NULL: The current frame is incomplete and doesn't contain a * valid BP. When looking for the next frame, use this instead of the * non-existent saved BP. */ unsigned long *next_bp; struct pt_regs *regs; #else unsigned long *sp; #endif }; void __unwind_start(struct unwind_state *state, struct task_struct *task, struct pt_regs *regs, unsigned long *first_frame); bool unwind_next_frame(struct unwind_state *state); unsigned long unwind_get_return_address(struct unwind_state *state); unsigned long *unwind_get_return_address_ptr(struct unwind_state *state); static inline bool unwind_done(struct unwind_state *state) { return state->stack_info.type == STACK_TYPE_UNKNOWN; } static inline bool unwind_error(struct unwind_state *state) { return state->error; } static inline void unwind_start(struct unwind_state *state, struct task_struct *task, struct pt_regs *regs, unsigned long *first_frame) { first_frame = first_frame ? : get_stack_pointer(task, regs); __unwind_start(state, task, regs, first_frame); } #if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER) /* * If 'partial' returns true, only the iret frame registers are valid. */ static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state, bool *partial) { if (unwind_done(state)) return NULL; if (partial) { #ifdef CONFIG_UNWINDER_ORC *partial = !state->full_regs; #else *partial = false; #endif } return state->regs; } #else static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state, bool *partial) { return NULL; } #endif #ifdef CONFIG_UNWINDER_ORC void unwind_init(void); void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, void *orc, size_t orc_size); #else static inline void unwind_init(void) {} static inline void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, void *orc, size_t orc_size) {} #endif static inline unsigned long unwind_recover_rethook(struct unwind_state *state, unsigned long addr, unsigned long *addr_p) { #ifdef CONFIG_RETHOOK if (is_rethook_trampoline(addr)) return rethook_find_ret_addr(state->task, (unsigned long)addr_p, &state->kr_cur); #endif return addr; } /* Recover the return address modified by rethook and ftrace_graph. 
*/ static inline unsigned long unwind_recover_ret_addr(struct unwind_state *state, unsigned long addr, unsigned long *addr_p) { unsigned long ret; ret = ftrace_graph_ret_addr(state->task, &state->graph_idx, addr, addr_p); return unwind_recover_rethook(state, ret, addr_p); } /* * This disables KASAN checking when reading a value from another task's stack, * since the other task could be running on another CPU and could have poisoned * the stack in the meantime. */ #define READ_ONCE_TASK_STACK(task, x) \ ({ \ unsigned long val; \ if (task == current) \ val = READ_ONCE(x); \ else \ val = READ_ONCE_NOCHECK(x); \ val; \ }) static inline bool task_on_another_cpu(struct task_struct *task) { #ifdef CONFIG_SMP return task != current && task->on_cpu; #else return false; #endif } #endif /* _ASM_X86_UNWIND_H */ |
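/*
 * Hedged usage sketch (not part of this header): the canonical unwind
 * loop built from the API above, walking a task's stack and printing
 * each return address.  my_print_trace is an illustrative name.
 */
static void my_print_trace(struct task_struct *task, struct pt_regs *regs)
{
	struct unwind_state state;

	for (unwind_start(&state, task, regs, NULL); !unwind_done(&state);
	     unwind_next_frame(&state)) {
		unsigned long addr = unwind_get_return_address(&state);

		if (!addr)
			break;
		printk("%pS\n", (void *)addr);
	}
}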
// SPDX-License-Identifier: GPL-2.0
/*
 * security/tomoyo/network.c
 *
 * Copyright (C) 2005-2011 NTT DATA CORPORATION
 */

#include "common.h"
#include <linux/slab.h>

/* Structure for holding inet domain socket's address.
*/ struct tomoyo_inet_addr_info { __be16 port; /* In network byte order. */ const __be32 *address; /* In network byte order. */ bool is_ipv6; }; /* Structure for holding unix domain socket's address. */ struct tomoyo_unix_addr_info { u8 *addr; /* This may not be '\0' terminated string. */ unsigned int addr_len; }; /* Structure for holding socket address. */ struct tomoyo_addr_info { u8 protocol; u8 operation; struct tomoyo_inet_addr_info inet; struct tomoyo_unix_addr_info unix0; }; /* String table for socket's protocols. */ const char * const tomoyo_proto_keyword[TOMOYO_SOCK_MAX] = { [SOCK_STREAM] = "stream", [SOCK_DGRAM] = "dgram", [SOCK_RAW] = "raw", [SOCK_SEQPACKET] = "seqpacket", [0] = " ", /* Dummy for avoiding NULL pointer dereference. */ [4] = " ", /* Dummy for avoiding NULL pointer dereference. */ }; /** * tomoyo_parse_ipaddr_union - Parse an IP address. * * @param: Pointer to "struct tomoyo_acl_param". * @ptr: Pointer to "struct tomoyo_ipaddr_union". * * Returns true on success, false otherwise. */ bool tomoyo_parse_ipaddr_union(struct tomoyo_acl_param *param, struct tomoyo_ipaddr_union *ptr) { u8 * const min = ptr->ip[0].in6_u.u6_addr8; u8 * const max = ptr->ip[1].in6_u.u6_addr8; char *address = tomoyo_read_token(param); const char *end; if (!strchr(address, ':') && in4_pton(address, -1, min, '-', &end) > 0) { ptr->is_ipv6 = false; if (!*end) ptr->ip[1].s6_addr32[0] = ptr->ip[0].s6_addr32[0]; else if (*end++ != '-' || in4_pton(end, -1, max, '\0', &end) <= 0 || *end) return false; return true; } if (in6_pton(address, -1, min, '-', &end) > 0) { ptr->is_ipv6 = true; if (!*end) memmove(max, min, sizeof(u16) * 8); else if (*end++ != '-' || in6_pton(end, -1, max, '\0', &end) <= 0 || *end) return false; return true; } return false; } /** * tomoyo_print_ipv4 - Print an IPv4 address. * * @buffer: Buffer to write to. * @buffer_len: Size of @buffer. * @min_ip: Pointer to __be32. * @max_ip: Pointer to __be32. * * Returns nothing. */ static void tomoyo_print_ipv4(char *buffer, const unsigned int buffer_len, const __be32 *min_ip, const __be32 *max_ip) { snprintf(buffer, buffer_len, "%pI4%c%pI4", min_ip, *min_ip == *max_ip ? '\0' : '-', max_ip); } /** * tomoyo_print_ipv6 - Print an IPv6 address. * * @buffer: Buffer to write to. * @buffer_len: Size of @buffer. * @min_ip: Pointer to "struct in6_addr". * @max_ip: Pointer to "struct in6_addr". * * Returns nothing. */ static void tomoyo_print_ipv6(char *buffer, const unsigned int buffer_len, const struct in6_addr *min_ip, const struct in6_addr *max_ip) { snprintf(buffer, buffer_len, "%pI6c%c%pI6c", min_ip, !memcmp(min_ip, max_ip, 16) ? '\0' : '-', max_ip); } /** * tomoyo_print_ip - Print an IP address. * * @buf: Buffer to write to. * @size: Size of @buf. * @ptr: Pointer to "struct ipaddr_union". * * Returns nothing. */ void tomoyo_print_ip(char *buf, const unsigned int size, const struct tomoyo_ipaddr_union *ptr) { if (ptr->is_ipv6) tomoyo_print_ipv6(buf, size, &ptr->ip[0], &ptr->ip[1]); else tomoyo_print_ipv4(buf, size, &ptr->ip[0].s6_addr32[0], &ptr->ip[1].s6_addr32[0]); } /* * Mapping table from "enum tomoyo_network_acl_index" to * "enum tomoyo_mac_index" for inet domain socket. 
*/ static const u8 tomoyo_inet2mac [TOMOYO_SOCK_MAX][TOMOYO_MAX_NETWORK_OPERATION] = { [SOCK_STREAM] = { [TOMOYO_NETWORK_BIND] = TOMOYO_MAC_NETWORK_INET_STREAM_BIND, [TOMOYO_NETWORK_LISTEN] = TOMOYO_MAC_NETWORK_INET_STREAM_LISTEN, [TOMOYO_NETWORK_CONNECT] = TOMOYO_MAC_NETWORK_INET_STREAM_CONNECT, }, [SOCK_DGRAM] = { [TOMOYO_NETWORK_BIND] = TOMOYO_MAC_NETWORK_INET_DGRAM_BIND, [TOMOYO_NETWORK_SEND] = TOMOYO_MAC_NETWORK_INET_DGRAM_SEND, }, [SOCK_RAW] = { [TOMOYO_NETWORK_BIND] = TOMOYO_MAC_NETWORK_INET_RAW_BIND, [TOMOYO_NETWORK_SEND] = TOMOYO_MAC_NETWORK_INET_RAW_SEND, }, }; /* * Mapping table from "enum tomoyo_network_acl_index" to * "enum tomoyo_mac_index" for unix domain socket. */ static const u8 tomoyo_unix2mac [TOMOYO_SOCK_MAX][TOMOYO_MAX_NETWORK_OPERATION] = { [SOCK_STREAM] = { [TOMOYO_NETWORK_BIND] = TOMOYO_MAC_NETWORK_UNIX_STREAM_BIND, [TOMOYO_NETWORK_LISTEN] = TOMOYO_MAC_NETWORK_UNIX_STREAM_LISTEN, [TOMOYO_NETWORK_CONNECT] = TOMOYO_MAC_NETWORK_UNIX_STREAM_CONNECT, }, [SOCK_DGRAM] = { [TOMOYO_NETWORK_BIND] = TOMOYO_MAC_NETWORK_UNIX_DGRAM_BIND, [TOMOYO_NETWORK_SEND] = TOMOYO_MAC_NETWORK_UNIX_DGRAM_SEND, }, [SOCK_SEQPACKET] = { [TOMOYO_NETWORK_BIND] = TOMOYO_MAC_NETWORK_UNIX_SEQPACKET_BIND, [TOMOYO_NETWORK_LISTEN] = TOMOYO_MAC_NETWORK_UNIX_SEQPACKET_LISTEN, [TOMOYO_NETWORK_CONNECT] = TOMOYO_MAC_NETWORK_UNIX_SEQPACKET_CONNECT, }, }; /** * tomoyo_same_inet_acl - Check for duplicated "struct tomoyo_inet_acl" entry. * * @a: Pointer to "struct tomoyo_acl_info". * @b: Pointer to "struct tomoyo_acl_info". * * Returns true if @a == @b except permission bits, false otherwise. */ static bool tomoyo_same_inet_acl(const struct tomoyo_acl_info *a, const struct tomoyo_acl_info *b) { const struct tomoyo_inet_acl *p1 = container_of(a, typeof(*p1), head); const struct tomoyo_inet_acl *p2 = container_of(b, typeof(*p2), head); return p1->protocol == p2->protocol && tomoyo_same_ipaddr_union(&p1->address, &p2->address) && tomoyo_same_number_union(&p1->port, &p2->port); } /** * tomoyo_same_unix_acl - Check for duplicated "struct tomoyo_unix_acl" entry. * * @a: Pointer to "struct tomoyo_acl_info". * @b: Pointer to "struct tomoyo_acl_info". * * Returns true if @a == @b except permission bits, false otherwise. */ static bool tomoyo_same_unix_acl(const struct tomoyo_acl_info *a, const struct tomoyo_acl_info *b) { const struct tomoyo_unix_acl *p1 = container_of(a, typeof(*p1), head); const struct tomoyo_unix_acl *p2 = container_of(b, typeof(*p2), head); return p1->protocol == p2->protocol && tomoyo_same_name_union(&p1->name, &p2->name); } /** * tomoyo_merge_inet_acl - Merge duplicated "struct tomoyo_inet_acl" entry. * * @a: Pointer to "struct tomoyo_acl_info". * @b: Pointer to "struct tomoyo_acl_info". * @is_delete: True for @a &= ~@b, false for @a |= @b. * * Returns true if @a is empty, false otherwise. */ static bool tomoyo_merge_inet_acl(struct tomoyo_acl_info *a, struct tomoyo_acl_info *b, const bool is_delete) { u8 * const a_perm = &container_of(a, struct tomoyo_inet_acl, head)->perm; u8 perm = READ_ONCE(*a_perm); const u8 b_perm = container_of(b, struct tomoyo_inet_acl, head)->perm; if (is_delete) perm &= ~b_perm; else perm |= b_perm; WRITE_ONCE(*a_perm, perm); return !perm; } /** * tomoyo_merge_unix_acl - Merge duplicated "struct tomoyo_unix_acl" entry. * * @a: Pointer to "struct tomoyo_acl_info". * @b: Pointer to "struct tomoyo_acl_info". * @is_delete: True for @a &= ~@b, false for @a |= @b. * * Returns true if @a is empty, false otherwise. 
*/ static bool tomoyo_merge_unix_acl(struct tomoyo_acl_info *a, struct tomoyo_acl_info *b, const bool is_delete) { u8 * const a_perm = &container_of(a, struct tomoyo_unix_acl, head)->perm; u8 perm = READ_ONCE(*a_perm); const u8 b_perm = container_of(b, struct tomoyo_unix_acl, head)->perm; if (is_delete) perm &= ~b_perm; else perm |= b_perm; WRITE_ONCE(*a_perm, perm); return !perm; } /** * tomoyo_write_inet_network - Write "struct tomoyo_inet_acl" list. * * @param: Pointer to "struct tomoyo_acl_param". * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). */ int tomoyo_write_inet_network(struct tomoyo_acl_param *param) { struct tomoyo_inet_acl e = { .head.type = TOMOYO_TYPE_INET_ACL }; int error = -EINVAL; u8 type; const char *protocol = tomoyo_read_token(param); const char *operation = tomoyo_read_token(param); for (e.protocol = 0; e.protocol < TOMOYO_SOCK_MAX; e.protocol++) if (!strcmp(protocol, tomoyo_proto_keyword[e.protocol])) break; for (type = 0; type < TOMOYO_MAX_NETWORK_OPERATION; type++) if (tomoyo_permstr(operation, tomoyo_socket_keyword[type])) e.perm |= 1 << type; if (e.protocol == TOMOYO_SOCK_MAX || !e.perm) return -EINVAL; if (param->data[0] == '@') { param->data++; e.address.group = tomoyo_get_group(param, TOMOYO_ADDRESS_GROUP); if (!e.address.group) return -ENOMEM; } else { if (!tomoyo_parse_ipaddr_union(param, &e.address)) goto out; } if (!tomoyo_parse_number_union(param, &e.port) || e.port.values[1] > 65535) goto out; error = tomoyo_update_domain(&e.head, sizeof(e), param, tomoyo_same_inet_acl, tomoyo_merge_inet_acl); out: tomoyo_put_group(e.address.group); tomoyo_put_number_union(&e.port); return error; } /** * tomoyo_write_unix_network - Write "struct tomoyo_unix_acl" list. * * @param: Pointer to "struct tomoyo_acl_param". * * Returns 0 on success, negative value otherwise. */ int tomoyo_write_unix_network(struct tomoyo_acl_param *param) { struct tomoyo_unix_acl e = { .head.type = TOMOYO_TYPE_UNIX_ACL }; int error; u8 type; const char *protocol = tomoyo_read_token(param); const char *operation = tomoyo_read_token(param); for (e.protocol = 0; e.protocol < TOMOYO_SOCK_MAX; e.protocol++) if (!strcmp(protocol, tomoyo_proto_keyword[e.protocol])) break; for (type = 0; type < TOMOYO_MAX_NETWORK_OPERATION; type++) if (tomoyo_permstr(operation, tomoyo_socket_keyword[type])) e.perm |= 1 << type; if (e.protocol == TOMOYO_SOCK_MAX || !e.perm) return -EINVAL; if (!tomoyo_parse_name_union(param, &e.name)) return -EINVAL; error = tomoyo_update_domain(&e.head, sizeof(e), param, tomoyo_same_unix_acl, tomoyo_merge_unix_acl); tomoyo_put_name_union(&e.name); return error; } /** * tomoyo_audit_net_log - Audit network log. * * @r: Pointer to "struct tomoyo_request_info". * @family: Name of socket family ("inet" or "unix"). * @protocol: Name of protocol in @family. * @operation: Name of socket operation. * @address: Name of address. * * Returns 0 on success, negative value otherwise. */ static int tomoyo_audit_net_log(struct tomoyo_request_info *r, const char *family, const u8 protocol, const u8 operation, const char *address) { return tomoyo_supervisor(r, "network %s %s %s %s\n", family, tomoyo_proto_keyword[protocol], tomoyo_socket_keyword[operation], address); } /** * tomoyo_audit_inet_log - Audit INET network log. * * @r: Pointer to "struct tomoyo_request_info". * * Returns 0 on success, negative value otherwise. 
*/ static int tomoyo_audit_inet_log(struct tomoyo_request_info *r) { char buf[128]; int len; const __be32 *address = r->param.inet_network.address; if (r->param.inet_network.is_ipv6) tomoyo_print_ipv6(buf, sizeof(buf), (const struct in6_addr *) address, (const struct in6_addr *) address); else tomoyo_print_ipv4(buf, sizeof(buf), address, address); len = strlen(buf); snprintf(buf + len, sizeof(buf) - len, " %u", r->param.inet_network.port); return tomoyo_audit_net_log(r, "inet", r->param.inet_network.protocol, r->param.inet_network.operation, buf); } /** * tomoyo_audit_unix_log - Audit UNIX network log. * * @r: Pointer to "struct tomoyo_request_info". * * Returns 0 on success, negative value otherwise. */ static int tomoyo_audit_unix_log(struct tomoyo_request_info *r) { return tomoyo_audit_net_log(r, "unix", r->param.unix_network.protocol, r->param.unix_network.operation, r->param.unix_network.address->name); } /** * tomoyo_check_inet_acl - Check permission for inet domain socket operation. * * @r: Pointer to "struct tomoyo_request_info". * @ptr: Pointer to "struct tomoyo_acl_info". * * Returns true if granted, false otherwise. */ static bool tomoyo_check_inet_acl(struct tomoyo_request_info *r, const struct tomoyo_acl_info *ptr) { const struct tomoyo_inet_acl *acl = container_of(ptr, typeof(*acl), head); const u8 size = r->param.inet_network.is_ipv6 ? 16 : 4; if (!(acl->perm & (1 << r->param.inet_network.operation)) || !tomoyo_compare_number_union(r->param.inet_network.port, &acl->port)) return false; if (acl->address.group) return tomoyo_address_matches_group (r->param.inet_network.is_ipv6, r->param.inet_network.address, acl->address.group); return acl->address.is_ipv6 == r->param.inet_network.is_ipv6 && memcmp(&acl->address.ip[0], r->param.inet_network.address, size) <= 0 && memcmp(r->param.inet_network.address, &acl->address.ip[1], size) <= 0; } /** * tomoyo_check_unix_acl - Check permission for unix domain socket operation. * * @r: Pointer to "struct tomoyo_request_info". * @ptr: Pointer to "struct tomoyo_acl_info". * * Returns true if granted, false otherwise. */ static bool tomoyo_check_unix_acl(struct tomoyo_request_info *r, const struct tomoyo_acl_info *ptr) { const struct tomoyo_unix_acl *acl = container_of(ptr, typeof(*acl), head); return (acl->perm & (1 << r->param.unix_network.operation)) && tomoyo_compare_name_union(r->param.unix_network.address, &acl->name); } /** * tomoyo_inet_entry - Check permission for INET network operation. * * @address: Pointer to "struct tomoyo_addr_info". * * Returns 0 on success, negative value otherwise. */ static int tomoyo_inet_entry(const struct tomoyo_addr_info *address) { const int idx = tomoyo_read_lock(); struct tomoyo_request_info r; int error = 0; const u8 type = tomoyo_inet2mac[address->protocol][address->operation]; if (type && tomoyo_init_request_info(&r, NULL, type) != TOMOYO_CONFIG_DISABLED) { r.param_type = TOMOYO_TYPE_INET_ACL; r.param.inet_network.protocol = address->protocol; r.param.inet_network.operation = address->operation; r.param.inet_network.is_ipv6 = address->inet.is_ipv6; r.param.inet_network.address = address->inet.address; r.param.inet_network.port = ntohs(address->inet.port); do { tomoyo_check_acl(&r, tomoyo_check_inet_acl); error = tomoyo_audit_inet_log(&r); } while (error == TOMOYO_RETRY_REQUEST); } tomoyo_read_unlock(idx); return error; } /** * tomoyo_check_inet_address - Check permission for inet domain socket's operation. * * @addr: Pointer to "struct sockaddr". * @addr_len: Size of @addr. * @port: Port number. 
* @address: Pointer to "struct tomoyo_addr_info". * * Returns 0 on success, negative value otherwise. */ static int tomoyo_check_inet_address(const struct sockaddr *addr, const unsigned int addr_len, const u16 port, struct tomoyo_addr_info *address) { struct tomoyo_inet_addr_info *i = &address->inet; if (addr_len < offsetofend(struct sockaddr, sa_family)) return 0; switch (addr->sa_family) { case AF_INET6: if (addr_len < SIN6_LEN_RFC2133) goto skip; i->is_ipv6 = true; i->address = (__be32 *) ((struct sockaddr_in6 *) addr)->sin6_addr.s6_addr; i->port = ((struct sockaddr_in6 *) addr)->sin6_port; break; case AF_INET: if (addr_len < sizeof(struct sockaddr_in)) goto skip; i->is_ipv6 = false; i->address = (__be32 *) &((struct sockaddr_in *) addr)->sin_addr; i->port = ((struct sockaddr_in *) addr)->sin_port; break; default: goto skip; } if (address->protocol == SOCK_RAW) i->port = htons(port); return tomoyo_inet_entry(address); skip: return 0; } /** * tomoyo_unix_entry - Check permission for UNIX network operation. * * @address: Pointer to "struct tomoyo_addr_info". * * Returns 0 on success, negative value otherwise. */ static int tomoyo_unix_entry(const struct tomoyo_addr_info *address) { const int idx = tomoyo_read_lock(); struct tomoyo_request_info r; int error = 0; const u8 type = tomoyo_unix2mac[address->protocol][address->operation]; if (type && tomoyo_init_request_info(&r, NULL, type) != TOMOYO_CONFIG_DISABLED) { char *buf = address->unix0.addr; int len = address->unix0.addr_len - sizeof(sa_family_t); if (len <= 0) { buf = "anonymous"; len = 9; } else if (buf[0]) { len = strnlen(buf, len); } buf = tomoyo_encode2(buf, len); if (buf) { struct tomoyo_path_info addr; addr.name = buf; tomoyo_fill_path_info(&addr); r.param_type = TOMOYO_TYPE_UNIX_ACL; r.param.unix_network.protocol = address->protocol; r.param.unix_network.operation = address->operation; r.param.unix_network.address = &addr; do { tomoyo_check_acl(&r, tomoyo_check_unix_acl); error = tomoyo_audit_unix_log(&r); } while (error == TOMOYO_RETRY_REQUEST); kfree(buf); } else error = -ENOMEM; } tomoyo_read_unlock(idx); return error; } /** * tomoyo_check_unix_address - Check permission for unix domain socket's operation. * * @addr: Pointer to "struct sockaddr". * @addr_len: Size of @addr. * @address: Pointer to "struct tomoyo_addr_info". * * Returns 0 on success, negative value otherwise. */ static int tomoyo_check_unix_address(struct sockaddr *addr, const unsigned int addr_len, struct tomoyo_addr_info *address) { struct tomoyo_unix_addr_info *u = &address->unix0; if (addr_len < offsetofend(struct sockaddr, sa_family)) return 0; if (addr->sa_family != AF_UNIX) return 0; u->addr = ((struct sockaddr_un *) addr)->sun_path; u->addr_len = addr_len; return tomoyo_unix_entry(address); } /** * tomoyo_kernel_service - Check whether I'm kernel service or not. * * Returns true if I'm kernel service, false otherwise. */ static bool tomoyo_kernel_service(void) { /* Nothing to do if I am a kernel service. */ return current->flags & PF_KTHREAD; } /** * tomoyo_sock_family - Get socket's family. * * @sk: Pointer to "struct sock". * * Returns one of PF_INET, PF_INET6, PF_UNIX or 0. */ static u8 tomoyo_sock_family(struct sock *sk) { u8 family; if (tomoyo_kernel_service()) return 0; family = sk->sk_family; switch (family) { case PF_INET: case PF_INET6: case PF_UNIX: return family; default: return 0; } } /** * tomoyo_socket_listen_permission - Check permission for listening a socket. * * @sock: Pointer to "struct socket". 
* * Returns 0 on success, negative value otherwise. */ int tomoyo_socket_listen_permission(struct socket *sock) { struct tomoyo_addr_info address; const u8 family = tomoyo_sock_family(sock->sk); const unsigned int type = sock->type; struct sockaddr_storage addr; int addr_len; if (!family || (type != SOCK_STREAM && type != SOCK_SEQPACKET)) return 0; { const int error = sock->ops->getname(sock, (struct sockaddr *) &addr, 0); if (error < 0) return error; addr_len = error; } address.protocol = type; address.operation = TOMOYO_NETWORK_LISTEN; if (family == PF_UNIX) return tomoyo_check_unix_address((struct sockaddr *) &addr, addr_len, &address); return tomoyo_check_inet_address((struct sockaddr *) &addr, addr_len, 0, &address); } /** * tomoyo_socket_connect_permission - Check permission for setting the remote address of a socket. * * @sock: Pointer to "struct socket". * @addr: Pointer to "struct sockaddr". * @addr_len: Size of @addr. * * Returns 0 on success, negative value otherwise. */ int tomoyo_socket_connect_permission(struct socket *sock, struct sockaddr *addr, int addr_len) { struct tomoyo_addr_info address; const u8 family = tomoyo_sock_family(sock->sk); const unsigned int type = sock->type; if (!family) return 0; address.protocol = type; switch (type) { case SOCK_DGRAM: case SOCK_RAW: address.operation = TOMOYO_NETWORK_SEND; break; case SOCK_STREAM: case SOCK_SEQPACKET: address.operation = TOMOYO_NETWORK_CONNECT; break; default: return 0; } if (family == PF_UNIX) return tomoyo_check_unix_address(addr, addr_len, &address); return tomoyo_check_inet_address(addr, addr_len, sock->sk->sk_protocol, &address); } /** * tomoyo_socket_bind_permission - Check permission for setting the local address of a socket. * * @sock: Pointer to "struct socket". * @addr: Pointer to "struct sockaddr". * @addr_len: Size of @addr. * * Returns 0 on success, negative value otherwise. */ int tomoyo_socket_bind_permission(struct socket *sock, struct sockaddr *addr, int addr_len) { struct tomoyo_addr_info address; const u8 family = tomoyo_sock_family(sock->sk); const unsigned int type = sock->type; if (!family) return 0; switch (type) { case SOCK_STREAM: case SOCK_DGRAM: case SOCK_RAW: case SOCK_SEQPACKET: address.protocol = type; address.operation = TOMOYO_NETWORK_BIND; break; default: return 0; } if (family == PF_UNIX) return tomoyo_check_unix_address(addr, addr_len, &address); return tomoyo_check_inet_address(addr, addr_len, sock->sk->sk_protocol, &address); } /** * tomoyo_socket_sendmsg_permission - Check permission for sending a datagram. * * @sock: Pointer to "struct socket". * @msg: Pointer to "struct msghdr". * @size: Unused. * * Returns 0 on success, negative value otherwise. */ int tomoyo_socket_sendmsg_permission(struct socket *sock, struct msghdr *msg, int size) { struct tomoyo_addr_info address; const u8 family = tomoyo_sock_family(sock->sk); const unsigned int type = sock->type; if (!msg->msg_name || !family || (type != SOCK_DGRAM && type != SOCK_RAW)) return 0; address.protocol = type; address.operation = TOMOYO_NETWORK_SEND; if (family == PF_UNIX) return tomoyo_check_unix_address((struct sockaddr *) msg->msg_name, msg->msg_namelen, &address); return tomoyo_check_inet_address((struct sockaddr *) msg->msg_name, msg->msg_namelen, sock->sk->sk_protocol, &address); } |
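/*
 * Hedged illustration (not part of network.c): the inclusive range test
 * performed by tomoyo_check_inet_acl() reduces to two memcmp() calls,
 * which works for both IPv4 (4 bytes) and IPv6 (16 bytes) because the
 * addresses are kept in network byte order.  my_addr_in_range is an
 * illustrative name.
 */
#include <linux/string.h>

static bool my_addr_in_range(const u8 *addr, const u8 *min, const u8 *max,
			     unsigned int size)
{
	/* min <= addr && addr <= max, compared as big-endian byte strings */
	return memcmp(min, addr, size) <= 0 && memcmp(addr, max, size) <= 0;
}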
/* mpicoder.c  -  Coder for the external representation of MPIs
 * Copyright (C) 1998, 1999 Free Software Foundation, Inc.
 *
 * This file is part of GnuPG.
 *
 * GnuPG is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * GnuPG is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ #include <linux/bitops.h> #include <linux/count_zeros.h> #include <linux/byteorder/generic.h> #include <linux/scatterlist.h> #include <linux/string.h> #include "mpi-internal.h" #define MAX_EXTERN_MPI_BITS 16384 /** * mpi_read_raw_data - Read a raw byte stream as a positive integer * @xbuffer: The data to read * @nbytes: The amount of data to read */ MPI mpi_read_raw_data(const void *xbuffer, size_t nbytes) { const uint8_t *buffer = xbuffer; int i, j; unsigned nbits, nlimbs; mpi_limb_t a; MPI val = NULL; while (nbytes > 0 && buffer[0] == 0) { buffer++; nbytes--; } nbits = nbytes * 8; if (nbits > MAX_EXTERN_MPI_BITS) { pr_info("MPI: mpi too large (%u bits)\n", nbits); return NULL; } if (nbytes > 0) nbits -= count_leading_zeros(buffer[0]) - (BITS_PER_LONG - 8); nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB); val = mpi_alloc(nlimbs); if (!val) return NULL; val->nbits = nbits; val->sign = 0; val->nlimbs = nlimbs; if (nbytes > 0) { i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB; i %= BYTES_PER_MPI_LIMB; for (j = nlimbs; j > 0; j--) { a = 0; for (; i < BYTES_PER_MPI_LIMB; i++) { a <<= 8; a |= *buffer++; } i = 0; val->d[j - 1] = a; } } return val; } EXPORT_SYMBOL_GPL(mpi_read_raw_data); MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread) { const uint8_t *buffer = xbuffer; unsigned int nbits, nbytes; MPI val; if (*ret_nread < 2) return ERR_PTR(-EINVAL); nbits = buffer[0] << 8 | buffer[1]; if (nbits > MAX_EXTERN_MPI_BITS) { pr_info("MPI: mpi too large (%u bits)\n", nbits); return ERR_PTR(-EINVAL); } nbytes = DIV_ROUND_UP(nbits, 8); if (nbytes + 2 > *ret_nread) { pr_info("MPI: mpi larger than buffer nbytes=%u ret_nread=%u\n", nbytes, *ret_nread); return ERR_PTR(-EINVAL); } val = mpi_read_raw_data(buffer + 2, nbytes); if (!val) return ERR_PTR(-ENOMEM); *ret_nread = nbytes + 2; return val; } EXPORT_SYMBOL_GPL(mpi_read_from_buffer); static int count_lzeros(MPI a) { mpi_limb_t alimb; int i, lzeros = 0; for (i = a->nlimbs - 1; i >= 0; i--) { alimb = a->d[i]; if (alimb == 0) { lzeros += sizeof(mpi_limb_t); } else { lzeros += count_leading_zeros(alimb) / 8; break; } } return lzeros; } /** * mpi_read_buffer() - read MPI to a buffer provided by user (msb first) * * @a: a multi precision integer * @buf: buffer to which the output will be written to. Needs to be at * least mpi_get_size(a) long. * @buf_len: size of the buf. * @nbytes: receives the actual length of the data written on success and * the data to-be-written on -EOVERFLOW in case buf_len was too * small. * @sign: if not NULL, it will be set to the sign of a. * * Return: 0 on success or error code in case of error */ int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes, int *sign) { uint8_t *p; #if BYTES_PER_MPI_LIMB == 4 __be32 alimb; #elif BYTES_PER_MPI_LIMB == 8 __be64 alimb; #else #error please implement for this limb size. 
#endif unsigned int n = mpi_get_size(a); int i, lzeros; if (!buf || !nbytes) return -EINVAL; if (sign) *sign = a->sign; lzeros = count_lzeros(a); if (buf_len < n - lzeros) { *nbytes = n - lzeros; return -EOVERFLOW; } p = buf; *nbytes = n - lzeros; for (i = a->nlimbs - 1 - lzeros / BYTES_PER_MPI_LIMB, lzeros %= BYTES_PER_MPI_LIMB; i >= 0; i--) { #if BYTES_PER_MPI_LIMB == 4 alimb = cpu_to_be32(a->d[i]); #elif BYTES_PER_MPI_LIMB == 8 alimb = cpu_to_be64(a->d[i]); #else #error please implement for this limb size. #endif memcpy(p, (u8 *)&alimb + lzeros, BYTES_PER_MPI_LIMB - lzeros); p += BYTES_PER_MPI_LIMB - lzeros; lzeros = 0; } return 0; } EXPORT_SYMBOL_GPL(mpi_read_buffer); /* * mpi_get_buffer() - Returns an allocated buffer with the MPI (msb first). * Caller must free the return string. * This function does return a 0 byte buffer with nbytes set to zero if the * value of A is zero. * * @a: a multi precision integer. * @nbytes: receives the length of this buffer. * @sign: if not NULL, it will be set to the sign of the a. * * Return: Pointer to MPI buffer or NULL on error */ void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign) { uint8_t *buf; unsigned int n; int ret; if (!nbytes) return NULL; n = mpi_get_size(a); if (!n) n++; buf = kmalloc(n, GFP_KERNEL); if (!buf) return NULL; ret = mpi_read_buffer(a, buf, n, nbytes, sign); if (ret) { kfree(buf); return NULL; } return buf; } EXPORT_SYMBOL_GPL(mpi_get_buffer); /** * mpi_write_to_sgl() - Funnction exports MPI to an sgl (msb first) * * This function works in the same way as the mpi_read_buffer, but it * takes an sgl instead of u8 * buf. * * @a: a multi precision integer * @sgl: scatterlist to write to. Needs to be at least * mpi_get_size(a) long. * @nbytes: the number of bytes to write. Leading bytes will be * filled with zero. * @sign: if not NULL, it will be set to the sign of a. * * Return: 0 on success or error code in case of error */ int mpi_write_to_sgl(MPI a, struct scatterlist *sgl, unsigned nbytes, int *sign) { u8 *p, *p2; #if BYTES_PER_MPI_LIMB == 4 __be32 alimb; #elif BYTES_PER_MPI_LIMB == 8 __be64 alimb; #else #error please implement for this limb size. #endif unsigned int n = mpi_get_size(a); struct sg_mapping_iter miter; int i, x, buf_len; int nents; if (sign) *sign = a->sign; if (nbytes < n) return -EOVERFLOW; nents = sg_nents_for_len(sgl, nbytes); if (nents < 0) return -EINVAL; sg_miter_start(&miter, sgl, nents, SG_MITER_ATOMIC | SG_MITER_TO_SG); sg_miter_next(&miter); buf_len = miter.length; p2 = miter.addr; while (nbytes > n) { i = min_t(unsigned, nbytes - n, buf_len); memset(p2, 0, i); p2 += i; nbytes -= i; buf_len -= i; if (!buf_len) { sg_miter_next(&miter); buf_len = miter.length; p2 = miter.addr; } } for (i = a->nlimbs - 1; i >= 0; i--) { #if BYTES_PER_MPI_LIMB == 4 alimb = a->d[i] ? cpu_to_be32(a->d[i]) : 0; #elif BYTES_PER_MPI_LIMB == 8 alimb = a->d[i] ? cpu_to_be64(a->d[i]) : 0; #else #error please implement for this limb size. #endif p = (u8 *)&alimb; for (x = 0; x < sizeof(alimb); x++) { *p2++ = *p++; if (!--buf_len) { sg_miter_next(&miter); buf_len = miter.length; p2 = miter.addr; } } } sg_miter_stop(&miter); return 0; } EXPORT_SYMBOL_GPL(mpi_write_to_sgl); /* * mpi_read_raw_from_sgl() - Function allocates an MPI and populates it with * data from the sgl * * This function works in the same way as the mpi_read_raw_data, but it * takes an sgl instead of void * buffer. i.e. it allocates * a new MPI and reads the content of the sgl to the MPI. 
* * @sgl: scatterlist to read from * @nbytes: number of bytes to read * * Return: Pointer to a new MPI or NULL on error */ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes) { struct sg_mapping_iter miter; unsigned int nbits, nlimbs; int x, j, z, lzeros, ents; unsigned int len; const u8 *buff; mpi_limb_t a; MPI val = NULL; ents = sg_nents_for_len(sgl, nbytes); if (ents < 0) return NULL; sg_miter_start(&miter, sgl, ents, SG_MITER_ATOMIC | SG_MITER_FROM_SG); lzeros = 0; len = 0; while (nbytes > 0) { while (len && !*buff) { lzeros++; len--; buff++; } if (len && *buff) break; sg_miter_next(&miter); buff = miter.addr; len = miter.length; nbytes -= lzeros; lzeros = 0; } miter.consumed = lzeros; nbytes -= lzeros; nbits = nbytes * 8; if (nbits > MAX_EXTERN_MPI_BITS) { sg_miter_stop(&miter); pr_info("MPI: mpi too large (%u bits)\n", nbits); return NULL; } if (nbytes > 0) nbits -= count_leading_zeros(*buff) - (BITS_PER_LONG - 8); sg_miter_stop(&miter); nlimbs = DIV_ROUND_UP(nbytes, BYTES_PER_MPI_LIMB); val = mpi_alloc(nlimbs); if (!val) return NULL; val->nbits = nbits; val->sign = 0; val->nlimbs = nlimbs; if (nbytes == 0) return val; j = nlimbs - 1; a = 0; z = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB; z %= BYTES_PER_MPI_LIMB; while (sg_miter_next(&miter)) { buff = miter.addr; len = min_t(unsigned, miter.length, nbytes); nbytes -= len; for (x = 0; x < len; x++) { a <<= 8; a |= *buff++; if (((z + x + 1) % BYTES_PER_MPI_LIMB) == 0) { val->d[j--] = a; a = 0; } } z += x; } return val; } EXPORT_SYMBOL_GPL(mpi_read_raw_from_sgl); |
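/*
 * Hedged usage sketch (not part of mpicoder.c): round-tripping a raw
 * big-endian byte string through an MPI and back with the helpers above.
 * my_mpi_roundtrip is an illustrative name; mpi_free() is assumed to come
 * from the MPI library's utility code, not from this file.
 */
#include <linux/slab.h>

static int my_mpi_roundtrip(const u8 *raw, size_t len)
{
	unsigned int nbytes;
	void *out;
	MPI m;

	m = mpi_read_raw_data(raw, len);	/* leading zero bytes are skipped */
	if (!m)
		return -ENOMEM;

	out = mpi_get_buffer(m, &nbytes, NULL);	/* msb first, no sign byte */
	mpi_free(m);
	if (!out)
		return -ENOMEM;

	/* ... use the nbytes-long buffer ... */
	kfree(out);
	return 0;
}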
/* SPDX-License-Identifier: GPL-2.0-only */
/* A pointer that can point to either kernel or userspace memory. */
#ifndef _LINUX_BPFPTR_H
#define _LINUX_BPFPTR_H

#include <linux/mm.h>
#include <linux/sockptr.h>

typedef sockptr_t bpfptr_t;

static inline bool bpfptr_is_kernel(bpfptr_t bpfptr)
{
	return bpfptr.is_kernel;
}

static inline bpfptr_t KERNEL_BPFPTR(void *p)
{
	return (bpfptr_t) { .kernel = p, .is_kernel = true };
}

static inline bpfptr_t USER_BPFPTR(void __user *p)
{
	return (bpfptr_t) { .user = p };
}

static inline bpfptr_t make_bpfptr(u64 addr, bool is_kernel)
{
	if (is_kernel)
		return KERNEL_BPFPTR((void *)(uintptr_t)addr);
	else
		return USER_BPFPTR(u64_to_user_ptr(addr));
}

static inline bool bpfptr_is_null(bpfptr_t bpfptr)
{
	if (bpfptr_is_kernel(bpfptr))
		return !bpfptr.kernel;
	return !bpfptr.user;
}

static inline void bpfptr_add(bpfptr_t *bpfptr, size_t val)
{
	if (bpfptr_is_kernel(*bpfptr))
		bpfptr->kernel += val;
	else
		bpfptr->user += val;
}

static inline int copy_from_bpfptr_offset(void *dst, bpfptr_t src,
					  size_t offset, size_t size)
{
	if (!bpfptr_is_kernel(src))
		return copy_from_user(dst, src.user + offset, size);
	return copy_from_kernel_nofault(dst, src.kernel + offset, size);
}

static inline int copy_from_bpfptr(void *dst, bpfptr_t src, size_t size)
{
	return copy_from_bpfptr_offset(dst, src, 0, size);
}

static inline int copy_to_bpfptr_offset(bpfptr_t dst, size_t offset,
					const void *src, size_t size)
{
	return copy_to_sockptr_offset((sockptr_t) dst, offset, src, size);
}

static inline void *kvmemdup_bpfptr_noprof(bpfptr_t src, size_t len)
{
	void *p = kvmalloc_noprof(len, GFP_USER | __GFP_NOWARN);

	if (!p)
		return ERR_PTR(-ENOMEM);
	if (copy_from_bpfptr(p, src, len)) {
		kvfree(p);
		return ERR_PTR(-EFAULT);
	}
	return p;
}
#define kvmemdup_bpfptr(...)	alloc_hooks(kvmemdup_bpfptr_noprof(__VA_ARGS__))

static inline long strncpy_from_bpfptr(char *dst, bpfptr_t src, size_t count)
{
	if (bpfptr_is_kernel(src))
		return strncpy_from_kernel_nofault(dst, src.kernel, count);
	return strncpy_from_user(dst, src.user, count);
}

#endif /* _LINUX_BPFPTR_H */
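/*
 * Illustrative sketch (not part of the original header): how a syscall-style
 * path might use the helpers above to copy a fixed-size blob supplied either
 * from userspace or from the kernel.  "struct demo_cmd" and the function name
 * are hypothetical.
 */
struct demo_cmd {
	u32 flags;
	u64 data;
};

static inline int demo_copy_cmd(struct demo_cmd *cmd, u64 uaddr,
				bool from_kernel)
{
	bpfptr_t p = make_bpfptr(uaddr, from_kernel);

	if (bpfptr_is_null(p))
		return -EINVAL;

	/* same call works for both kernel and user pointers */
	if (copy_from_bpfptr(cmd, p, sizeof(*cmd)))
		return -EFAULT;

	return 0;
}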
146 146 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 | // SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/act_simple.c Simple example of an action * * Authors: Jamal Hadi Salim (2005-8) */ #include <linux/module.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> #include <net/tc_wrapper.h> #include <linux/tc_act/tc_defact.h> #include <net/tc_act/tc_defact.h> static struct tc_action_ops act_simp_ops; #define SIMP_MAX_DATA 32 TC_INDIRECT_SCOPE int tcf_simp_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_defact *d = to_defact(a); spin_lock(&d->tcf_lock); tcf_lastuse_update(&d->tcf_tm); bstats_update(&d->tcf_bstats, skb); /* print policy string followed by _ then packet count * Example if this was the 3rd packet and the string was "hello" * then it would look like "hello_3" (without quotes) */ pr_info("simple: %s_%llu\n", (char *)d->tcfd_defdata, u64_stats_read(&d->tcf_bstats.packets)); spin_unlock(&d->tcf_lock); return d->tcf_action; } static void tcf_simp_release(struct tc_action *a) { struct tcf_defact *d = to_defact(a); kfree(d->tcfd_defdata); } static int alloc_defdata(struct tcf_defact *d, const struct nlattr *defdata) { d->tcfd_defdata = kzalloc(SIMP_MAX_DATA, GFP_KERNEL); if (unlikely(!d->tcfd_defdata)) return -ENOMEM; nla_strscpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA); return 0; } static int reset_policy(struct tc_action *a, const struct nlattr *defdata, struct tc_defact *p, struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tcf_chain *goto_ch = NULL; struct tcf_defact *d; int err; err = tcf_action_check_ctrlact(p->action, tp, &goto_ch, extack); if (err < 0) return err; d = to_defact(a); spin_lock_bh(&d->tcf_lock); goto_ch = tcf_action_set_ctrlact(a, p->action, goto_ch); memset(d->tcfd_defdata, 0, SIMP_MAX_DATA); nla_strscpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA); spin_unlock_bh(&d->tcf_lock); if (goto_ch) tcf_chain_put_by_act(goto_ch); return 0; } static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = { [TCA_DEF_PARMS] = { .len = sizeof(struct tc_defact) }, [TCA_DEF_DATA] = { .type = NLA_STRING, .len = SIMP_MAX_DATA }, }; static int tcf_simp_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, act_simp_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_DEF_MAX + 1]; struct tcf_chain *goto_ch = NULL; struct tc_defact *parm; struct tcf_defact *d; bool exists 
= false; int ret = 0, err; u32 index; if (nla == NULL) return -EINVAL; err = nla_parse_nested_deprecated(tb, TCA_DEF_MAX, nla, simple_policy, NULL); if (err < 0) return err; if (tb[TCA_DEF_PARMS] == NULL) return -EINVAL; parm = nla_data(tb[TCA_DEF_PARMS]); index = parm->index; err = tcf_idr_check_alloc(tn, &index, a, bind); if (err < 0) return err; exists = err; if (exists && bind) return ACT_P_BOUND; if (tb[TCA_DEF_DATA] == NULL) { if (exists) tcf_idr_release(*a, bind); else tcf_idr_cleanup(tn, index); return -EINVAL; } if (!exists) { ret = tcf_idr_create(tn, index, est, a, &act_simp_ops, bind, false, flags); if (ret) { tcf_idr_cleanup(tn, index); return ret; } d = to_defact(*a); err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); if (err < 0) goto release_idr; err = alloc_defdata(d, tb[TCA_DEF_DATA]); if (err < 0) goto put_chain; tcf_action_set_ctrlact(*a, parm->action, goto_ch); ret = ACT_P_CREATED; } else { if (!(flags & TCA_ACT_FLAGS_REPLACE)) { err = -EEXIST; goto release_idr; } err = reset_policy(*a, tb[TCA_DEF_DATA], parm, tp, extack); if (err) goto release_idr; } return ret; put_chain: if (goto_ch) tcf_chain_put_by_act(goto_ch); release_idr: tcf_idr_release(*a, bind); return err; } static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_defact *d = to_defact(a); struct tc_defact opt = { .index = d->tcf_index, .refcnt = refcount_read(&d->tcf_refcnt) - ref, .bindcnt = atomic_read(&d->tcf_bindcnt) - bind, }; struct tcf_t t; spin_lock_bh(&d->tcf_lock); opt.action = d->tcf_action; if (nla_put(skb, TCA_DEF_PARMS, sizeof(opt), &opt) || nla_put_string(skb, TCA_DEF_DATA, d->tcfd_defdata)) goto nla_put_failure; tcf_tm_dump(&t, &d->tcf_tm); if (nla_put_64bit(skb, TCA_DEF_TM, sizeof(t), &t, TCA_DEF_PAD)) goto nla_put_failure; spin_unlock_bh(&d->tcf_lock); return skb->len; nla_put_failure: spin_unlock_bh(&d->tcf_lock); nlmsg_trim(skb, b); return -1; } static struct tc_action_ops act_simp_ops = { .kind = "simple", .id = TCA_ID_SIMP, .owner = THIS_MODULE, .act = tcf_simp_act, .dump = tcf_simp_dump, .cleanup = tcf_simp_release, .init = tcf_simp_init, .size = sizeof(struct tcf_defact), }; MODULE_ALIAS_NET_ACT("simple"); static __net_init int simp_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, act_simp_ops.net_id); return tc_action_net_init(net, tn, &act_simp_ops); } static void __net_exit simp_exit_net(struct list_head *net_list) { tc_action_net_exit(net_list, act_simp_ops.net_id); } static struct pernet_operations simp_net_ops = { .init = simp_init_net, .exit_batch = simp_exit_net, .id = &act_simp_ops.net_id, .size = sizeof(struct tc_action_net), }; MODULE_AUTHOR("Jamal Hadi Salim(2005)"); MODULE_DESCRIPTION("Simple example action"); MODULE_LICENSE("GPL"); static int __init simp_init_module(void) { int ret = tcf_register_action(&act_simp_ops, &simp_net_ops); if (!ret) pr_info("Simple TC action Loaded\n"); return ret; } static void __exit simp_cleanup_module(void) { tcf_unregister_action(&act_simp_ops, &simp_net_ops); } module_init(simp_init_module); module_exit(simp_cleanup_module); |
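/*
 * Illustrative usage note (not part of the original file): with iproute2,
 * the "simple" action above is typically attached to a classifier roughly
 * like this; the device name and the u32 match are only an example.
 *
 *   tc qdisc add dev eth0 handle ffff: ingress
 *   tc filter add dev eth0 parent ffff: protocol ip prio 5 u32 \
 *           match ip protocol 1 0xff flowid 1:1 action simple sdata "hello"
 *
 * Each matching packet then logs "simple: hello_<packet count>" via
 * tcf_simp_act().
 */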
1 2 2 2 5 3 5 5 5 5 6 3 3 2 1 6 5 1 3 3 3 3 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 | // SPDX-License-Identifier: GPL-2.0-only #include <linux/ethtool_netlink.h> #include <net/udp_tunnel.h> #include <net/vxlan.h> #include "bitset.h" #include "common.h" #include "netlink.h" const struct nla_policy ethnl_tunnel_info_get_policy[] = { [ETHTOOL_A_TUNNEL_INFO_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), }; static_assert(ETHTOOL_UDP_TUNNEL_TYPE_VXLAN == ilog2(UDP_TUNNEL_TYPE_VXLAN)); static_assert(ETHTOOL_UDP_TUNNEL_TYPE_GENEVE == ilog2(UDP_TUNNEL_TYPE_GENEVE)); static_assert(ETHTOOL_UDP_TUNNEL_TYPE_VXLAN_GPE == ilog2(UDP_TUNNEL_TYPE_VXLAN_GPE)); static ssize_t ethnl_udp_table_reply_size(unsigned int types, bool compact) { ssize_t size; size = ethnl_bitset32_size(&types, NULL, __ETHTOOL_UDP_TUNNEL_TYPE_CNT, udp_tunnel_type_names, compact); if (size < 0) return size; return size + nla_total_size(0) + /* _UDP_TABLE */ nla_total_size(sizeof(u32)); /* _UDP_TABLE_SIZE */ } static ssize_t ethnl_tunnel_info_reply_size(const struct ethnl_req_info *req_base, struct netlink_ext_ack *extack) { bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; const struct udp_tunnel_nic_info *info; unsigned int i; ssize_t ret; size_t size; info = req_base->dev->udp_tunnel_nic_info; if (!info) { NL_SET_ERR_MSG(extack, "device does not report tunnel offload info"); return -EOPNOTSUPP; } size = nla_total_size(0); /* _INFO_UDP_PORTS */ for (i = 0; i < UDP_TUNNEL_NIC_MAX_TABLES; i++) { if (!info->tables[i].n_entries) break; ret = ethnl_udp_table_reply_size(info->tables[i].tunnel_types, compact); if (ret < 0) return ret; size += ret; size += udp_tunnel_nic_dump_size(req_base->dev, i); } if (info->flags & UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN) { ret = ethnl_udp_table_reply_size(0, compact); if (ret < 0) return ret; size += ret; size += nla_total_size(0) + /* _TABLE_ENTRY */ nla_total_size(sizeof(__be16)) + /* _ENTRY_PORT */ nla_total_size(sizeof(u32)); /* _ENTRY_TYPE */ } return size; } static int ethnl_tunnel_info_fill_reply(const struct ethnl_req_info *req_base, struct sk_buff *skb) { bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; const struct udp_tunnel_nic_info *info; struct nlattr *ports, *table, *entry; unsigned int i; info = req_base->dev->udp_tunnel_nic_info; if (!info) return -EOPNOTSUPP; ports = nla_nest_start(skb, ETHTOOL_A_TUNNEL_INFO_UDP_PORTS); if (!ports) return -EMSGSIZE; for (i = 0; i < UDP_TUNNEL_NIC_MAX_TABLES; i++) { if (!info->tables[i].n_entries) break; table = nla_nest_start(skb, ETHTOOL_A_TUNNEL_UDP_TABLE); if 
(!table) goto err_cancel_ports; if (nla_put_u32(skb, ETHTOOL_A_TUNNEL_UDP_TABLE_SIZE, info->tables[i].n_entries)) goto err_cancel_table; if (ethnl_put_bitset32(skb, ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES, &info->tables[i].tunnel_types, NULL, __ETHTOOL_UDP_TUNNEL_TYPE_CNT, udp_tunnel_type_names, compact)) goto err_cancel_table; if (udp_tunnel_nic_dump_write(req_base->dev, i, skb)) goto err_cancel_table; nla_nest_end(skb, table); } if (info->flags & UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN) { u32 zero = 0; table = nla_nest_start(skb, ETHTOOL_A_TUNNEL_UDP_TABLE); if (!table) goto err_cancel_ports; if (nla_put_u32(skb, ETHTOOL_A_TUNNEL_UDP_TABLE_SIZE, 1)) goto err_cancel_table; if (ethnl_put_bitset32(skb, ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES, &zero, NULL, __ETHTOOL_UDP_TUNNEL_TYPE_CNT, udp_tunnel_type_names, compact)) goto err_cancel_table; entry = nla_nest_start(skb, ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY); if (!entry) goto err_cancel_entry; if (nla_put_be16(skb, ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT, htons(IANA_VXLAN_UDP_PORT)) || nla_put_u32(skb, ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE, ilog2(UDP_TUNNEL_TYPE_VXLAN))) goto err_cancel_entry; nla_nest_end(skb, entry); nla_nest_end(skb, table); } nla_nest_end(skb, ports); return 0; err_cancel_entry: nla_nest_cancel(skb, entry); err_cancel_table: nla_nest_cancel(skb, table); err_cancel_ports: nla_nest_cancel(skb, ports); return -EMSGSIZE; } int ethnl_tunnel_info_doit(struct sk_buff *skb, struct genl_info *info) { struct ethnl_req_info req_info = {}; struct nlattr **tb = info->attrs; struct sk_buff *rskb; void *reply_payload; int reply_len; int ret; ret = ethnl_parse_header_dev_get(&req_info, tb[ETHTOOL_A_TUNNEL_INFO_HEADER], genl_info_net(info), info->extack, true); if (ret < 0) return ret; rtnl_lock(); ret = ethnl_tunnel_info_reply_size(&req_info, info->extack); if (ret < 0) goto err_unlock_rtnl; reply_len = ret + ethnl_reply_header_size(); rskb = ethnl_reply_init(reply_len, req_info.dev, ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY, ETHTOOL_A_TUNNEL_INFO_HEADER, info, &reply_payload); if (!rskb) { ret = -ENOMEM; goto err_unlock_rtnl; } ret = ethnl_tunnel_info_fill_reply(&req_info, rskb); if (ret) goto err_free_msg; rtnl_unlock(); ethnl_parse_header_dev_put(&req_info); genlmsg_end(rskb, reply_payload); return genlmsg_reply(rskb, info); err_free_msg: nlmsg_free(rskb); err_unlock_rtnl: rtnl_unlock(); ethnl_parse_header_dev_put(&req_info); return ret; } struct ethnl_tunnel_info_dump_ctx { struct ethnl_req_info req_info; unsigned long ifindex; }; int ethnl_tunnel_info_start(struct netlink_callback *cb) { const struct genl_dumpit_info *info = genl_dumpit_info(cb); struct ethnl_tunnel_info_dump_ctx *ctx = (void *)cb->ctx; struct nlattr **tb = info->info.attrs; int ret; BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx)); memset(ctx, 0, sizeof(*ctx)); ret = ethnl_parse_header_dev_get(&ctx->req_info, tb[ETHTOOL_A_TUNNEL_INFO_HEADER], sock_net(cb->skb->sk), cb->extack, false); if (ctx->req_info.dev) { ethnl_parse_header_dev_put(&ctx->req_info); ctx->req_info.dev = NULL; } return ret; } int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { struct ethnl_tunnel_info_dump_ctx *ctx = (void *)cb->ctx; struct net *net = sock_net(skb->sk); struct net_device *dev; int ret = 0; void *ehdr; rtnl_lock(); for_each_netdev_dump(net, dev, ctx->ifindex) { ehdr = ethnl_dump_put(skb, cb, ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY); if (!ehdr) { ret = -EMSGSIZE; break; } ret = ethnl_fill_reply_header(skb, dev, ETHTOOL_A_TUNNEL_INFO_HEADER); if (ret < 0) { genlmsg_cancel(skb, ehdr); break; } 
		ctx->req_info.dev = dev;
		ret = ethnl_tunnel_info_fill_reply(&ctx->req_info, skb);
		ctx->req_info.dev = NULL;
		if (ret < 0) {
			genlmsg_cancel(skb, ehdr);
			if (ret == -EOPNOTSUPP)
				continue;
			break;
		}
		genlmsg_end(skb, ehdr);
	}
	rtnl_unlock();

	if (ret == -EMSGSIZE && skb->len)
		return skb->len;
	return ret;
}
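/*
 * Illustrative sketch (not part of the original file): the kind of
 * udp_tunnel_nic_info a driver registers on its netdev and that the reply
 * code above walks via dev->udp_tunnel_nic_info.  The callback names are
 * hypothetical; the table layout mirrors what
 * ethnl_tunnel_info_fill_reply() expects.
 */
static int demo_udp_tunnel_set_port(struct net_device *dev,
				    unsigned int table, unsigned int entry,
				    struct udp_tunnel_info *ti)
{
	/* program the UDP port into hardware here */
	return 0;
}

static int demo_udp_tunnel_unset_port(struct net_device *dev,
				      unsigned int table, unsigned int entry,
				      struct udp_tunnel_info *ti)
{
	/* remove the UDP port from hardware here */
	return 0;
}

static const struct udp_tunnel_nic_info demo_udp_tunnels = {
	.set_port	= demo_udp_tunnel_set_port,
	.unset_port	= demo_udp_tunnel_unset_port,
	.tables		= {
		{
			.n_entries	= 4,
			.tunnel_types	= UDP_TUNNEL_TYPE_VXLAN |
					  UDP_TUNNEL_TYPE_GENEVE,
		},
	},
};
/* a driver would then set netdev->udp_tunnel_nic_info = &demo_udp_tunnels; */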
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * irq_domain - IRQ translation domains
 *
 * Translation infrastructure between hw and linux irq numbers. This is
 * helpful for interrupt controllers to implement mapping between hardware
 * irq numbers and the Linux irq number space.
 *
 * irq_domains also have hooks for translating device tree or other
 * firmware interrupt representations into a hardware irq number that
 * can be mapped back to a Linux irq number without any extra platform
 * support code.
 *
 * Interrupt controller "domain" data structure.
This could be defined as a * irq domain controller. That is, it handles the mapping between hardware * and virtual interrupt numbers for a given interrupt domain. The domain * structure is generally created by the PIC code for a given PIC instance * (though a domain can cover more than one PIC if they have a flat number * model). It's the domain callbacks that are responsible for setting the * irq_chip on a given irq_desc after it's been mapped. * * The host code and data structures use a fwnode_handle pointer to * identify the domain. In some cases, and in order to preserve source * code compatibility, this fwnode pointer is "upgraded" to a DT * device_node. For those firmware infrastructures that do not provide * a unique identifier for an interrupt controller, the irq_domain * code offers a fwnode allocator. */ #ifndef _LINUX_IRQDOMAIN_H #define _LINUX_IRQDOMAIN_H #include <linux/types.h> #include <linux/irqdomain_defs.h> #include <linux/irqhandler.h> #include <linux/of.h> #include <linux/mutex.h> #include <linux/radix-tree.h> struct device_node; struct fwnode_handle; struct irq_domain; struct irq_chip; struct irq_data; struct irq_desc; struct cpumask; struct seq_file; struct irq_affinity_desc; struct msi_parent_ops; #define IRQ_DOMAIN_IRQ_SPEC_PARAMS 16 /** * struct irq_fwspec - generic IRQ specifier structure * * @fwnode: Pointer to a firmware-specific descriptor * @param_count: Number of device-specific parameters * @param: Device-specific parameters * * This structure, directly modeled after of_phandle_args, is used to * pass a device-specific description of an interrupt. */ struct irq_fwspec { struct fwnode_handle *fwnode; int param_count; u32 param[IRQ_DOMAIN_IRQ_SPEC_PARAMS]; }; /* Conversion function from of_phandle_args fields to fwspec */ void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args, unsigned int count, struct irq_fwspec *fwspec); /** * struct irq_domain_ops - Methods for irq_domain objects * @match: Match an interrupt controller device node to a host, returns * 1 on a match * @select: Match an interrupt controller fw specification. It is more generic * than @match as it receives a complete struct irq_fwspec. Therefore, * @select is preferred if provided. Returns 1 on a match. * @map: Create or update a mapping between a virtual irq number and a hw * irq number. This is called only once for a given mapping. * @unmap: Dispose of such a mapping * @xlate: Given a device tree node and interrupt specifier, decode * the hardware irq number and linux irq type value. * @alloc: Allocate @nr_irqs interrupts starting from @virq. * @free: Free @nr_irqs interrupts starting from @virq. * @activate: Activate one interrupt in HW (@irqd). If @reserve is set, only * reserve the vector. If unset, assign the vector (called from * request_irq()). * @deactivate: Disarm one interrupt (@irqd). * @translate: Given @fwspec, decode the hardware irq number (@out_hwirq) and * linux irq type value (@out_type). This is a generalised @xlate * (over struct irq_fwspec) and is preferred if provided. * @debug_show: For domains to show specific data for an interrupt in debugfs. * * Functions below are provided by the driver and called whenever a new mapping * is created or an old mapping is disposed. The driver can then proceed to * whatever internal data structures management is required. It also needs * to setup the irq_desc when returning from map(). 
*/ struct irq_domain_ops { int (*match)(struct irq_domain *d, struct device_node *node, enum irq_domain_bus_token bus_token); int (*select)(struct irq_domain *d, struct irq_fwspec *fwspec, enum irq_domain_bus_token bus_token); int (*map)(struct irq_domain *d, unsigned int virq, irq_hw_number_t hw); void (*unmap)(struct irq_domain *d, unsigned int virq); int (*xlate)(struct irq_domain *d, struct device_node *node, const u32 *intspec, unsigned int intsize, unsigned long *out_hwirq, unsigned int *out_type); #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY /* extended V2 interfaces to support hierarchy irq_domains */ int (*alloc)(struct irq_domain *d, unsigned int virq, unsigned int nr_irqs, void *arg); void (*free)(struct irq_domain *d, unsigned int virq, unsigned int nr_irqs); int (*activate)(struct irq_domain *d, struct irq_data *irqd, bool reserve); void (*deactivate)(struct irq_domain *d, struct irq_data *irq_data); int (*translate)(struct irq_domain *d, struct irq_fwspec *fwspec, unsigned long *out_hwirq, unsigned int *out_type); #endif #ifdef CONFIG_GENERIC_IRQ_DEBUGFS void (*debug_show)(struct seq_file *m, struct irq_domain *d, struct irq_data *irqd, int ind); #endif }; extern const struct irq_domain_ops irq_generic_chip_ops; struct irq_domain_chip_generic; /** * struct irq_domain - Hardware interrupt number translation object * @link: Element in global irq_domain list. * @name: Name of interrupt domain * @ops: Pointer to irq_domain methods * @host_data: Private data pointer for use by owner. Not touched by irq_domain * core code. * @flags: Per irq_domain flags * @mapcount: The number of mapped interrupts * @mutex: Domain lock, hierarchical domains use root domain's lock * @root: Pointer to root domain, or containing structure if non-hierarchical * * Optional elements: * @fwnode: Pointer to firmware node associated with the irq_domain. Pretty easy * to swap it for the of_node via the irq_domain_get_of_node accessor * @bus_token: @fwnode's device_node might be used for several irq domains. But * in connection with @bus_token, the pair shall be unique in a * system. * @gc: Pointer to a list of generic chips. There is a helper function for * setting up one or more generic chips for interrupt controllers * drivers using the generic chip library which uses this pointer. * @dev: Pointer to the device which instantiated the irqdomain * With per device irq domains this is not necessarily the same * as @pm_dev. * @pm_dev: Pointer to a device that can be utilized for power management * purposes related to the irq domain. * @parent: Pointer to parent irq_domain to support hierarchy irq_domains * @msi_parent_ops: Pointer to MSI parent domain methods for per device domain init * @exit: Function called when the domain is destroyed * * Revmap data, used internally by the irq domain code: * @hwirq_max: Top limit for the HW irq number. Especially to avoid * conflicts/failures with reserved HW irqs. Can be ~0. 
* @revmap_size: Size of the linear map table @revmap * @revmap_tree: Radix map tree for hwirqs that don't fit in the linear map * @revmap: Linear table of irq_data pointers */ struct irq_domain { struct list_head link; const char *name; const struct irq_domain_ops *ops; void *host_data; unsigned int flags; unsigned int mapcount; struct mutex mutex; struct irq_domain *root; /* Optional data */ struct fwnode_handle *fwnode; enum irq_domain_bus_token bus_token; struct irq_domain_chip_generic *gc; struct device *dev; struct device *pm_dev; #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY struct irq_domain *parent; #endif #ifdef CONFIG_GENERIC_MSI_IRQ const struct msi_parent_ops *msi_parent_ops; #endif void (*exit)(struct irq_domain *d); /* reverse map data. The linear map gets appended to the irq_domain */ irq_hw_number_t hwirq_max; unsigned int revmap_size; struct radix_tree_root revmap_tree; struct irq_data __rcu *revmap[] __counted_by(revmap_size); }; /* Irq domain flags */ enum { /* Irq domain is hierarchical */ IRQ_DOMAIN_FLAG_HIERARCHY = (1 << 0), /* Irq domain name was allocated internally */ IRQ_DOMAIN_NAME_ALLOCATED = (1 << 1), /* Irq domain is an IPI domain with virq per cpu */ IRQ_DOMAIN_FLAG_IPI_PER_CPU = (1 << 2), /* Irq domain is an IPI domain with single virq */ IRQ_DOMAIN_FLAG_IPI_SINGLE = (1 << 3), /* Irq domain implements MSIs */ IRQ_DOMAIN_FLAG_MSI = (1 << 4), /* * Irq domain implements isolated MSI, see msi_device_has_isolated_msi() */ IRQ_DOMAIN_FLAG_ISOLATED_MSI = (1 << 5), /* Irq domain doesn't translate anything */ IRQ_DOMAIN_FLAG_NO_MAP = (1 << 6), /* Irq domain is a MSI parent domain */ IRQ_DOMAIN_FLAG_MSI_PARENT = (1 << 8), /* Irq domain is a MSI device domain */ IRQ_DOMAIN_FLAG_MSI_DEVICE = (1 << 9), /* Irq domain must destroy generic chips when removed */ IRQ_DOMAIN_FLAG_DESTROY_GC = (1 << 10), /* * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved * for implementation specific purposes and ignored by the * core code. 
*/ IRQ_DOMAIN_FLAG_NONCORE = (1 << 16), }; static inline struct device_node *irq_domain_get_of_node(struct irq_domain *d) { return to_of_node(d->fwnode); } static inline void irq_domain_set_pm_device(struct irq_domain *d, struct device *dev) { if (d) d->pm_dev = dev; } #ifdef CONFIG_IRQ_DOMAIN struct fwnode_handle *__irq_domain_alloc_fwnode(unsigned int type, int id, const char *name, phys_addr_t *pa); enum { IRQCHIP_FWNODE_REAL, IRQCHIP_FWNODE_NAMED, IRQCHIP_FWNODE_NAMED_ID, }; static inline struct fwnode_handle *irq_domain_alloc_named_fwnode(const char *name) { return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_NAMED, 0, name, NULL); } static inline struct fwnode_handle *irq_domain_alloc_named_id_fwnode(const char *name, int id) { return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_NAMED_ID, id, name, NULL); } static inline struct fwnode_handle *irq_domain_alloc_fwnode(phys_addr_t *pa) { return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_REAL, 0, NULL, pa); } void irq_domain_free_fwnode(struct fwnode_handle *fwnode); struct irq_domain_chip_generic_info; /** * struct irq_domain_info - Domain information structure * @fwnode: firmware node for the interrupt controller * @domain_flags: Additional flags to add to the domain flags * @size: Size of linear map; 0 for radix mapping only * @hwirq_max: Maximum number of interrupts supported by controller * @direct_max: Maximum value of direct maps; * Use ~0 for no limit; 0 for no direct mapping * @hwirq_base: The first hardware interrupt number (legacy domains only) * @virq_base: The first Linux interrupt number for legacy domains to * immediately associate the interrupts after domain creation * @bus_token: Domain bus token * @name_suffix: Optional name suffix to avoid collisions when multiple * domains are added using same fwnode * @ops: Domain operation callbacks * @host_data: Controller private data pointer * @dgc_info: Geneneric chip information structure pointer used to * create generic chips for the domain if not NULL. * @init: Function called when the domain is created. * Allow to do some additional domain initialisation. * @exit: Function called when the domain is destroyed. * Allow to do some additional cleanup operation. 
*/ struct irq_domain_info { struct fwnode_handle *fwnode; unsigned int domain_flags; unsigned int size; irq_hw_number_t hwirq_max; int direct_max; unsigned int hwirq_base; unsigned int virq_base; enum irq_domain_bus_token bus_token; const char *name_suffix; const struct irq_domain_ops *ops; void *host_data; #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY /** * @parent: Pointer to the parent irq domain used in a hierarchy domain */ struct irq_domain *parent; #endif struct irq_domain_chip_generic_info *dgc_info; int (*init)(struct irq_domain *d); void (*exit)(struct irq_domain *d); }; struct irq_domain *irq_domain_instantiate(const struct irq_domain_info *info); struct irq_domain *devm_irq_domain_instantiate(struct device *dev, const struct irq_domain_info *info); struct irq_domain *irq_domain_create_simple(struct fwnode_handle *fwnode, unsigned int size, unsigned int first_irq, const struct irq_domain_ops *ops, void *host_data); struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, unsigned int size, unsigned int first_irq, irq_hw_number_t first_hwirq, const struct irq_domain_ops *ops, void *host_data); struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode, unsigned int size, unsigned int first_irq, irq_hw_number_t first_hwirq, const struct irq_domain_ops *ops, void *host_data); struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec, enum irq_domain_bus_token bus_token); void irq_set_default_host(struct irq_domain *host); struct irq_domain *irq_get_default_host(void); int irq_domain_alloc_descs(int virq, unsigned int nr_irqs, irq_hw_number_t hwirq, int node, const struct irq_affinity_desc *affinity); static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node) { return node ? &node->fwnode : NULL; } extern const struct fwnode_operations irqchip_fwnode_ops; static inline bool is_fwnode_irqchip(const struct fwnode_handle *fwnode) { return fwnode && fwnode->ops == &irqchip_fwnode_ops; } void irq_domain_update_bus_token(struct irq_domain *domain, enum irq_domain_bus_token bus_token); static inline struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode, enum irq_domain_bus_token bus_token) { struct irq_fwspec fwspec = { .fwnode = fwnode, }; return irq_find_matching_fwspec(&fwspec, bus_token); } static inline struct irq_domain *irq_find_matching_host(struct device_node *node, enum irq_domain_bus_token bus_token) { return irq_find_matching_fwnode(of_node_to_fwnode(node), bus_token); } static inline struct irq_domain *irq_find_host(struct device_node *node) { struct irq_domain *d; d = irq_find_matching_host(node, DOMAIN_BUS_WIRED); if (!d) d = irq_find_matching_host(node, DOMAIN_BUS_ANY); return d; } static inline struct irq_domain *irq_domain_add_simple(struct device_node *of_node, unsigned int size, unsigned int first_irq, const struct irq_domain_ops *ops, void *host_data) { return irq_domain_create_simple(of_node_to_fwnode(of_node), size, first_irq, ops, host_data); } /** * irq_domain_add_linear() - Allocate and register a linear revmap irq_domain. * @of_node: pointer to interrupt controller's device tree node. * @size: Number of interrupts in the domain. 
* @ops: map/unmap domain callbacks * @host_data: Controller private data pointer */ static inline struct irq_domain *irq_domain_add_linear(struct device_node *of_node, unsigned int size, const struct irq_domain_ops *ops, void *host_data) { struct irq_domain_info info = { .fwnode = of_node_to_fwnode(of_node), .size = size, .hwirq_max = size, .ops = ops, .host_data = host_data, }; struct irq_domain *d; d = irq_domain_instantiate(&info); return IS_ERR(d) ? NULL : d; } #ifdef CONFIG_IRQ_DOMAIN_NOMAP static inline struct irq_domain *irq_domain_add_nomap(struct device_node *of_node, unsigned int max_irq, const struct irq_domain_ops *ops, void *host_data) { struct irq_domain_info info = { .fwnode = of_node_to_fwnode(of_node), .hwirq_max = max_irq, .direct_max = max_irq, .ops = ops, .host_data = host_data, }; struct irq_domain *d; d = irq_domain_instantiate(&info); return IS_ERR(d) ? NULL : d; } unsigned int irq_create_direct_mapping(struct irq_domain *host); #endif static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node, const struct irq_domain_ops *ops, void *host_data) { struct irq_domain_info info = { .fwnode = of_node_to_fwnode(of_node), .hwirq_max = ~0U, .ops = ops, .host_data = host_data, }; struct irq_domain *d; d = irq_domain_instantiate(&info); return IS_ERR(d) ? NULL : d; } static inline struct irq_domain *irq_domain_create_linear(struct fwnode_handle *fwnode, unsigned int size, const struct irq_domain_ops *ops, void *host_data) { struct irq_domain_info info = { .fwnode = fwnode, .size = size, .hwirq_max = size, .ops = ops, .host_data = host_data, }; struct irq_domain *d; d = irq_domain_instantiate(&info); return IS_ERR(d) ? NULL : d; } static inline struct irq_domain *irq_domain_create_tree(struct fwnode_handle *fwnode, const struct irq_domain_ops *ops, void *host_data) { struct irq_domain_info info = { .fwnode = fwnode, .hwirq_max = ~0, .ops = ops, .host_data = host_data, }; struct irq_domain *d; d = irq_domain_instantiate(&info); return IS_ERR(d) ? NULL : d; } void irq_domain_remove(struct irq_domain *host); int irq_domain_associate(struct irq_domain *domain, unsigned int irq, irq_hw_number_t hwirq); void irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base, irq_hw_number_t hwirq_base, int count); unsigned int irq_create_mapping_affinity(struct irq_domain *host, irq_hw_number_t hwirq, const struct irq_affinity_desc *affinity); unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec); void irq_dispose_mapping(unsigned int virq); static inline unsigned int irq_create_mapping(struct irq_domain *host, irq_hw_number_t hwirq) { return irq_create_mapping_affinity(host, hwirq, NULL); } struct irq_desc *__irq_resolve_mapping(struct irq_domain *domain, irq_hw_number_t hwirq, unsigned int *irq); static inline struct irq_desc *irq_resolve_mapping(struct irq_domain *domain, irq_hw_number_t hwirq) { return __irq_resolve_mapping(domain, hwirq, NULL); } /** * irq_find_mapping() - Find a linux irq from a hw irq number. 
* @domain: domain owning this hardware interrupt * @hwirq: hardware irq number in that domain space */ static inline unsigned int irq_find_mapping(struct irq_domain *domain, irq_hw_number_t hwirq) { unsigned int irq; if (__irq_resolve_mapping(domain, hwirq, &irq)) return irq; return 0; } static inline unsigned int irq_linear_revmap(struct irq_domain *domain, irq_hw_number_t hwirq) { return irq_find_mapping(domain, hwirq); } extern const struct irq_domain_ops irq_domain_simple_ops; /* stock xlate functions */ int irq_domain_xlate_onecell(struct irq_domain *d, struct device_node *ctrlr, const u32 *intspec, unsigned int intsize, irq_hw_number_t *out_hwirq, unsigned int *out_type); int irq_domain_xlate_twocell(struct irq_domain *d, struct device_node *ctrlr, const u32 *intspec, unsigned int intsize, irq_hw_number_t *out_hwirq, unsigned int *out_type); int irq_domain_xlate_onetwocell(struct irq_domain *d, struct device_node *ctrlr, const u32 *intspec, unsigned int intsize, irq_hw_number_t *out_hwirq, unsigned int *out_type); int irq_domain_translate_twocell(struct irq_domain *d, struct irq_fwspec *fwspec, unsigned long *out_hwirq, unsigned int *out_type); int irq_domain_translate_onecell(struct irq_domain *d, struct irq_fwspec *fwspec, unsigned long *out_hwirq, unsigned int *out_type); /* IPI functions */ int irq_reserve_ipi(struct irq_domain *domain, const struct cpumask *dest); int irq_destroy_ipi(unsigned int irq, const struct cpumask *dest); /* V2 interfaces to support hierarchy IRQ domains. */ struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain, unsigned int virq); void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq, const struct irq_chip *chip, void *chip_data, irq_flow_handler_t handler, void *handler_data, const char *handler_name); void irq_domain_reset_irq_data(struct irq_data *irq_data); #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent, unsigned int flags, unsigned int size, struct fwnode_handle *fwnode, const struct irq_domain_ops *ops, void *host_data); static inline struct irq_domain *irq_domain_add_hierarchy(struct irq_domain *parent, unsigned int flags, unsigned int size, struct device_node *node, const struct irq_domain_ops *ops, void *host_data) { return irq_domain_create_hierarchy(parent, flags, size, of_node_to_fwnode(node), ops, host_data); } int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, unsigned int nr_irqs, int node, void *arg, bool realloc, const struct irq_affinity_desc *affinity); void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs); int irq_domain_activate_irq(struct irq_data *irq_data, bool early); void irq_domain_deactivate_irq(struct irq_data *irq_data); static inline int irq_domain_alloc_irqs(struct irq_domain *domain, unsigned int nr_irqs, int node, void *arg) { return __irq_domain_alloc_irqs(domain, -1, nr_irqs, node, arg, false, NULL); } int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq, const struct irq_chip *chip, void *chip_data); void irq_domain_free_irqs_common(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs); void irq_domain_free_irqs_top(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs); int irq_domain_push_irq(struct irq_domain *domain, int virq, void *arg); int irq_domain_pop_irq(struct irq_domain *domain, int virq); int irq_domain_alloc_irqs_parent(struct irq_domain *domain, unsigned int irq_base, unsigned int 
nr_irqs, void *arg); void irq_domain_free_irqs_parent(struct irq_domain *domain, unsigned int irq_base, unsigned int nr_irqs); int irq_domain_disconnect_hierarchy(struct irq_domain *domain, unsigned int virq); static inline bool irq_domain_is_hierarchy(struct irq_domain *domain) { return domain->flags & IRQ_DOMAIN_FLAG_HIERARCHY; } static inline bool irq_domain_is_ipi(struct irq_domain *domain) { return domain->flags & (IRQ_DOMAIN_FLAG_IPI_PER_CPU | IRQ_DOMAIN_FLAG_IPI_SINGLE); } static inline bool irq_domain_is_ipi_per_cpu(struct irq_domain *domain) { return domain->flags & IRQ_DOMAIN_FLAG_IPI_PER_CPU; } static inline bool irq_domain_is_ipi_single(struct irq_domain *domain) { return domain->flags & IRQ_DOMAIN_FLAG_IPI_SINGLE; } static inline bool irq_domain_is_msi(struct irq_domain *domain) { return domain->flags & IRQ_DOMAIN_FLAG_MSI; } static inline bool irq_domain_is_msi_parent(struct irq_domain *domain) { return domain->flags & IRQ_DOMAIN_FLAG_MSI_PARENT; } static inline bool irq_domain_is_msi_device(struct irq_domain *domain) { return domain->flags & IRQ_DOMAIN_FLAG_MSI_DEVICE; } #else /* CONFIG_IRQ_DOMAIN_HIERARCHY */ static inline int irq_domain_alloc_irqs(struct irq_domain *domain, unsigned int nr_irqs, int node, void *arg) { return -1; } static inline void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs) { } static inline bool irq_domain_is_hierarchy(struct irq_domain *domain) { return false; } static inline bool irq_domain_is_ipi(struct irq_domain *domain) { return false; } static inline bool irq_domain_is_ipi_per_cpu(struct irq_domain *domain) { return false; } static inline bool irq_domain_is_ipi_single(struct irq_domain *domain) { return false; } static inline bool irq_domain_is_msi(struct irq_domain *domain) { return false; } static inline bool irq_domain_is_msi_parent(struct irq_domain *domain) { return false; } static inline bool irq_domain_is_msi_device(struct irq_domain *domain) { return false; } #endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ #ifdef CONFIG_GENERIC_MSI_IRQ int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq, unsigned int type); void msi_device_domain_free_wired(struct irq_domain *domain, unsigned int virq); #else static inline int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq, unsigned int type) { WARN_ON_ONCE(1); return -EINVAL; } static inline void msi_device_domain_free_wired(struct irq_domain *domain, unsigned int virq) { WARN_ON_ONCE(1); } #endif #else /* CONFIG_IRQ_DOMAIN */ static inline void irq_dispose_mapping(unsigned int virq) { } static inline struct irq_domain *irq_find_matching_fwnode( struct fwnode_handle *fwnode, enum irq_domain_bus_token bus_token) { return NULL; } #endif /* !CONFIG_IRQ_DOMAIN */ #endif /* _LINUX_IRQDOMAIN_H */ |
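/*
 * Illustrative sketch (not part of this header): a minimal linear domain as
 * a hypothetical irqchip driver might set one up using the declarations
 * above.  "demo_irq_chip" and the setup context are assumptions, and
 * irq_set_chip_and_handler()/handle_level_irq come from <linux/irq.h>;
 * only the irq_domain calls are taken from this header.
 */
static struct irq_chip demo_irq_chip;		/* filled in by the driver */

static int demo_irq_map(struct irq_domain *d, unsigned int virq,
			irq_hw_number_t hwirq)
{
	irq_set_chip_and_handler(virq, &demo_irq_chip, handle_level_irq);
	irq_set_chip_data(virq, d->host_data);
	return 0;
}

static const struct irq_domain_ops demo_irq_domain_ops = {
	.map	= demo_irq_map,
	.xlate	= irq_domain_xlate_onecell,
};

static struct irq_domain *demo_setup_domain(struct fwnode_handle *fwnode,
					    void *priv)
{
	struct irq_domain *domain;

	domain = irq_domain_create_linear(fwnode, 32, &demo_irq_domain_ops,
					  priv);
	if (!domain)
		return NULL;

	/* hwirqs 0..31 can now be mapped on demand */
	irq_create_mapping(domain, 5);
	return domain;
}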
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * A generic kernel FIFO implementation
 *
 * Copyright (C) 2009/2010 Stefani Seibold <stefani@seibold.net>
 */

#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/export.h>
#include <linux/kfifo.h>
#include <linux/log2.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/uaccess.h>

/*
 * internal helper to calculate the unused elements in a fifo
 */
static inline unsigned int kfifo_unused(struct __kfifo *fifo)
{
	return (fifo->mask + 1) - (fifo->in - fifo->out);
}

int __kfifo_alloc(struct __kfifo *fifo, unsigned int size,
		size_t esize, gfp_t gfp_mask)
{
	/*
	 * round up to the next power of 2, since our 'let the indices
	 * wrap' technique works only in this case.
*/ size = roundup_pow_of_two(size); fifo->in = 0; fifo->out = 0; fifo->esize = esize; if (size < 2) { fifo->data = NULL; fifo->mask = 0; return -EINVAL; } fifo->data = kmalloc_array(esize, size, gfp_mask); if (!fifo->data) { fifo->mask = 0; return -ENOMEM; } fifo->mask = size - 1; return 0; } EXPORT_SYMBOL(__kfifo_alloc); void __kfifo_free(struct __kfifo *fifo) { kfree(fifo->data); fifo->in = 0; fifo->out = 0; fifo->esize = 0; fifo->data = NULL; fifo->mask = 0; } EXPORT_SYMBOL(__kfifo_free); int __kfifo_init(struct __kfifo *fifo, void *buffer, unsigned int size, size_t esize) { size /= esize; if (!is_power_of_2(size)) size = rounddown_pow_of_two(size); fifo->in = 0; fifo->out = 0; fifo->esize = esize; fifo->data = buffer; if (size < 2) { fifo->mask = 0; return -EINVAL; } fifo->mask = size - 1; return 0; } EXPORT_SYMBOL(__kfifo_init); static void kfifo_copy_in(struct __kfifo *fifo, const void *src, unsigned int len, unsigned int off) { unsigned int size = fifo->mask + 1; unsigned int esize = fifo->esize; unsigned int l; off &= fifo->mask; if (esize != 1) { off *= esize; size *= esize; len *= esize; } l = min(len, size - off); memcpy(fifo->data + off, src, l); memcpy(fifo->data, src + l, len - l); /* * make sure that the data in the fifo is up to date before * incrementing the fifo->in index counter */ smp_wmb(); } unsigned int __kfifo_in(struct __kfifo *fifo, const void *buf, unsigned int len) { unsigned int l; l = kfifo_unused(fifo); if (len > l) len = l; kfifo_copy_in(fifo, buf, len, fifo->in); fifo->in += len; return len; } EXPORT_SYMBOL(__kfifo_in); static void kfifo_copy_out(struct __kfifo *fifo, void *dst, unsigned int len, unsigned int off) { unsigned int size = fifo->mask + 1; unsigned int esize = fifo->esize; unsigned int l; off &= fifo->mask; if (esize != 1) { off *= esize; size *= esize; len *= esize; } l = min(len, size - off); memcpy(dst, fifo->data + off, l); memcpy(dst + l, fifo->data, len - l); /* * make sure that the data is copied before * incrementing the fifo->out index counter */ smp_wmb(); } unsigned int __kfifo_out_peek(struct __kfifo *fifo, void *buf, unsigned int len) { unsigned int l; l = fifo->in - fifo->out; if (len > l) len = l; kfifo_copy_out(fifo, buf, len, fifo->out); return len; } EXPORT_SYMBOL(__kfifo_out_peek); unsigned int __kfifo_out_linear(struct __kfifo *fifo, unsigned int *tail, unsigned int n) { unsigned int size = fifo->mask + 1; unsigned int off = fifo->out & fifo->mask; if (tail) *tail = off; return min3(n, fifo->in - fifo->out, size - off); } EXPORT_SYMBOL(__kfifo_out_linear); unsigned int __kfifo_out(struct __kfifo *fifo, void *buf, unsigned int len) { len = __kfifo_out_peek(fifo, buf, len); fifo->out += len; return len; } EXPORT_SYMBOL(__kfifo_out); static unsigned long kfifo_copy_from_user(struct __kfifo *fifo, const void __user *from, unsigned int len, unsigned int off, unsigned int *copied) { unsigned int size = fifo->mask + 1; unsigned int esize = fifo->esize; unsigned int l; unsigned long ret; off &= fifo->mask; if (esize != 1) { off *= esize; size *= esize; len *= esize; } l = min(len, size - off); ret = copy_from_user(fifo->data + off, from, l); if (unlikely(ret)) ret = DIV_ROUND_UP(ret + len - l, esize); else { ret = copy_from_user(fifo->data, from + l, len - l); if (unlikely(ret)) ret = DIV_ROUND_UP(ret, esize); } /* * make sure that the data in the fifo is up to date before * incrementing the fifo->in index counter */ smp_wmb(); *copied = len - ret * esize; /* return the number of elements which are not copied */ return ret; } int 
__kfifo_from_user(struct __kfifo *fifo, const void __user *from, unsigned long len, unsigned int *copied) { unsigned int l; unsigned long ret; unsigned int esize = fifo->esize; int err; if (esize != 1) len /= esize; l = kfifo_unused(fifo); if (len > l) len = l; ret = kfifo_copy_from_user(fifo, from, len, fifo->in, copied); if (unlikely(ret)) { len -= ret; err = -EFAULT; } else err = 0; fifo->in += len; return err; } EXPORT_SYMBOL(__kfifo_from_user); static unsigned long kfifo_copy_to_user(struct __kfifo *fifo, void __user *to, unsigned int len, unsigned int off, unsigned int *copied) { unsigned int l; unsigned long ret; unsigned int size = fifo->mask + 1; unsigned int esize = fifo->esize; off &= fifo->mask; if (esize != 1) { off *= esize; size *= esize; len *= esize; } l = min(len, size - off); ret = copy_to_user(to, fifo->data + off, l); if (unlikely(ret)) ret = DIV_ROUND_UP(ret + len - l, esize); else { ret = copy_to_user(to + l, fifo->data, len - l); if (unlikely(ret)) ret = DIV_ROUND_UP(ret, esize); } /* * make sure that the data is copied before * incrementing the fifo->out index counter */ smp_wmb(); *copied = len - ret * esize; /* return the number of elements which are not copied */ return ret; } int __kfifo_to_user(struct __kfifo *fifo, void __user *to, unsigned long len, unsigned int *copied) { unsigned int l; unsigned long ret; unsigned int esize = fifo->esize; int err; if (esize != 1) len /= esize; l = fifo->in - fifo->out; if (len > l) len = l; ret = kfifo_copy_to_user(fifo, to, len, fifo->out, copied); if (unlikely(ret)) { len -= ret; err = -EFAULT; } else err = 0; fifo->out += len; return err; } EXPORT_SYMBOL(__kfifo_to_user); static unsigned int setup_sgl_buf(struct __kfifo *fifo, struct scatterlist *sgl, unsigned int data_offset, int nents, unsigned int len, dma_addr_t dma) { const void *buf = fifo->data + data_offset; if (!nents || !len) return 0; sg_set_buf(sgl, buf, len); if (dma != DMA_MAPPING_ERROR) { sg_dma_address(sgl) = dma + data_offset; sg_dma_len(sgl) = len; } return 1; } static unsigned int setup_sgl(struct __kfifo *fifo, struct scatterlist *sgl, int nents, unsigned int len, unsigned int off, dma_addr_t dma) { unsigned int size = fifo->mask + 1; unsigned int esize = fifo->esize; unsigned int len_to_end; unsigned int n; off &= fifo->mask; if (esize != 1) { off *= esize; size *= esize; len *= esize; } len_to_end = min(len, size - off); n = setup_sgl_buf(fifo, sgl, off, nents, len_to_end, dma); n += setup_sgl_buf(fifo, sgl + n, 0, nents - n, len - len_to_end, dma); return n; } unsigned int __kfifo_dma_in_prepare(struct __kfifo *fifo, struct scatterlist *sgl, int nents, unsigned int len, dma_addr_t dma) { unsigned int l; l = kfifo_unused(fifo); if (len > l) len = l; return setup_sgl(fifo, sgl, nents, len, fifo->in, dma); } EXPORT_SYMBOL(__kfifo_dma_in_prepare); unsigned int __kfifo_dma_out_prepare(struct __kfifo *fifo, struct scatterlist *sgl, int nents, unsigned int len, dma_addr_t dma) { unsigned int l; l = fifo->in - fifo->out; if (len > l) len = l; return setup_sgl(fifo, sgl, nents, len, fifo->out, dma); } EXPORT_SYMBOL(__kfifo_dma_out_prepare); unsigned int __kfifo_max_r(unsigned int len, size_t recsize) { unsigned int max = (1 << (recsize << 3)) - 1; if (len > max) return max; return len; } EXPORT_SYMBOL(__kfifo_max_r); #define __KFIFO_PEEK(data, out, mask) \ ((data)[(out) & (mask)]) /* * __kfifo_peek_n internal helper function for determinate the length of * the next record in the fifo */ static unsigned int __kfifo_peek_n(struct __kfifo *fifo, size_t 
recsize) { unsigned int l; unsigned int mask = fifo->mask; unsigned char *data = fifo->data; l = __KFIFO_PEEK(data, fifo->out, mask); if (--recsize) l |= __KFIFO_PEEK(data, fifo->out + 1, mask) << 8; return l; } #define __KFIFO_POKE(data, in, mask, val) \ ( \ (data)[(in) & (mask)] = (unsigned char)(val) \ ) /* * __kfifo_poke_n internal helper function for storing the length of * the record into the fifo */ static void __kfifo_poke_n(struct __kfifo *fifo, unsigned int n, size_t recsize) { unsigned int mask = fifo->mask; unsigned char *data = fifo->data; __KFIFO_POKE(data, fifo->in, mask, n); if (recsize > 1) __KFIFO_POKE(data, fifo->in + 1, mask, n >> 8); } unsigned int __kfifo_len_r(struct __kfifo *fifo, size_t recsize) { return __kfifo_peek_n(fifo, recsize); } EXPORT_SYMBOL(__kfifo_len_r); unsigned int __kfifo_in_r(struct __kfifo *fifo, const void *buf, unsigned int len, size_t recsize) { if (len + recsize > kfifo_unused(fifo)) return 0; __kfifo_poke_n(fifo, len, recsize); kfifo_copy_in(fifo, buf, len, fifo->in + recsize); fifo->in += len + recsize; return len; } EXPORT_SYMBOL(__kfifo_in_r); static unsigned int kfifo_out_copy_r(struct __kfifo *fifo, void *buf, unsigned int len, size_t recsize, unsigned int *n) { *n = __kfifo_peek_n(fifo, recsize); if (len > *n) len = *n; kfifo_copy_out(fifo, buf, len, fifo->out + recsize); return len; } unsigned int __kfifo_out_peek_r(struct __kfifo *fifo, void *buf, unsigned int len, size_t recsize) { unsigned int n; if (fifo->in == fifo->out) return 0; return kfifo_out_copy_r(fifo, buf, len, recsize, &n); } EXPORT_SYMBOL(__kfifo_out_peek_r); unsigned int __kfifo_out_linear_r(struct __kfifo *fifo, unsigned int *tail, unsigned int n, size_t recsize) { if (fifo->in == fifo->out) return 0; if (tail) *tail = fifo->out + recsize; return min(n, __kfifo_peek_n(fifo, recsize)); } EXPORT_SYMBOL(__kfifo_out_linear_r); unsigned int __kfifo_out_r(struct __kfifo *fifo, void *buf, unsigned int len, size_t recsize) { unsigned int n; if (fifo->in == fifo->out) return 0; len = kfifo_out_copy_r(fifo, buf, len, recsize, &n); fifo->out += n + recsize; return len; } EXPORT_SYMBOL(__kfifo_out_r); void __kfifo_skip_r(struct __kfifo *fifo, size_t recsize) { unsigned int n; n = __kfifo_peek_n(fifo, recsize); fifo->out += n + recsize; } EXPORT_SYMBOL(__kfifo_skip_r); int __kfifo_from_user_r(struct __kfifo *fifo, const void __user *from, unsigned long len, unsigned int *copied, size_t recsize) { unsigned long ret; len = __kfifo_max_r(len, recsize); if (len + recsize > kfifo_unused(fifo)) { *copied = 0; return 0; } __kfifo_poke_n(fifo, len, recsize); ret = kfifo_copy_from_user(fifo, from, len, fifo->in + recsize, copied); if (unlikely(ret)) { *copied = 0; return -EFAULT; } fifo->in += len + recsize; return 0; } EXPORT_SYMBOL(__kfifo_from_user_r); int __kfifo_to_user_r(struct __kfifo *fifo, void __user *to, unsigned long len, unsigned int *copied, size_t recsize) { unsigned long ret; unsigned int n; if (fifo->in == fifo->out) { *copied = 0; return 0; } n = __kfifo_peek_n(fifo, recsize); if (len > n) len = n; ret = kfifo_copy_to_user(fifo, to, len, fifo->out + recsize, copied); if (unlikely(ret)) { *copied = 0; return -EFAULT; } fifo->out += n + recsize; return 0; } EXPORT_SYMBOL(__kfifo_to_user_r); unsigned int __kfifo_dma_in_prepare_r(struct __kfifo *fifo, struct scatterlist *sgl, int nents, unsigned int len, size_t recsize, dma_addr_t dma) { BUG_ON(!nents); len = __kfifo_max_r(len, recsize); if (len + recsize > kfifo_unused(fifo)) return 0; return setup_sgl(fifo, sgl, nents, len, 
			 fifo->in + recsize, dma);
}
EXPORT_SYMBOL(__kfifo_dma_in_prepare_r);

void __kfifo_dma_in_finish_r(struct __kfifo *fifo,
	unsigned int len, size_t recsize)
{
	len = __kfifo_max_r(len, recsize);
	__kfifo_poke_n(fifo, len, recsize);
	fifo->in += len + recsize;
}
EXPORT_SYMBOL(__kfifo_dma_in_finish_r);

unsigned int __kfifo_dma_out_prepare_r(struct __kfifo *fifo,
	struct scatterlist *sgl, int nents, unsigned int len, size_t recsize,
	dma_addr_t dma)
{
	BUG_ON(!nents);

	len = __kfifo_max_r(len, recsize);

	if (len + recsize > fifo->in - fifo->out)
		return 0;

	return setup_sgl(fifo, sgl, nents, len, fifo->out + recsize, dma);
}
EXPORT_SYMBOL(__kfifo_dma_out_prepare_r);
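/*
 * Illustrative sketch (not part of the original file): the __kfifo_* exports
 * above back the macro API declared in <linux/kfifo.h> (already included
 * here).  A minimal in-kernel user might look like this; the fifo name,
 * size and element type are arbitrary.
 */
static DEFINE_KFIFO(demo_fifo, unsigned char, 128);	/* 128-element fifo */

static void __maybe_unused demo_kfifo_usage(void)
{
	unsigned char out[16];
	unsigned int copied;

	/* element-wise put (returns 0 if the fifo is full) */
	kfifo_put(&demo_fifo, 0x42);

	/* bulk copy in/out; both return the number of elements copied */
	copied = kfifo_in(&demo_fifo, "hello", 5);
	copied = kfifo_out(&demo_fifo, out, sizeof(out));

	pr_debug("kfifo example: %u bytes drained, %u left\n",
		 copied, kfifo_len(&demo_fifo));
}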
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * ChaCha and XChaCha stream ciphers, including ChaCha20 (RFC7539)
 *
 * Copyright (C) 2015 Martin Willi
 * Copyright (C) 2018 Google LLC
 */

#include <linux/unaligned.h>
#include <crypto/algapi.h>
#include <crypto/internal/chacha.h>
#include <crypto/internal/skcipher.h>
#include <linux/module.h>

static int chacha_stream_xor(struct skcipher_request *req,
			     const struct chacha_ctx *ctx, const u8 *iv)
{
	struct skcipher_walk walk;
	u32 state[16];
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	chacha_init(state, ctx->key, iv);

	while (walk.nbytes > 0) {
		unsigned int nbytes = walk.nbytes;

		if (nbytes < walk.total)
			nbytes = round_down(nbytes, CHACHA_BLOCK_SIZE);

		chacha_crypt_generic(state, walk.dst.virt.addr,
				     walk.src.virt.addr, nbytes, ctx->nrounds);
		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
	}

	return err;
}

static int crypto_chacha_crypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);

	return chacha_stream_xor(req, ctx, req->iv);
}

static int crypto_xchacha_crypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct chacha_ctx subctx;
	u32 state[16];
	u8 real_iv[16];

	/* Compute the subkey given the original key and first 128 nonce bits */
	chacha_init(state, ctx->key, req->iv);
	hchacha_block_generic(state, subctx.key, ctx->nrounds);
	subctx.nrounds = ctx->nrounds;

	/* Build the real IV */
	memcpy(&real_iv[0], req->iv + 24, 8); /* stream position */
	memcpy(&real_iv[8], req->iv + 16, 8); /* remaining 64 nonce bits */

	/* Generate the stream and XOR it with the data */
	return chacha_stream_xor(req, &subctx, real_iv);
}

static struct skcipher_alg algs[] = {
	{
		.base.cra_name		= "chacha20",
		.base.cra_driver_name	= "chacha20-generic",
		.base.cra_priority	= 100,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= CHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.setkey			= chacha20_setkey,
		.encrypt		= crypto_chacha_crypt,
		.decrypt		= crypto_chacha_crypt,
	}, {
		.base.cra_name		= "xchacha20",
		.base.cra_driver_name	= "xchacha20-generic",
		.base.cra_priority	= 100,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= XCHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.setkey			= chacha20_setkey,
		.encrypt		= crypto_xchacha_crypt,
		.decrypt		= crypto_xchacha_crypt,
	}, {
		.base.cra_name		= "xchacha12",
		.base.cra_driver_name	= "xchacha12-generic",
		.base.cra_priority	= 100,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= XCHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.setkey			= chacha12_setkey,
		.encrypt		= crypto_xchacha_crypt,
		.decrypt		= crypto_xchacha_crypt,
	}
};
static int __init chacha_generic_mod_init(void)
{
	return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
}

static void __exit chacha_generic_mod_fini(void)
{
	crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
}

subsys_initcall(chacha_generic_mod_init);
module_exit(chacha_generic_mod_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Martin Willi <martin@strongswan.org>");
MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (generic)");
MODULE_ALIAS_CRYPTO("chacha20");
MODULE_ALIAS_CRYPTO("chacha20-generic");
MODULE_ALIAS_CRYPTO("xchacha20");
MODULE_ALIAS_CRYPTO("xchacha20-generic");
MODULE_ALIAS_CRYPTO("xchacha12");
MODULE_ALIAS_CRYPTO("xchacha12-generic");
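/*
 * Illustrative sketch (not part of the file above): driving the generic
 * "chacha20" skcipher registered above through the kernel crypto API.  The
 * key, IV and buffer contents are placeholders, and error handling is kept
 * to the essentials; this is a sketch of typical usage, not code from the
 * original module.
 */
#include <crypto/skcipher.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>

static int __init chacha_usage_sketch_init(void)
{
	u8 key[32] = { 0x01 };	/* 256-bit key (placeholder) */
	u8 iv[16] = { 0 };	/* 32-bit block counter + 96-bit nonce */
	struct crypto_skcipher *tfm;
	struct skcipher_request *req;
	struct scatterlist sg;
	DECLARE_CRYPTO_WAIT(wait);
	u8 *buf;
	int err;

	tfm = crypto_alloc_skcipher("chacha20", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_skcipher_setkey(tfm, key, sizeof(key));
	if (err)
		goto out_free_tfm;

	buf = kzalloc(64, GFP_KERNEL);
	if (!buf) {
		err = -ENOMEM;
		goto out_free_tfm;
	}

	req = skcipher_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		err = -ENOMEM;
		goto out_free_buf;
	}

	/* encrypt buf in place; decryption is the identical XOR operation */
	sg_init_one(&sg, buf, 64);
	skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
				      crypto_req_done, &wait);
	skcipher_request_set_crypt(req, &sg, &sg, 64, iv);
	err = crypto_wait_req(crypto_skcipher_encrypt(req), &wait);

	skcipher_request_free(req);
out_free_buf:
	kfree(buf);
out_free_tfm:
	crypto_free_skcipher(tfm);
	return err;
}

static void __exit chacha_usage_sketch_exit(void)
{
}

module_init(chacha_usage_sketch_init);
module_exit(chacha_usage_sketch_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Usage sketch for the generic chacha20 skcipher");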
// SPDX-License-Identifier: GPL-2.0 /* * NVMe over Fabrics common host code. * Copyright (c) 2015-2016 HGST, a Western Digital Company.
*/ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/init.h> #include <linux/miscdevice.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/parser.h> #include <linux/seq_file.h> #include "nvme.h" #include "fabrics.h" #include <linux/nvme-keyring.h> static LIST_HEAD(nvmf_transports); static DECLARE_RWSEM(nvmf_transports_rwsem); static LIST_HEAD(nvmf_hosts); static DEFINE_MUTEX(nvmf_hosts_mutex); static struct nvmf_host *nvmf_default_host; static struct nvmf_host *nvmf_host_alloc(const char *hostnqn, uuid_t *id) { struct nvmf_host *host; host = kmalloc(sizeof(*host), GFP_KERNEL); if (!host) return NULL; kref_init(&host->ref); uuid_copy(&host->id, id); strscpy(host->nqn, hostnqn, NVMF_NQN_SIZE); return host; } static struct nvmf_host *nvmf_host_add(const char *hostnqn, uuid_t *id) { struct nvmf_host *host; mutex_lock(&nvmf_hosts_mutex); /* * We have defined a host as how it is perceived by the target. * Therefore, we don't allow different Host NQNs with the same Host ID. * Similarly, we do not allow the usage of the same Host NQN with * different Host IDs. This'll maintain unambiguous host identification. */ list_for_each_entry(host, &nvmf_hosts, list) { bool same_hostnqn = !strcmp(host->nqn, hostnqn); bool same_hostid = uuid_equal(&host->id, id); if (same_hostnqn && same_hostid) { kref_get(&host->ref); goto out_unlock; } if (same_hostnqn) { pr_err("found same hostnqn %s but different hostid %pUb\n", hostnqn, id); host = ERR_PTR(-EINVAL); goto out_unlock; } if (same_hostid) { pr_err("found same hostid %pUb but different hostnqn %s\n", id, hostnqn); host = ERR_PTR(-EINVAL); goto out_unlock; } } host = nvmf_host_alloc(hostnqn, id); if (!host) { host = ERR_PTR(-ENOMEM); goto out_unlock; } list_add_tail(&host->list, &nvmf_hosts); out_unlock: mutex_unlock(&nvmf_hosts_mutex); return host; } static struct nvmf_host *nvmf_host_default(void) { struct nvmf_host *host; char nqn[NVMF_NQN_SIZE]; uuid_t id; uuid_gen(&id); snprintf(nqn, NVMF_NQN_SIZE, "nqn.2014-08.org.nvmexpress:uuid:%pUb", &id); host = nvmf_host_alloc(nqn, &id); if (!host) return NULL; mutex_lock(&nvmf_hosts_mutex); list_add_tail(&host->list, &nvmf_hosts); mutex_unlock(&nvmf_hosts_mutex); return host; } static void nvmf_host_destroy(struct kref *ref) { struct nvmf_host *host = container_of(ref, struct nvmf_host, ref); mutex_lock(&nvmf_hosts_mutex); list_del(&host->list); mutex_unlock(&nvmf_hosts_mutex); kfree(host); } static void nvmf_host_put(struct nvmf_host *host) { if (host) kref_put(&host->ref, nvmf_host_destroy); } /** * nvmf_get_address() - Get address/port * @ctrl: Host NVMe controller instance which we got the address * @buf: OUTPUT parameter that will contain the address/port * @size: buffer size */ int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size) { int len = 0; if (ctrl->opts->mask & NVMF_OPT_TRADDR) len += scnprintf(buf, size, "traddr=%s", ctrl->opts->traddr); if (ctrl->opts->mask & NVMF_OPT_TRSVCID) len += scnprintf(buf + len, size - len, "%strsvcid=%s", (len) ? "," : "", ctrl->opts->trsvcid); if (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR) len += scnprintf(buf + len, size - len, "%shost_traddr=%s", (len) ? "," : "", ctrl->opts->host_traddr); if (ctrl->opts->mask & NVMF_OPT_HOST_IFACE) len += scnprintf(buf + len, size - len, "%shost_iface=%s", (len) ? "," : "", ctrl->opts->host_iface); len += scnprintf(buf + len, size - len, "\n"); return len; } EXPORT_SYMBOL_GPL(nvmf_get_address); /** * nvmf_reg_read32() - NVMe Fabrics "Property Get" API function. 
* @ctrl: Host NVMe controller instance maintaining the admin * queue used to submit the property read command to * the allocated NVMe controller resource on the target system. * @off: Starting offset value of the targeted property * register (see the fabrics section of the NVMe standard). * @val: OUTPUT parameter that will contain the value of * the property after a successful read. * * Used by the host system to retrieve a 32-bit capsule property value * from an NVMe controller on the target system. * * ("Capsule property" is an "PCIe register concept" applied to the * NVMe fabrics space.) * * Return: * 0: successful read * > 0: NVMe error status code * < 0: Linux errno error code */ int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val) { struct nvme_command cmd = { }; union nvme_result res; int ret; cmd.prop_get.opcode = nvme_fabrics_command; cmd.prop_get.fctype = nvme_fabrics_type_property_get; cmd.prop_get.offset = cpu_to_le32(off); ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, NULL, 0, NVME_QID_ANY, NVME_SUBMIT_RESERVED); if (ret >= 0) *val = le64_to_cpu(res.u64); if (unlikely(ret != 0)) dev_err(ctrl->device, "Property Get error: %d, offset %#x\n", ret > 0 ? ret & ~NVME_STATUS_DNR : ret, off); return ret; } EXPORT_SYMBOL_GPL(nvmf_reg_read32); /** * nvmf_reg_read64() - NVMe Fabrics "Property Get" API function. * @ctrl: Host NVMe controller instance maintaining the admin * queue used to submit the property read command to * the allocated controller resource on the target system. * @off: Starting offset value of the targeted property * register (see the fabrics section of the NVMe standard). * @val: OUTPUT parameter that will contain the value of * the property after a successful read. * * Used by the host system to retrieve a 64-bit capsule property value * from an NVMe controller on the target system. * * ("Capsule property" is an "PCIe register concept" applied to the * NVMe fabrics space.) * * Return: * 0: successful read * > 0: NVMe error status code * < 0: Linux errno error code */ int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val) { struct nvme_command cmd = { }; union nvme_result res; int ret; cmd.prop_get.opcode = nvme_fabrics_command; cmd.prop_get.fctype = nvme_fabrics_type_property_get; cmd.prop_get.attrib = 1; cmd.prop_get.offset = cpu_to_le32(off); ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, NULL, 0, NVME_QID_ANY, NVME_SUBMIT_RESERVED); if (ret >= 0) *val = le64_to_cpu(res.u64); if (unlikely(ret != 0)) dev_err(ctrl->device, "Property Get error: %d, offset %#x\n", ret > 0 ? ret & ~NVME_STATUS_DNR : ret, off); return ret; } EXPORT_SYMBOL_GPL(nvmf_reg_read64); /** * nvmf_reg_write32() - NVMe Fabrics "Property Write" API function. * @ctrl: Host NVMe controller instance maintaining the admin * queue used to submit the property read command to * the allocated NVMe controller resource on the target system. * @off: Starting offset value of the targeted property * register (see the fabrics section of the NVMe standard). * @val: Input parameter that contains the value to be * written to the property. * * Used by the NVMe host system to write a 32-bit capsule property value * to an NVMe controller on the target system. * * ("Capsule property" is an "PCIe register concept" applied to the * NVMe fabrics space.) 
* * Return: * 0: successful write * > 0: NVMe error status code * < 0: Linux errno error code */ int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val) { struct nvme_command cmd = { }; int ret; cmd.prop_set.opcode = nvme_fabrics_command; cmd.prop_set.fctype = nvme_fabrics_type_property_set; cmd.prop_set.attrib = 0; cmd.prop_set.offset = cpu_to_le32(off); cmd.prop_set.value = cpu_to_le64(val); ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, NULL, NULL, 0, NVME_QID_ANY, NVME_SUBMIT_RESERVED); if (unlikely(ret)) dev_err(ctrl->device, "Property Set error: %d, offset %#x\n", ret > 0 ? ret & ~NVME_STATUS_DNR : ret, off); return ret; } EXPORT_SYMBOL_GPL(nvmf_reg_write32); int nvmf_subsystem_reset(struct nvme_ctrl *ctrl) { int ret; if (!nvme_wait_reset(ctrl)) return -EBUSY; ret = ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, NVME_SUBSYS_RESET); if (ret) return ret; return nvme_try_sched_reset(ctrl); } EXPORT_SYMBOL_GPL(nvmf_subsystem_reset); /** * nvmf_log_connect_error() - Error-parsing-diagnostic print out function for * connect() errors. * @ctrl: The specific /dev/nvmeX device that had the error. * @errval: Error code to be decoded in a more human-friendly * printout. * @offset: For use with the NVMe error code * NVME_SC_CONNECT_INVALID_PARAM. * @cmd: This is the SQE portion of a submission capsule. * @data: This is the "Data" portion of a submission capsule. */ static void nvmf_log_connect_error(struct nvme_ctrl *ctrl, int errval, int offset, struct nvme_command *cmd, struct nvmf_connect_data *data) { int err_sctype = errval & ~NVME_STATUS_DNR; if (errval < 0) { dev_err(ctrl->device, "Connect command failed, errno: %d\n", errval); return; } switch (err_sctype) { case NVME_SC_CONNECT_INVALID_PARAM: if (offset >> 16) { char *inv_data = "Connect Invalid Data Parameter"; switch (offset & 0xffff) { case (offsetof(struct nvmf_connect_data, cntlid)): dev_err(ctrl->device, "%s, cntlid: %d\n", inv_data, data->cntlid); break; case (offsetof(struct nvmf_connect_data, hostnqn)): dev_err(ctrl->device, "%s, hostnqn \"%s\"\n", inv_data, data->hostnqn); break; case (offsetof(struct nvmf_connect_data, subsysnqn)): dev_err(ctrl->device, "%s, subsysnqn \"%s\"\n", inv_data, data->subsysnqn); break; default: dev_err(ctrl->device, "%s, starting byte offset: %d\n", inv_data, offset & 0xffff); break; } } else { char *inv_sqe = "Connect Invalid SQE Parameter"; switch (offset) { case (offsetof(struct nvmf_connect_command, qid)): dev_err(ctrl->device, "%s, qid %d\n", inv_sqe, cmd->connect.qid); break; default: dev_err(ctrl->device, "%s, starting byte offset: %d\n", inv_sqe, offset); } } break; case NVME_SC_CONNECT_INVALID_HOST: dev_err(ctrl->device, "Connect for subsystem %s is not allowed, hostnqn: %s\n", data->subsysnqn, data->hostnqn); break; case NVME_SC_CONNECT_CTRL_BUSY: dev_err(ctrl->device, "Connect command failed: controller is busy or not available\n"); break; case NVME_SC_CONNECT_FORMAT: dev_err(ctrl->device, "Connect incompatible format: %d", cmd->connect.recfmt); break; case NVME_SC_HOST_PATH_ERROR: dev_err(ctrl->device, "Connect command failed: host path error\n"); break; case NVME_SC_AUTH_REQUIRED: dev_err(ctrl->device, "Connect command failed: authentication required\n"); break; default: dev_err(ctrl->device, "Connect command failed, error wo/DNR bit: %d\n", err_sctype); break; } } static struct nvmf_connect_data *nvmf_connect_data_prep(struct nvme_ctrl *ctrl, u16 cntlid) { struct nvmf_connect_data *data; data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) return NULL; 
uuid_copy(&data->hostid, &ctrl->opts->host->id); data->cntlid = cpu_to_le16(cntlid); strscpy(data->subsysnqn, ctrl->opts->subsysnqn, NVMF_NQN_SIZE); strscpy(data->hostnqn, ctrl->opts->host->nqn, NVMF_NQN_SIZE); return data; } static void nvmf_connect_cmd_prep(struct nvme_ctrl *ctrl, u16 qid, struct nvme_command *cmd) { cmd->connect.opcode = nvme_fabrics_command; cmd->connect.fctype = nvme_fabrics_type_connect; cmd->connect.qid = cpu_to_le16(qid); if (qid) { cmd->connect.sqsize = cpu_to_le16(ctrl->sqsize); } else { cmd->connect.sqsize = cpu_to_le16(NVME_AQ_DEPTH - 1); /* * set keep-alive timeout in seconds granularity (ms * 1000) */ cmd->connect.kato = cpu_to_le32(ctrl->kato * 1000); } if (ctrl->opts->disable_sqflow) cmd->connect.cattr |= NVME_CONNECT_DISABLE_SQFLOW; } /** * nvmf_connect_admin_queue() - NVMe Fabrics Admin Queue "Connect" * API function. * @ctrl: Host nvme controller instance used to request * a new NVMe controller allocation on the target * system and establish an NVMe Admin connection to * that controller. * * This function enables an NVMe host device to request a new allocation of * an NVMe controller resource on a target system as well establish a * fabrics-protocol connection of the NVMe Admin queue between the * host system device and the allocated NVMe controller on the * target system via a NVMe Fabrics "Connect" command. */ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl) { struct nvme_command cmd = { }; union nvme_result res; struct nvmf_connect_data *data; int ret; u32 result; nvmf_connect_cmd_prep(ctrl, 0, &cmd); data = nvmf_connect_data_prep(ctrl, 0xffff); if (!data) return -ENOMEM; ret = __nvme_submit_sync_cmd(ctrl->fabrics_q, &cmd, &res, data, sizeof(*data), NVME_QID_ANY, NVME_SUBMIT_AT_HEAD | NVME_SUBMIT_NOWAIT | NVME_SUBMIT_RESERVED); if (ret) { nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32), &cmd, data); goto out_free_data; } result = le32_to_cpu(res.u32); ctrl->cntlid = result & 0xFFFF; if (result & (NVME_CONNECT_AUTHREQ_ATR | NVME_CONNECT_AUTHREQ_ASCR)) { /* Check for secure concatenation */ if ((result & NVME_CONNECT_AUTHREQ_ASCR) && !ctrl->opts->concat) { dev_warn(ctrl->device, "qid 0: secure concatenation is not supported\n"); ret = -EOPNOTSUPP; goto out_free_data; } /* Authentication required */ ret = nvme_auth_negotiate(ctrl, 0); if (ret) { dev_warn(ctrl->device, "qid 0: authentication setup failed\n"); goto out_free_data; } ret = nvme_auth_wait(ctrl, 0); if (ret) { dev_warn(ctrl->device, "qid 0: authentication failed, error %d\n", ret); } else dev_info(ctrl->device, "qid 0: authenticated\n"); } out_free_data: kfree(data); return ret; } EXPORT_SYMBOL_GPL(nvmf_connect_admin_queue); /** * nvmf_connect_io_queue() - NVMe Fabrics I/O Queue "Connect" * API function. * @ctrl: Host nvme controller instance used to establish an * NVMe I/O queue connection to the already allocated NVMe * controller on the target system. * @qid: NVMe I/O queue number for the new I/O connection between * host and target (note qid == 0 is illegal as this is * the Admin queue, per NVMe standard). * * This function issues a fabrics-protocol connection * of a NVMe I/O queue (via NVMe Fabrics "Connect" command) * between the host system device and the allocated NVMe controller * on the target system. 
* * Return: * 0: success * > 0: NVMe error status code * < 0: Linux errno error code */ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid) { struct nvme_command cmd = { }; struct nvmf_connect_data *data; union nvme_result res; int ret; u32 result; nvmf_connect_cmd_prep(ctrl, qid, &cmd); data = nvmf_connect_data_prep(ctrl, ctrl->cntlid); if (!data) return -ENOMEM; ret = __nvme_submit_sync_cmd(ctrl->connect_q, &cmd, &res, data, sizeof(*data), qid, NVME_SUBMIT_AT_HEAD | NVME_SUBMIT_RESERVED | NVME_SUBMIT_NOWAIT); if (ret) { nvmf_log_connect_error(ctrl, ret, le32_to_cpu(res.u32), &cmd, data); goto out_free_data; } result = le32_to_cpu(res.u32); if (result & (NVME_CONNECT_AUTHREQ_ATR | NVME_CONNECT_AUTHREQ_ASCR)) { /* Secure concatenation is not implemented */ if (result & NVME_CONNECT_AUTHREQ_ASCR) { dev_warn(ctrl->device, "qid %d: secure concatenation is not supported\n", qid); ret = -EOPNOTSUPP; goto out_free_data; } /* Authentication required */ ret = nvme_auth_negotiate(ctrl, qid); if (ret) { dev_warn(ctrl->device, "qid %d: authentication setup failed\n", qid); goto out_free_data; } ret = nvme_auth_wait(ctrl, qid); if (ret) { dev_warn(ctrl->device, "qid %u: authentication failed, error %d\n", qid, ret); } } out_free_data: kfree(data); return ret; } EXPORT_SYMBOL_GPL(nvmf_connect_io_queue); /* * Evaluate the status information returned by the transport in order to decided * if a reconnect attempt should be scheduled. * * Do not retry when: * * - the DNR bit is set and the specification states no further connect * attempts with the same set of paramenters should be attempted. * * - when the authentication attempt fails, because the key was invalid. * This error code is set on the host side. */ bool nvmf_should_reconnect(struct nvme_ctrl *ctrl, int status) { if (status > 0 && (status & NVME_STATUS_DNR)) return false; if (status == -EKEYREJECTED) return false; if (ctrl->opts->max_reconnects == -1 || ctrl->nr_reconnects < ctrl->opts->max_reconnects) return true; return false; } EXPORT_SYMBOL_GPL(nvmf_should_reconnect); /** * nvmf_register_transport() - NVMe Fabrics Library registration function. * @ops: Transport ops instance to be registered to the * common fabrics library. * * API function that registers the type of specific transport fabric * being implemented to the common NVMe fabrics library. Part of * the overall init sequence of starting up a fabrics driver. */ int nvmf_register_transport(struct nvmf_transport_ops *ops) { if (!ops->create_ctrl) return -EINVAL; down_write(&nvmf_transports_rwsem); list_add_tail(&ops->entry, &nvmf_transports); up_write(&nvmf_transports_rwsem); return 0; } EXPORT_SYMBOL_GPL(nvmf_register_transport); /** * nvmf_unregister_transport() - NVMe Fabrics Library unregistration function. * @ops: Transport ops instance to be unregistered from the * common fabrics library. * * Fabrics API function that unregisters the type of specific transport * fabric being implemented from the common NVMe fabrics library. * Part of the overall exit sequence of unloading the implemented driver. 
*/ void nvmf_unregister_transport(struct nvmf_transport_ops *ops) { down_write(&nvmf_transports_rwsem); list_del(&ops->entry); up_write(&nvmf_transports_rwsem); } EXPORT_SYMBOL_GPL(nvmf_unregister_transport); static struct nvmf_transport_ops *nvmf_lookup_transport( struct nvmf_ctrl_options *opts) { struct nvmf_transport_ops *ops; lockdep_assert_held(&nvmf_transports_rwsem); list_for_each_entry(ops, &nvmf_transports, entry) { if (strcmp(ops->name, opts->transport) == 0) return ops; } return NULL; } static struct key *nvmf_parse_key(int key_id) { struct key *key; if (!IS_ENABLED(CONFIG_NVME_TCP_TLS)) { pr_err("TLS is not supported\n"); return ERR_PTR(-EINVAL); } key = nvme_tls_key_lookup(key_id); if (IS_ERR(key)) pr_err("key id %08x not found\n", key_id); else pr_debug("Using key id %08x\n", key_id); return key; } static const match_table_t opt_tokens = { { NVMF_OPT_TRANSPORT, "transport=%s" }, { NVMF_OPT_TRADDR, "traddr=%s" }, { NVMF_OPT_TRSVCID, "trsvcid=%s" }, { NVMF_OPT_NQN, "nqn=%s" }, { NVMF_OPT_QUEUE_SIZE, "queue_size=%d" }, { NVMF_OPT_NR_IO_QUEUES, "nr_io_queues=%d" }, { NVMF_OPT_RECONNECT_DELAY, "reconnect_delay=%d" }, { NVMF_OPT_CTRL_LOSS_TMO, "ctrl_loss_tmo=%d" }, { NVMF_OPT_KATO, "keep_alive_tmo=%d" }, { NVMF_OPT_HOSTNQN, "hostnqn=%s" }, { NVMF_OPT_HOST_TRADDR, "host_traddr=%s" }, { NVMF_OPT_HOST_IFACE, "host_iface=%s" }, { NVMF_OPT_HOST_ID, "hostid=%s" }, { NVMF_OPT_DUP_CONNECT, "duplicate_connect" }, { NVMF_OPT_DISABLE_SQFLOW, "disable_sqflow" }, { NVMF_OPT_HDR_DIGEST, "hdr_digest" }, { NVMF_OPT_DATA_DIGEST, "data_digest" }, { NVMF_OPT_NR_WRITE_QUEUES, "nr_write_queues=%d" }, { NVMF_OPT_NR_POLL_QUEUES, "nr_poll_queues=%d" }, { NVMF_OPT_TOS, "tos=%d" }, #ifdef CONFIG_NVME_TCP_TLS { NVMF_OPT_KEYRING, "keyring=%d" }, { NVMF_OPT_TLS_KEY, "tls_key=%d" }, #endif { NVMF_OPT_FAIL_FAST_TMO, "fast_io_fail_tmo=%d" }, { NVMF_OPT_DISCOVERY, "discovery" }, #ifdef CONFIG_NVME_HOST_AUTH { NVMF_OPT_DHCHAP_SECRET, "dhchap_secret=%s" }, { NVMF_OPT_DHCHAP_CTRL_SECRET, "dhchap_ctrl_secret=%s" }, #endif #ifdef CONFIG_NVME_TCP_TLS { NVMF_OPT_TLS, "tls" }, { NVMF_OPT_CONCAT, "concat" }, #endif { NVMF_OPT_ERR, NULL } }; static int nvmf_parse_options(struct nvmf_ctrl_options *opts, const char *buf) { substring_t args[MAX_OPT_ARGS]; char *options, *o, *p; int token, ret = 0; size_t nqnlen = 0; int ctrl_loss_tmo = NVMF_DEF_CTRL_LOSS_TMO, key_id; uuid_t hostid; char hostnqn[NVMF_NQN_SIZE]; struct key *key; /* Set defaults */ opts->queue_size = NVMF_DEF_QUEUE_SIZE; opts->nr_io_queues = num_online_cpus(); opts->reconnect_delay = NVMF_DEF_RECONNECT_DELAY; opts->kato = 0; opts->duplicate_connect = false; opts->fast_io_fail_tmo = NVMF_DEF_FAIL_FAST_TMO; opts->hdr_digest = false; opts->data_digest = false; opts->tos = -1; /* < 0 == use transport default */ opts->tls = false; opts->tls_key = NULL; opts->keyring = NULL; opts->concat = false; options = o = kstrdup(buf, GFP_KERNEL); if (!options) return -ENOMEM; /* use default host if not given by user space */ uuid_copy(&hostid, &nvmf_default_host->id); strscpy(hostnqn, nvmf_default_host->nqn, NVMF_NQN_SIZE); while ((p = strsep(&o, ",\n")) != NULL) { if (!*p) continue; token = match_token(p, opt_tokens, args); opts->mask |= token; switch (token) { case NVMF_OPT_TRANSPORT: p = match_strdup(args); if (!p) { ret = -ENOMEM; goto out; } kfree(opts->transport); opts->transport = p; break; case NVMF_OPT_NQN: p = match_strdup(args); if (!p) { ret = -ENOMEM; goto out; } kfree(opts->subsysnqn); opts->subsysnqn = p; nqnlen = strlen(opts->subsysnqn); if (nqnlen >= 
NVMF_NQN_SIZE) { pr_err("%s needs to be < %d bytes\n", opts->subsysnqn, NVMF_NQN_SIZE); ret = -EINVAL; goto out; } opts->discovery_nqn = !(strcmp(opts->subsysnqn, NVME_DISC_SUBSYS_NAME)); break; case NVMF_OPT_TRADDR: p = match_strdup(args); if (!p) { ret = -ENOMEM; goto out; } kfree(opts->traddr); opts->traddr = p; break; case NVMF_OPT_TRSVCID: p = match_strdup(args); if (!p) { ret = -ENOMEM; goto out; } kfree(opts->trsvcid); opts->trsvcid = p; break; case NVMF_OPT_QUEUE_SIZE: if (match_int(args, &token)) { ret = -EINVAL; goto out; } if (token < NVMF_MIN_QUEUE_SIZE || token > NVMF_MAX_QUEUE_SIZE) { pr_err("Invalid queue_size %d\n", token); ret = -EINVAL; goto out; } opts->queue_size = token; break; case NVMF_OPT_NR_IO_QUEUES: if (match_int(args, &token)) { ret = -EINVAL; goto out; } if (token <= 0) { pr_err("Invalid number of IOQs %d\n", token); ret = -EINVAL; goto out; } if (opts->discovery_nqn) { pr_debug("Ignoring nr_io_queues value for discovery controller\n"); break; } opts->nr_io_queues = min_t(unsigned int, num_online_cpus(), token); break; case NVMF_OPT_KATO: if (match_int(args, &token)) { ret = -EINVAL; goto out; } if (token < 0) { pr_err("Invalid keep_alive_tmo %d\n", token); ret = -EINVAL; goto out; } else if (token == 0 && !opts->discovery_nqn) { /* Allowed for debug */ pr_warn("keep_alive_tmo 0 won't execute keep alives!!!\n"); } opts->kato = token; break; case NVMF_OPT_CTRL_LOSS_TMO: if (match_int(args, &token)) { ret = -EINVAL; goto out; } if (token < 0) pr_warn("ctrl_loss_tmo < 0 will reconnect forever\n"); ctrl_loss_tmo = token; break; case NVMF_OPT_FAIL_FAST_TMO: if (match_int(args, &token)) { ret = -EINVAL; goto out; } if (token >= 0) pr_warn("I/O fail on reconnect controller after %d sec\n", token); else token = -1; opts->fast_io_fail_tmo = token; break; case NVMF_OPT_HOSTNQN: if (opts->host) { pr_err("hostnqn already user-assigned: %s\n", opts->host->nqn); ret = -EADDRINUSE; goto out; } p = match_strdup(args); if (!p) { ret = -ENOMEM; goto out; } nqnlen = strlen(p); if (nqnlen >= NVMF_NQN_SIZE) { pr_err("%s needs to be < %d bytes\n", p, NVMF_NQN_SIZE); kfree(p); ret = -EINVAL; goto out; } strscpy(hostnqn, p, NVMF_NQN_SIZE); kfree(p); break; case NVMF_OPT_RECONNECT_DELAY: if (match_int(args, &token)) { ret = -EINVAL; goto out; } if (token <= 0) { pr_err("Invalid reconnect_delay %d\n", token); ret = -EINVAL; goto out; } opts->reconnect_delay = token; break; case NVMF_OPT_HOST_TRADDR: p = match_strdup(args); if (!p) { ret = -ENOMEM; goto out; } kfree(opts->host_traddr); opts->host_traddr = p; break; case NVMF_OPT_HOST_IFACE: p = match_strdup(args); if (!p) { ret = -ENOMEM; goto out; } kfree(opts->host_iface); opts->host_iface = p; break; case NVMF_OPT_HOST_ID: p = match_strdup(args); if (!p) { ret = -ENOMEM; goto out; } ret = uuid_parse(p, &hostid); if (ret) { pr_err("Invalid hostid %s\n", p); ret = -EINVAL; kfree(p); goto out; } kfree(p); break; case NVMF_OPT_DUP_CONNECT: opts->duplicate_connect = true; break; case NVMF_OPT_DISABLE_SQFLOW: opts->disable_sqflow = true; break; case NVMF_OPT_HDR_DIGEST: opts->hdr_digest = true; break; case NVMF_OPT_DATA_DIGEST: opts->data_digest = true; break; case NVMF_OPT_NR_WRITE_QUEUES: if (match_int(args, &token)) { ret = -EINVAL; goto out; } if (token <= 0) { pr_err("Invalid nr_write_queues %d\n", token); ret = -EINVAL; goto out; } opts->nr_write_queues = token; break; case NVMF_OPT_NR_POLL_QUEUES: if (match_int(args, &token)) { ret = -EINVAL; goto out; } if (token <= 0) { pr_err("Invalid nr_poll_queues %d\n", token); ret = -EINVAL; 
goto out; } opts->nr_poll_queues = token; break; case NVMF_OPT_TOS: if (match_int(args, &token)) { ret = -EINVAL; goto out; } if (token < 0) { pr_err("Invalid type of service %d\n", token); ret = -EINVAL; goto out; } if (token > 255) { pr_warn("Clamping type of service to 255\n"); token = 255; } opts->tos = token; break; case NVMF_OPT_KEYRING: if (match_int(args, &key_id) || key_id <= 0) { ret = -EINVAL; goto out; } key = nvmf_parse_key(key_id); if (IS_ERR(key)) { ret = PTR_ERR(key); goto out; } key_put(opts->keyring); opts->keyring = key; break; case NVMF_OPT_TLS_KEY: if (match_int(args, &key_id) || key_id <= 0) { ret = -EINVAL; goto out; } key = nvmf_parse_key(key_id); if (IS_ERR(key)) { ret = PTR_ERR(key); goto out; } key_put(opts->tls_key); opts->tls_key = key; break; case NVMF_OPT_DISCOVERY: opts->discovery_nqn = true; break; case NVMF_OPT_DHCHAP_SECRET: p = match_strdup(args); if (!p) { ret = -ENOMEM; goto out; } if (strlen(p) < 11 || strncmp(p, "DHHC-1:", 7)) { pr_err("Invalid DH-CHAP secret %s\n", p); ret = -EINVAL; goto out; } kfree(opts->dhchap_secret); opts->dhchap_secret = p; break; case NVMF_OPT_DHCHAP_CTRL_SECRET: p = match_strdup(args); if (!p) { ret = -ENOMEM; goto out; } if (strlen(p) < 11 || strncmp(p, "DHHC-1:", 7)) { pr_err("Invalid DH-CHAP secret %s\n", p); ret = -EINVAL; goto out; } kfree(opts->dhchap_ctrl_secret); opts->dhchap_ctrl_secret = p; break; case NVMF_OPT_TLS: if (!IS_ENABLED(CONFIG_NVME_TCP_TLS)) { pr_err("TLS is not supported\n"); ret = -EINVAL; goto out; } opts->tls = true; break; case NVMF_OPT_CONCAT: if (!IS_ENABLED(CONFIG_NVME_TCP_TLS)) { pr_err("TLS is not supported\n"); ret = -EINVAL; goto out; } opts->concat = true; break; default: pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n", p); ret = -EINVAL; goto out; } } if (opts->discovery_nqn) { opts->nr_io_queues = 0; opts->nr_write_queues = 0; opts->nr_poll_queues = 0; opts->duplicate_connect = true; } else { if (!opts->kato) opts->kato = NVME_DEFAULT_KATO; } if (ctrl_loss_tmo < 0) { opts->max_reconnects = -1; } else { opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo, opts->reconnect_delay); if (ctrl_loss_tmo < opts->fast_io_fail_tmo) pr_warn("failfast tmo (%d) larger than controller loss tmo (%d)\n", opts->fast_io_fail_tmo, ctrl_loss_tmo); } if (opts->concat) { if (opts->tls) { pr_err("Secure concatenation over TLS is not supported\n"); ret = -EINVAL; goto out; } if (opts->tls_key) { pr_err("Cannot specify a TLS key for secure concatenation\n"); ret = -EINVAL; goto out; } if (!opts->dhchap_secret) { pr_err("Need to enable DH-CHAP for secure concatenation\n"); ret = -EINVAL; goto out; } } opts->host = nvmf_host_add(hostnqn, &hostid); if (IS_ERR(opts->host)) { ret = PTR_ERR(opts->host); opts->host = NULL; goto out; } out: kfree(options); return ret; } void nvmf_set_io_queues(struct nvmf_ctrl_options *opts, u32 nr_io_queues, u32 io_queues[HCTX_MAX_TYPES]) { if (opts->nr_write_queues && opts->nr_io_queues < nr_io_queues) { /* * separate read/write queues * hand out dedicated default queues only after we have * sufficient read queues. */ io_queues[HCTX_TYPE_READ] = opts->nr_io_queues; nr_io_queues -= io_queues[HCTX_TYPE_READ]; io_queues[HCTX_TYPE_DEFAULT] = min(opts->nr_write_queues, nr_io_queues); nr_io_queues -= io_queues[HCTX_TYPE_DEFAULT]; } else { /* * shared read/write queues * either no write queues were requested, or we don't have * sufficient queue count to have dedicated default queues. 
*/ io_queues[HCTX_TYPE_DEFAULT] = min(opts->nr_io_queues, nr_io_queues); nr_io_queues -= io_queues[HCTX_TYPE_DEFAULT]; } if (opts->nr_poll_queues && nr_io_queues) { /* map dedicated poll queues only if we have queues left */ io_queues[HCTX_TYPE_POLL] = min(opts->nr_poll_queues, nr_io_queues); } } EXPORT_SYMBOL_GPL(nvmf_set_io_queues); void nvmf_map_queues(struct blk_mq_tag_set *set, struct nvme_ctrl *ctrl, u32 io_queues[HCTX_MAX_TYPES]) { struct nvmf_ctrl_options *opts = ctrl->opts; if (opts->nr_write_queues && io_queues[HCTX_TYPE_READ]) { /* separate read/write queues */ set->map[HCTX_TYPE_DEFAULT].nr_queues = io_queues[HCTX_TYPE_DEFAULT]; set->map[HCTX_TYPE_DEFAULT].queue_offset = 0; set->map[HCTX_TYPE_READ].nr_queues = io_queues[HCTX_TYPE_READ]; set->map[HCTX_TYPE_READ].queue_offset = io_queues[HCTX_TYPE_DEFAULT]; } else { /* shared read/write queues */ set->map[HCTX_TYPE_DEFAULT].nr_queues = io_queues[HCTX_TYPE_DEFAULT]; set->map[HCTX_TYPE_DEFAULT].queue_offset = 0; set->map[HCTX_TYPE_READ].nr_queues = io_queues[HCTX_TYPE_DEFAULT]; set->map[HCTX_TYPE_READ].queue_offset = 0; } blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]); blk_mq_map_queues(&set->map[HCTX_TYPE_READ]); if (opts->nr_poll_queues && io_queues[HCTX_TYPE_POLL]) { /* map dedicated poll queues only if we have queues left */ set->map[HCTX_TYPE_POLL].nr_queues = io_queues[HCTX_TYPE_POLL]; set->map[HCTX_TYPE_POLL].queue_offset = io_queues[HCTX_TYPE_DEFAULT] + io_queues[HCTX_TYPE_READ]; blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]); } dev_info(ctrl->device, "mapped %d/%d/%d default/read/poll queues.\n", io_queues[HCTX_TYPE_DEFAULT], io_queues[HCTX_TYPE_READ], io_queues[HCTX_TYPE_POLL]); } EXPORT_SYMBOL_GPL(nvmf_map_queues); static int nvmf_check_required_opts(struct nvmf_ctrl_options *opts, unsigned int required_opts) { if ((opts->mask & required_opts) != required_opts) { unsigned int i; for (i = 0; i < ARRAY_SIZE(opt_tokens); i++) { if ((opt_tokens[i].token & required_opts) && !(opt_tokens[i].token & opts->mask)) { pr_warn("missing parameter '%s'\n", opt_tokens[i].pattern); } } return -EINVAL; } return 0; } bool nvmf_ip_options_match(struct nvme_ctrl *ctrl, struct nvmf_ctrl_options *opts) { if (!nvmf_ctlr_matches_baseopts(ctrl, opts) || strcmp(opts->traddr, ctrl->opts->traddr) || strcmp(opts->trsvcid, ctrl->opts->trsvcid)) return false; /* * Checking the local address or host interfaces is rough. * * In most cases, none is specified and the host port or * host interface is selected by the stack. * * Assume no match if: * - local address or host interface is specified and address * or host interface is not the same * - local address or host interface is not specified but * remote is, or vice versa (admin using specific * host_traddr/host_iface when it matters). 
*/ if ((opts->mask & NVMF_OPT_HOST_TRADDR) && (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR)) { if (strcmp(opts->host_traddr, ctrl->opts->host_traddr)) return false; } else if ((opts->mask & NVMF_OPT_HOST_TRADDR) || (ctrl->opts->mask & NVMF_OPT_HOST_TRADDR)) { return false; } if ((opts->mask & NVMF_OPT_HOST_IFACE) && (ctrl->opts->mask & NVMF_OPT_HOST_IFACE)) { if (strcmp(opts->host_iface, ctrl->opts->host_iface)) return false; } else if ((opts->mask & NVMF_OPT_HOST_IFACE) || (ctrl->opts->mask & NVMF_OPT_HOST_IFACE)) { return false; } return true; } EXPORT_SYMBOL_GPL(nvmf_ip_options_match); static int nvmf_check_allowed_opts(struct nvmf_ctrl_options *opts, unsigned int allowed_opts) { if (opts->mask & ~allowed_opts) { unsigned int i; for (i = 0; i < ARRAY_SIZE(opt_tokens); i++) { if ((opt_tokens[i].token & opts->mask) && (opt_tokens[i].token & ~allowed_opts)) { pr_warn("invalid parameter '%s'\n", opt_tokens[i].pattern); } } return -EINVAL; } return 0; } void nvmf_free_options(struct nvmf_ctrl_options *opts) { nvmf_host_put(opts->host); key_put(opts->keyring); key_put(opts->tls_key); kfree(opts->transport); kfree(opts->traddr); kfree(opts->trsvcid); kfree(opts->subsysnqn); kfree(opts->host_traddr); kfree(opts->host_iface); kfree(opts->dhchap_secret); kfree(opts->dhchap_ctrl_secret); kfree(opts); } EXPORT_SYMBOL_GPL(nvmf_free_options); #define NVMF_REQUIRED_OPTS (NVMF_OPT_TRANSPORT | NVMF_OPT_NQN) #define NVMF_ALLOWED_OPTS (NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \ NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \ NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT |\ NVMF_OPT_DISABLE_SQFLOW | NVMF_OPT_DISCOVERY |\ NVMF_OPT_FAIL_FAST_TMO | NVMF_OPT_DHCHAP_SECRET |\ NVMF_OPT_DHCHAP_CTRL_SECRET) static struct nvme_ctrl * nvmf_create_ctrl(struct device *dev, const char *buf) { struct nvmf_ctrl_options *opts; struct nvmf_transport_ops *ops; struct nvme_ctrl *ctrl; int ret; opts = kzalloc(sizeof(*opts), GFP_KERNEL); if (!opts) return ERR_PTR(-ENOMEM); ret = nvmf_parse_options(opts, buf); if (ret) goto out_free_opts; request_module("nvme-%s", opts->transport); /* * Check the generic options first as we need a valid transport for * the lookup below. Then clear the generic flags so that transport * drivers don't have to care about them. 
*/ ret = nvmf_check_required_opts(opts, NVMF_REQUIRED_OPTS); if (ret) goto out_free_opts; opts->mask &= ~NVMF_REQUIRED_OPTS; down_read(&nvmf_transports_rwsem); ops = nvmf_lookup_transport(opts); if (!ops) { pr_info("no handler found for transport %s.\n", opts->transport); ret = -EINVAL; goto out_unlock; } if (!try_module_get(ops->module)) { ret = -EBUSY; goto out_unlock; } up_read(&nvmf_transports_rwsem); ret = nvmf_check_required_opts(opts, ops->required_opts); if (ret) goto out_module_put; ret = nvmf_check_allowed_opts(opts, NVMF_ALLOWED_OPTS | ops->allowed_opts | ops->required_opts); if (ret) goto out_module_put; ctrl = ops->create_ctrl(dev, opts); if (IS_ERR(ctrl)) { ret = PTR_ERR(ctrl); goto out_module_put; } module_put(ops->module); return ctrl; out_module_put: module_put(ops->module); goto out_free_opts; out_unlock: up_read(&nvmf_transports_rwsem); out_free_opts: nvmf_free_options(opts); return ERR_PTR(ret); } static const struct class nvmf_class = { .name = "nvme-fabrics", }; static struct device *nvmf_device; static DEFINE_MUTEX(nvmf_dev_mutex); static ssize_t nvmf_dev_write(struct file *file, const char __user *ubuf, size_t count, loff_t *pos) { struct seq_file *seq_file = file->private_data; struct nvme_ctrl *ctrl; const char *buf; int ret = 0; if (count > PAGE_SIZE) return -ENOMEM; buf = memdup_user_nul(ubuf, count); if (IS_ERR(buf)) return PTR_ERR(buf); mutex_lock(&nvmf_dev_mutex); if (seq_file->private) { ret = -EINVAL; goto out_unlock; } ctrl = nvmf_create_ctrl(nvmf_device, buf); if (IS_ERR(ctrl)) { ret = PTR_ERR(ctrl); goto out_unlock; } seq_file->private = ctrl; out_unlock: mutex_unlock(&nvmf_dev_mutex); kfree(buf); return ret ? ret : count; } static void __nvmf_concat_opt_tokens(struct seq_file *seq_file) { const struct match_token *tok; int idx; /* * Add dummy entries for instance and cntlid to * signal an invalid/non-existing controller */ seq_puts(seq_file, "instance=-1,cntlid=-1"); for (idx = 0; idx < ARRAY_SIZE(opt_tokens); idx++) { tok = &opt_tokens[idx]; if (tok->token == NVMF_OPT_ERR) continue; seq_putc(seq_file, ','); seq_puts(seq_file, tok->pattern); } seq_putc(seq_file, '\n'); } static int nvmf_dev_show(struct seq_file *seq_file, void *private) { struct nvme_ctrl *ctrl; mutex_lock(&nvmf_dev_mutex); ctrl = seq_file->private; if (!ctrl) { __nvmf_concat_opt_tokens(seq_file); goto out_unlock; } seq_printf(seq_file, "instance=%d,cntlid=%d\n", ctrl->instance, ctrl->cntlid); out_unlock: mutex_unlock(&nvmf_dev_mutex); return 0; } static int nvmf_dev_open(struct inode *inode, struct file *file) { /* * The miscdevice code initializes file->private_data, but doesn't * make use of it later. 
*/ file->private_data = NULL; return single_open(file, nvmf_dev_show, NULL); } static int nvmf_dev_release(struct inode *inode, struct file *file) { struct seq_file *seq_file = file->private_data; struct nvme_ctrl *ctrl = seq_file->private; if (ctrl) nvme_put_ctrl(ctrl); return single_release(inode, file); } static const struct file_operations nvmf_dev_fops = { .owner = THIS_MODULE, .write = nvmf_dev_write, .read = seq_read, .open = nvmf_dev_open, .release = nvmf_dev_release, }; static struct miscdevice nvmf_misc = { .minor = MISC_DYNAMIC_MINOR, .name = "nvme-fabrics", .fops = &nvmf_dev_fops, }; static int __init nvmf_init(void) { int ret; nvmf_default_host = nvmf_host_default(); if (!nvmf_default_host) return -ENOMEM; ret = class_register(&nvmf_class); if (ret) { pr_err("couldn't register class nvme-fabrics\n"); goto out_free_host; } nvmf_device = device_create(&nvmf_class, NULL, MKDEV(0, 0), NULL, "ctl"); if (IS_ERR(nvmf_device)) { pr_err("couldn't create nvme-fabrics device!\n"); ret = PTR_ERR(nvmf_device); goto out_destroy_class; } ret = misc_register(&nvmf_misc); if (ret) { pr_err("couldn't register misc device: %d\n", ret); goto out_destroy_device; } return 0; out_destroy_device: device_destroy(&nvmf_class, MKDEV(0, 0)); out_destroy_class: class_unregister(&nvmf_class); out_free_host: nvmf_host_put(nvmf_default_host); return ret; } static void __exit nvmf_exit(void) { misc_deregister(&nvmf_misc); device_destroy(&nvmf_class, MKDEV(0, 0)); class_unregister(&nvmf_class); nvmf_host_put(nvmf_default_host); BUILD_BUG_ON(sizeof(struct nvmf_common_command) != 64); BUILD_BUG_ON(sizeof(struct nvmf_connect_command) != 64); BUILD_BUG_ON(sizeof(struct nvmf_property_get_command) != 64); BUILD_BUG_ON(sizeof(struct nvmf_property_set_command) != 64); BUILD_BUG_ON(sizeof(struct nvmf_auth_send_command) != 64); BUILD_BUG_ON(sizeof(struct nvmf_auth_receive_command) != 64); BUILD_BUG_ON(sizeof(struct nvmf_connect_data) != 1024); BUILD_BUG_ON(sizeof(struct nvmf_auth_dhchap_negotiate_data) != 8); BUILD_BUG_ON(sizeof(struct nvmf_auth_dhchap_challenge_data) != 16); BUILD_BUG_ON(sizeof(struct nvmf_auth_dhchap_reply_data) != 16); BUILD_BUG_ON(sizeof(struct nvmf_auth_dhchap_success1_data) != 16); BUILD_BUG_ON(sizeof(struct nvmf_auth_dhchap_success2_data) != 16); } MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("NVMe host fabrics library"); module_init(nvmf_init); module_exit(nvmf_exit); |
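/*
 * Illustrative userspace sketch (not part of the driver above): the misc
 * device registered as "nvme-fabrics" accepts a comma-separated option
 * string, which nvmf_parse_options() parses before a transport's
 * create_ctrl() is called.  nvme-cli normally performs this step; the
 * transport, address and NQN below are placeholders, not values from the
 * original source.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *opts =
		"transport=tcp,traddr=192.168.1.10,trsvcid=4420,"
		"nqn=nqn.2014-08.org.example:testsubsys";
	int fd = open("/dev/nvme-fabrics", O_RDWR);

	if (fd < 0) {
		perror("open /dev/nvme-fabrics");
		return 1;
	}

	/*
	 * One write() carries the whole option string; on success the kernel
	 * reports "instance=<n>,cntlid=<n>" on a subsequent read() of the
	 * same file descriptor (see nvmf_dev_show() above).
	 */
	if (write(fd, opts, strlen(opts)) < 0)
		perror("connect");

	close(fd);
	return 0;
}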
// SPDX-License-Identifier: GPL-2.0-only
/*
 * (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
 * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
 *
 * Based on Rusty Russell's IPv4 REDIRECT target. Development of IPv6
 * NAT funded by Astaro.
 */

#include <linux/if.h>
#include <linux/inetdevice.h>
#include <linux/ip.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
#include <linux/types.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter/x_tables.h>
#include <net/addrconf.h>
#include <net/checksum.h>
#include <net/protocol.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_redirect.h>

static unsigned int
redirect_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
	return nf_nat_redirect_ipv6(skb, par->targinfo, xt_hooknum(par));
}

static int redirect_tg6_checkentry(const struct xt_tgchk_param *par)
{
	const struct nf_nat_range2 *range = par->targinfo;

	if (range->flags & NF_NAT_RANGE_MAP_IPS)
		return -EINVAL;

	return nf_ct_netns_get(par->net, par->family);
}

static void redirect_tg_destroy(const struct xt_tgdtor_param *par)
{
	nf_ct_netns_put(par->net, par->family);
}

static int redirect_tg4_check(const struct xt_tgchk_param *par)
{
	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;

	if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) {
		pr_debug("bad MAP_IPS.\n");
		return -EINVAL;
	}
	if (mr->rangesize != 1) {
		pr_debug("bad rangesize %u.\n", mr->rangesize);
		return -EINVAL;
	}
	return nf_ct_netns_get(par->net, par->family);
}

static unsigned int
redirect_tg4(struct sk_buff *skb, const struct xt_action_param *par)
{
	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
	struct nf_nat_range2 range = {
		.flags = mr->range[0].flags,
		.min_proto = mr->range[0].min,
		.max_proto = mr->range[0].max,
	};

	return nf_nat_redirect_ipv4(skb, &range, xt_hooknum(par));
}

static struct xt_target redirect_tg_reg[] __read_mostly = {
	{
		.name       = "REDIRECT",
		.family     = NFPROTO_IPV6,
		.revision   = 0,
		.table      = "nat",
		.checkentry = redirect_tg6_checkentry,
		.destroy    = redirect_tg_destroy,
		.target     = redirect_tg6,
		.targetsize = sizeof(struct nf_nat_range),
		.hooks      = (1 << NF_INET_PRE_ROUTING) |
		              (1 << NF_INET_LOCAL_OUT),
		.me         = THIS_MODULE,
	},
	{
		.name       = "REDIRECT",
		.family     = NFPROTO_IPV4,
		.revision   = 0,
		.table      = "nat",
		.target     = redirect_tg4,
		.checkentry = redirect_tg4_check,
		.destroy    = redirect_tg_destroy,
		.targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
		.hooks      = (1 << NF_INET_PRE_ROUTING) |
		              (1 << NF_INET_LOCAL_OUT),
		.me         = THIS_MODULE,
	},
};

static int __init redirect_tg_init(void)
{
	return xt_register_targets(redirect_tg_reg,
				   ARRAY_SIZE(redirect_tg_reg));
}

static void __exit redirect_tg_exit(void)
{
	xt_unregister_targets(redirect_tg_reg, ARRAY_SIZE(redirect_tg_reg));
}

module_init(redirect_tg_init);
module_exit(redirect_tg_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_DESCRIPTION("Xtables: Connection redirection to localhost");
MODULE_ALIAS("ip6t_REDIRECT");
MODULE_ALIAS("ipt_REDIRECT");
1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 
1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 
2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 
3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 
3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 
4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 
// SPDX-License-Identifier: GPL-2.0
/*
 * linux/fs/namei.c
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 */

/*
 * Some corrections by tytso.
 */

/* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
 * lookup logic.
 */
/* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture.
 */

#include <linux/init.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/wordpart.h>
#include <linux/fs.h>
#include <linux/filelock.h>
#include <linux/namei.h>
#include <linux/pagemap.h>
#include <linux/sched/mm.h>
#include <linux/fsnotify.h>
#include <linux/personality.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/mount.h>
#include <linux/audit.h>
#include <linux/capability.h>
#include <linux/file.h>
#include <linux/fcntl.h>
#include <linux/device_cgroup.h>
#include <linux/fs_struct.h>
#include <linux/posix_acl.h>
#include <linux/hash.h>
#include <linux/bitops.h>
#include <linux/init_task.h>
#include <linux/uaccess.h>

#include "internal.h"
#include "mount.h"

/* [Feb-1997 T. Schoebel-Theuer]
 * Fundamental changes in the pathname lookup mechanisms (namei)
 * were necessary because of omirr. The reason is that omirr needs
 * to know the _real_ pathname, not the user-supplied one, in case
 * of symlinks (and also when transname replacements occur).
 *
 * The new code replaces the old recursive symlink resolution with
 * an iterative one (in case of non-nested symlink chains). It does
 * this with calls to <fs>_follow_link().
 * As a side effect, dir_namei(), _namei() and follow_link() are now
 * replaced with a single function lookup_dentry() that can handle all
 * the special cases of the former code.
 *
 * With the new dcache, the pathname is stored at each inode, at least as
 * long as the refcount of the inode is positive. As a side effect, the
 * size of the dcache depends on the inode cache and thus is dynamic.
 *
 * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink
 * resolution to correspond with current state of the code.
 *
 * Note that the symlink resolution is not *completely* iterative.
 * There is still a significant amount of tail- and mid- recursion in
 * the algorithm. Also, note that <fs>_readlink() is not used in
 * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
 * may return different results than <fs>_follow_link(). Many virtual
 * filesystems (including /proc) exhibit this behavior.
 */

/* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
 * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
 * and the name already exists in form of a symlink, try to create the new
 * name indicated by the symlink. The old code always complained that the
 * name already exists, due to not following the symlink even if its target
 * is nonexistent. The new semantics affects also mknod() and link() when
 * the name is a symlink pointing to a non-existent name.
 *
 * I don't know which semantics is the right one, since I have no access
 * to standards.
But I found by trial that HP-UX 9.0 has the full "new" * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the * "old" one. Personally, I think the new semantics is much more logical. * Note that "ln old new" where "new" is a symlink pointing to a non-existing * file does succeed in both HP-UX and SunOs, but not in Solaris * and in the old Linux semantics. */ /* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink * semantics. See the comments in "open_namei" and "do_link" below. * * [10-Sep-98 Alan Modra] Another symlink change. */ /* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks: * inside the path - always follow. * in the last component in creation/removal/renaming - never follow. * if LOOKUP_FOLLOW passed - follow. * if the pathname has trailing slashes - follow. * otherwise - don't follow. * (applied in that order). * * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT * restored for 2.4. This is the last surviving part of old 4.2BSD bug. * During the 2.4 we need to fix the userland stuff depending on it - * hopefully we will be able to get rid of that wart in 2.5. So far only * XEmacs seems to be relying on it... */ /* * [Sep 2001 AV] Single-semaphore locking scheme (kudos to David Holland) * implemented. Let's see if raised priority of ->s_vfs_rename_mutex gives * any extra contention... */ /* In order to reduce some races, while at the same time doing additional * checking and hopefully speeding things up, we copy filenames to the * kernel data space before using them.. * * POSIX.1 2.4: an empty pathname is invalid (ENOENT). * PATH_MAX includes the nul terminator --RR. */ #define EMBEDDED_NAME_MAX (PATH_MAX - offsetof(struct filename, iname)) static inline void initname(struct filename *name) { name->uptr = NULL; name->aname = NULL; atomic_set(&name->refcnt, 1); } struct filename * getname_flags(const char __user *filename, int flags) { struct filename *result; char *kname; int len; result = audit_reusename(filename); if (result) return result; result = __getname(); if (unlikely(!result)) return ERR_PTR(-ENOMEM); /* * First, try to embed the struct filename inside the names_cache * allocation */ kname = (char *)result->iname; result->name = kname; len = strncpy_from_user(kname, filename, EMBEDDED_NAME_MAX); /* * Handle both empty path and copy failure in one go. */ if (unlikely(len <= 0)) { if (unlikely(len < 0)) { __putname(result); return ERR_PTR(len); } /* The empty path is special. */ if (!(flags & LOOKUP_EMPTY)) { __putname(result); return ERR_PTR(-ENOENT); } } /* * Uh-oh. We have a name that's approaching PATH_MAX. Allocate a * separate struct filename so we can dedicate the entire * names_cache allocation for the pathname, and re-do the copy from * userland. */ if (unlikely(len == EMBEDDED_NAME_MAX)) { const size_t size = offsetof(struct filename, iname[1]); kname = (char *)result; /* * size is chosen that way we to guarantee that * result->iname[0] is within the same object and that * kname can't be equal to result->iname, no matter what. */ result = kzalloc(size, GFP_KERNEL); if (unlikely(!result)) { __putname(kname); return ERR_PTR(-ENOMEM); } result->name = kname; len = strncpy_from_user(kname, filename, PATH_MAX); if (unlikely(len < 0)) { __putname(kname); kfree(result); return ERR_PTR(len); } /* The empty path is special. 
*/ if (unlikely(!len) && !(flags & LOOKUP_EMPTY)) { __putname(kname); kfree(result); return ERR_PTR(-ENOENT); } if (unlikely(len == PATH_MAX)) { __putname(kname); kfree(result); return ERR_PTR(-ENAMETOOLONG); } } initname(result); audit_getname(result); return result; } struct filename *getname_uflags(const char __user *filename, int uflags) { int flags = (uflags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0; return getname_flags(filename, flags); } struct filename *__getname_maybe_null(const char __user *pathname) { struct filename *name; char c; /* try to save on allocations; loss on um, though */ if (get_user(c, pathname)) return ERR_PTR(-EFAULT); if (!c) return NULL; name = getname_flags(pathname, LOOKUP_EMPTY); if (!IS_ERR(name) && !(name->name[0])) { putname(name); name = NULL; } return name; } struct filename *getname_kernel(const char * filename) { struct filename *result; int len = strlen(filename) + 1; result = __getname(); if (unlikely(!result)) return ERR_PTR(-ENOMEM); if (len <= EMBEDDED_NAME_MAX) { result->name = (char *)result->iname; } else if (len <= PATH_MAX) { const size_t size = offsetof(struct filename, iname[1]); struct filename *tmp; tmp = kmalloc(size, GFP_KERNEL); if (unlikely(!tmp)) { __putname(result); return ERR_PTR(-ENOMEM); } tmp->name = (char *)result; result = tmp; } else { __putname(result); return ERR_PTR(-ENAMETOOLONG); } memcpy((char *)result->name, filename, len); initname(result); audit_getname(result); return result; } EXPORT_SYMBOL(getname_kernel); void putname(struct filename *name) { int refcnt; if (IS_ERR_OR_NULL(name)) return; refcnt = atomic_read(&name->refcnt); if (refcnt != 1) { if (WARN_ON_ONCE(!refcnt)) return; if (!atomic_dec_and_test(&name->refcnt)) return; } if (name->name != name->iname) { __putname(name->name); kfree(name); } else __putname(name); } EXPORT_SYMBOL(putname); /** * check_acl - perform ACL permission checking * @idmap: idmap of the mount the inode was found from * @inode: inode to check permissions on * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...) * * This function performs the ACL permission checking. Since this function * retrieve POSIX acls it needs to know whether it is called from a blocking or * non-blocking context and thus cares about the MAY_NOT_BLOCK bit. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. */ static int check_acl(struct mnt_idmap *idmap, struct inode *inode, int mask) { #ifdef CONFIG_FS_POSIX_ACL struct posix_acl *acl; if (mask & MAY_NOT_BLOCK) { acl = get_cached_acl_rcu(inode, ACL_TYPE_ACCESS); if (!acl) return -EAGAIN; /* no ->get_inode_acl() calls in RCU mode... */ if (is_uncached_acl(acl)) return -ECHILD; return posix_acl_permission(idmap, inode, acl, mask); } acl = get_inode_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl) { int error = posix_acl_permission(idmap, inode, acl, mask); posix_acl_release(acl); return error; } #endif return -EAGAIN; } /* * Very quick optimistic "we know we have no ACL's" check. * * Note that this is purely for ACL_TYPE_ACCESS, and purely * for the "we have cached that there are no ACLs" case. * * If this returns true, we know there are no ACLs. But if * it returns false, we might still not have ACLs (it could * be the is_uncached_acl() case). 
*/ static inline bool no_acl_inode(struct inode *inode) { #ifdef CONFIG_FS_POSIX_ACL return likely(!READ_ONCE(inode->i_acl)); #else return true; #endif } /** * acl_permission_check - perform basic UNIX permission checking * @idmap: idmap of the mount the inode was found from * @inode: inode to check permissions on * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...) * * This function performs the basic UNIX permission checking. Since this * function may retrieve POSIX acls it needs to know whether it is called from a * blocking or non-blocking context and thus cares about the MAY_NOT_BLOCK bit. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. */ static int acl_permission_check(struct mnt_idmap *idmap, struct inode *inode, int mask) { unsigned int mode = inode->i_mode; vfsuid_t vfsuid; /* * Common cheap case: everybody has the requested * rights, and there are no ACLs to check. No need * to do any owner/group checks in that case. * * - 'mask&7' is the requested permission bit set * - multiplying by 0111 spreads them out to all of ugo * - '& ~mode' looks for missing inode permission bits * - the '!' is for "no missing permissions" * * After that, we just need to check that there are no * ACL's on the inode - do the 'IS_POSIXACL()' check last * because it will dereference the ->i_sb pointer and we * want to avoid that if at all possible. */ if (!((mask & 7) * 0111 & ~mode)) { if (no_acl_inode(inode)) return 0; if (!IS_POSIXACL(inode)) return 0; } /* Are we the owner? If so, ACL's don't matter */ vfsuid = i_uid_into_vfsuid(idmap, inode); if (likely(vfsuid_eq_kuid(vfsuid, current_fsuid()))) { mask &= 7; mode >>= 6; return (mask & ~mode) ? -EACCES : 0; } /* Do we have ACL's? */ if (IS_POSIXACL(inode) && (mode & S_IRWXG)) { int error = check_acl(idmap, inode, mask); if (error != -EAGAIN) return error; } /* Only RWX matters for group/other mode bits */ mask &= 7; /* * Are the group permissions different from * the other permissions in the bits we care * about? Need to check group ownership if so. */ if (mask & (mode ^ (mode >> 3))) { vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode); if (vfsgid_in_group_p(vfsgid)) mode >>= 3; } /* Bits in 'mode' clear that we require? */ return (mask & ~mode) ? -EACCES : 0; } /** * generic_permission - check for access rights on a Posix-like filesystem * @idmap: idmap of the mount the inode was found from * @inode: inode to check access rights for * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC, * %MAY_NOT_BLOCK ...) * * Used to check for read/write/execute permissions on a file. * We use "fsuid" for this, letting us set arbitrary permissions * for filesystem access without changing the "normal" uids which * are used for other things. * * generic_permission is rcu-walk aware. It returns -ECHILD in case an rcu-walk * request cannot be satisfied (eg. requires blocking or too much complexity). * It would then be called again in ref-walk mode. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. 
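 *
 * (Editorial aside, not part of the original kerneldoc: a worked example of
 * the cheap fast path in acl_permission_check() above, added purely for
 * illustration. For mask = MAY_READ (4) and mode = 0644:
 *
 *	(mask & 7) * 0111	= 0444
 *	0444 & ~0644		= 0		-> everybody may read
 *
 * so if the inode also has no ACLs the check returns 0 without looking at
 * the owner or group at all. For mask = MAY_WRITE (2) and the same mode,
 * 0222 & ~0644 = 0022, which is non-zero, so the slower owner/group/other
 * path is taken instead.)
 *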
* On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. */ int generic_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { int ret; /* * Do the basic permission checks. */ ret = acl_permission_check(idmap, inode, mask); if (ret != -EACCES) return ret; if (S_ISDIR(inode->i_mode)) { /* DACs are overridable for directories */ if (!(mask & MAY_WRITE)) if (capable_wrt_inode_uidgid(idmap, inode, CAP_DAC_READ_SEARCH)) return 0; if (capable_wrt_inode_uidgid(idmap, inode, CAP_DAC_OVERRIDE)) return 0; return -EACCES; } /* * Searching includes executable on directories, else just read. */ mask &= MAY_READ | MAY_WRITE | MAY_EXEC; if (mask == MAY_READ) if (capable_wrt_inode_uidgid(idmap, inode, CAP_DAC_READ_SEARCH)) return 0; /* * Read/write DACs are always overridable. * Executable DACs are overridable when there is * at least one exec bit set. */ if (!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO)) if (capable_wrt_inode_uidgid(idmap, inode, CAP_DAC_OVERRIDE)) return 0; return -EACCES; } EXPORT_SYMBOL(generic_permission); /** * do_inode_permission - UNIX permission checking * @idmap: idmap of the mount the inode was found from * @inode: inode to check permissions on * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...) * * We _really_ want to just do "generic_permission()" without * even looking at the inode->i_op values. So we keep a cache * flag in inode->i_opflags, that says "this has not special * permission function, use the fast case". */ static inline int do_inode_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { if (unlikely(!(inode->i_opflags & IOP_FASTPERM))) { if (likely(inode->i_op->permission)) return inode->i_op->permission(idmap, inode, mask); /* This gets set once for the inode lifetime */ spin_lock(&inode->i_lock); inode->i_opflags |= IOP_FASTPERM; spin_unlock(&inode->i_lock); } return generic_permission(idmap, inode, mask); } /** * sb_permission - Check superblock-level permissions * @sb: Superblock of inode to check permission on * @inode: Inode to check permission on * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) * * Separate out file-system wide checks from inode-specific permission checks. */ static int sb_permission(struct super_block *sb, struct inode *inode, int mask) { if (unlikely(mask & MAY_WRITE)) { umode_t mode = inode->i_mode; /* Nobody gets write access to a read-only fs. */ if (sb_rdonly(sb) && (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) return -EROFS; } return 0; } /** * inode_permission - Check for access rights to a given inode * @idmap: idmap of the mount the inode was found from * @inode: Inode to check permission on * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) * * Check for read/write/execute permissions on an inode. We use fs[ug]id for * this, letting us set arbitrary permissions for filesystem access without * changing the "normal" UIDs which are used for other things. * * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask. */ int inode_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { int retval; retval = sb_permission(inode->i_sb, inode, mask); if (retval) return retval; if (unlikely(mask & MAY_WRITE)) { /* * Nobody gets write access to an immutable file. */ if (IS_IMMUTABLE(inode)) return -EPERM; /* * Updating mtime will likely cause i_uid and i_gid to be * written back improperly if their true value is unknown * to the vfs. 
*/ if (HAS_UNMAPPED_ID(idmap, inode)) return -EACCES; } retval = do_inode_permission(idmap, inode, mask); if (retval) return retval; retval = devcgroup_inode_permission(inode, mask); if (retval) return retval; return security_inode_permission(inode, mask); } EXPORT_SYMBOL(inode_permission); /** * path_get - get a reference to a path * @path: path to get the reference to * * Given a path increment the reference count to the dentry and the vfsmount. */ void path_get(const struct path *path) { mntget(path->mnt); dget(path->dentry); } EXPORT_SYMBOL(path_get); /** * path_put - put a reference to a path * @path: path to put the reference to * * Given a path decrement the reference count to the dentry and the vfsmount. */ void path_put(const struct path *path) { dput(path->dentry); mntput(path->mnt); } EXPORT_SYMBOL(path_put); #define EMBEDDED_LEVELS 2 struct nameidata { struct path path; struct qstr last; struct path root; struct inode *inode; /* path.dentry.d_inode */ unsigned int flags, state; unsigned seq, next_seq, m_seq, r_seq; int last_type; unsigned depth; int total_link_count; struct saved { struct path link; struct delayed_call done; const char *name; unsigned seq; } *stack, internal[EMBEDDED_LEVELS]; struct filename *name; const char *pathname; struct nameidata *saved; unsigned root_seq; int dfd; vfsuid_t dir_vfsuid; umode_t dir_mode; } __randomize_layout; #define ND_ROOT_PRESET 1 #define ND_ROOT_GRABBED 2 #define ND_JUMPED 4 static void __set_nameidata(struct nameidata *p, int dfd, struct filename *name) { struct nameidata *old = current->nameidata; p->stack = p->internal; p->depth = 0; p->dfd = dfd; p->name = name; p->pathname = likely(name) ? name->name : ""; p->path.mnt = NULL; p->path.dentry = NULL; p->total_link_count = old ? old->total_link_count : 0; p->saved = old; current->nameidata = p; } static inline void set_nameidata(struct nameidata *p, int dfd, struct filename *name, const struct path *root) { __set_nameidata(p, dfd, name); p->state = 0; if (unlikely(root)) { p->state = ND_ROOT_PRESET; p->root = *root; } } static void restore_nameidata(void) { struct nameidata *now = current->nameidata, *old = now->saved; current->nameidata = old; if (old) old->total_link_count = now->total_link_count; if (now->stack != now->internal) kfree(now->stack); } static bool nd_alloc_stack(struct nameidata *nd) { struct saved *p; p= kmalloc_array(MAXSYMLINKS, sizeof(struct saved), nd->flags & LOOKUP_RCU ? GFP_ATOMIC : GFP_KERNEL); if (unlikely(!p)) return false; memcpy(p, nd->internal, sizeof(nd->internal)); nd->stack = p; return true; } /** * path_connected - Verify that a dentry is below mnt.mnt_root * @mnt: The mountpoint to check. * @dentry: The dentry to check. * * Rename can sometimes move a file or directory outside of a bind * mount, path_connected allows those cases to be detected. 
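 *
 * (Editorial aside, not part of the original comment: for instance, when
 * only a subdirectory of a filesystem is bind-mounted, a concurrent
 * rename(2) in the underlying filesystem can move a directory we are
 * walking out of the bound subtree; is_subdir() below then reports that
 * the dentry is no longer beneath mnt->mnt_root.)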
*/ static bool path_connected(struct vfsmount *mnt, struct dentry *dentry) { struct super_block *sb = mnt->mnt_sb; /* Bind mounts can have disconnected paths */ if (mnt->mnt_root == sb->s_root) return true; return is_subdir(dentry, mnt->mnt_root); } static void drop_links(struct nameidata *nd) { int i = nd->depth; while (i--) { struct saved *last = nd->stack + i; do_delayed_call(&last->done); clear_delayed_call(&last->done); } } static void leave_rcu(struct nameidata *nd) { nd->flags &= ~LOOKUP_RCU; nd->seq = nd->next_seq = 0; rcu_read_unlock(); } static void terminate_walk(struct nameidata *nd) { drop_links(nd); if (!(nd->flags & LOOKUP_RCU)) { int i; path_put(&nd->path); for (i = 0; i < nd->depth; i++) path_put(&nd->stack[i].link); if (nd->state & ND_ROOT_GRABBED) { path_put(&nd->root); nd->state &= ~ND_ROOT_GRABBED; } } else { leave_rcu(nd); } nd->depth = 0; nd->path.mnt = NULL; nd->path.dentry = NULL; } /* path_put is needed afterwards regardless of success or failure */ static bool __legitimize_path(struct path *path, unsigned seq, unsigned mseq) { int res = __legitimize_mnt(path->mnt, mseq); if (unlikely(res)) { if (res > 0) path->mnt = NULL; path->dentry = NULL; return false; } if (unlikely(!lockref_get_not_dead(&path->dentry->d_lockref))) { path->dentry = NULL; return false; } return !read_seqcount_retry(&path->dentry->d_seq, seq); } static inline bool legitimize_path(struct nameidata *nd, struct path *path, unsigned seq) { return __legitimize_path(path, seq, nd->m_seq); } static bool legitimize_links(struct nameidata *nd) { int i; if (unlikely(nd->flags & LOOKUP_CACHED)) { drop_links(nd); nd->depth = 0; return false; } for (i = 0; i < nd->depth; i++) { struct saved *last = nd->stack + i; if (unlikely(!legitimize_path(nd, &last->link, last->seq))) { drop_links(nd); nd->depth = i + 1; return false; } } return true; } static bool legitimize_root(struct nameidata *nd) { /* Nothing to do if nd->root is zero or is managed by the VFS user. */ if (!nd->root.mnt || (nd->state & ND_ROOT_PRESET)) return true; nd->state |= ND_ROOT_GRABBED; return legitimize_path(nd, &nd->root, nd->root_seq); } /* * Path walking has 2 modes, rcu-walk and ref-walk (see * Documentation/filesystems/path-lookup.txt). In situations when we can't * continue in RCU mode, we attempt to drop out of rcu-walk mode and grab * normal reference counts on dentries and vfsmounts to transition to ref-walk * mode. Refcounts are grabbed at the last known good point before rcu-walk * got stuck, so ref-walk may continue from there. If this is not successful * (eg. a seqcount has changed), then failure is returned and it's up to caller * to restart the path walk from the beginning in ref-walk mode. */ /** * try_to_unlazy - try to switch to ref-walk mode. * @nd: nameidata pathwalk data * Returns: true on success, false on failure * * try_to_unlazy attempts to legitimize the current nd->path and nd->root * for ref-walk mode. * Must be called from rcu-walk context. * Nothing should touch nameidata between try_to_unlazy() failure and * terminate_walk(). 
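 *
 * (Editorial aside, not part of the original comment: a minimal sketch of
 * the expected calling pattern, shown only for illustration:
 *
 *	if ((nd->flags & LOOKUP_RCU) && !try_to_unlazy(nd))
 *		return -ECHILD;
 *
 * the -ECHILD is propagated up and the whole path walk is then retried
 * from scratch in ref-walk mode, after terminate_walk() has torn down the
 * failed attempt.)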
*/ static bool try_to_unlazy(struct nameidata *nd) { struct dentry *parent = nd->path.dentry; BUG_ON(!(nd->flags & LOOKUP_RCU)); if (unlikely(!legitimize_links(nd))) goto out1; if (unlikely(!legitimize_path(nd, &nd->path, nd->seq))) goto out; if (unlikely(!legitimize_root(nd))) goto out; leave_rcu(nd); BUG_ON(nd->inode != parent->d_inode); return true; out1: nd->path.mnt = NULL; nd->path.dentry = NULL; out: leave_rcu(nd); return false; } /** * try_to_unlazy_next - try to switch to ref-walk mode. * @nd: nameidata pathwalk data * @dentry: next dentry to step into * Returns: true on success, false on failure * * Similar to try_to_unlazy(), but here we have the next dentry already * picked by rcu-walk and want to legitimize that in addition to the current * nd->path and nd->root for ref-walk mode. Must be called from rcu-walk context. * Nothing should touch nameidata between try_to_unlazy_next() failure and * terminate_walk(). */ static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry) { int res; BUG_ON(!(nd->flags & LOOKUP_RCU)); if (unlikely(!legitimize_links(nd))) goto out2; res = __legitimize_mnt(nd->path.mnt, nd->m_seq); if (unlikely(res)) { if (res > 0) goto out2; goto out1; } if (unlikely(!lockref_get_not_dead(&nd->path.dentry->d_lockref))) goto out1; /* * We need to move both the parent and the dentry from the RCU domain * to be properly refcounted. And the sequence number in the dentry * validates *both* dentry counters, since we checked the sequence * number of the parent after we got the child sequence number. So we * know the parent must still be valid if the child sequence number is */ if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) goto out; if (read_seqcount_retry(&dentry->d_seq, nd->next_seq)) goto out_dput; /* * Sequence counts matched. Now make sure that the root is * still valid and get it if required. */ if (unlikely(!legitimize_root(nd))) goto out_dput; leave_rcu(nd); return true; out2: nd->path.mnt = NULL; out1: nd->path.dentry = NULL; out: leave_rcu(nd); return false; out_dput: leave_rcu(nd); dput(dentry); return false; } static inline int d_revalidate(struct inode *dir, const struct qstr *name, struct dentry *dentry, unsigned int flags) { if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) return dentry->d_op->d_revalidate(dir, name, dentry, flags); else return 1; } /** * complete_walk - successful completion of path walk * @nd: pointer nameidata * * If we had been in RCU mode, drop out of it and legitimize nd->path. * Revalidate the final result, unless we'd already done that during * the path walk or the filesystem doesn't ask for it. Return 0 on * success, -error on failure. In case of failure caller does not * need to drop nd->path. */ static int complete_walk(struct nameidata *nd) { struct dentry *dentry = nd->path.dentry; int status; if (nd->flags & LOOKUP_RCU) { /* * We don't want to zero nd->root for scoped-lookups or * externally-managed nd->root. */ if (!(nd->state & ND_ROOT_PRESET)) if (!(nd->flags & LOOKUP_IS_SCOPED)) nd->root.mnt = NULL; nd->flags &= ~LOOKUP_CACHED; if (!try_to_unlazy(nd)) return -ECHILD; } if (unlikely(nd->flags & LOOKUP_IS_SCOPED)) { /* * While the guarantee of LOOKUP_IS_SCOPED is (roughly) "don't * ever step outside the root during lookup" and should already * be guaranteed by the rest of namei, we want to avoid a namei * BUG resulting in userspace being given a path that was not * scoped within the root at some point during the lookup. 
* * So, do a final sanity-check to make sure that in the * worst-case scenario (a complete bypass of LOOKUP_IS_SCOPED) * we won't silently return an fd completely outside of the * requested root to userspace. * * Userspace could move the path outside the root after this * check, but as discussed elsewhere this is not a concern (the * resolved file was inside the root at some point). */ if (!path_is_under(&nd->path, &nd->root)) return -EXDEV; } if (likely(!(nd->state & ND_JUMPED))) return 0; if (likely(!(dentry->d_flags & DCACHE_OP_WEAK_REVALIDATE))) return 0; status = dentry->d_op->d_weak_revalidate(dentry, nd->flags); if (status > 0) return 0; if (!status) status = -ESTALE; return status; } static int set_root(struct nameidata *nd) { struct fs_struct *fs = current->fs; /* * Jumping to the real root in a scoped-lookup is a BUG in namei, but we * still have to ensure it doesn't happen because it will cause a breakout * from the dirfd. */ if (WARN_ON(nd->flags & LOOKUP_IS_SCOPED)) return -ENOTRECOVERABLE; if (nd->flags & LOOKUP_RCU) { unsigned seq; do { seq = read_seqcount_begin(&fs->seq); nd->root = fs->root; nd->root_seq = __read_seqcount_begin(&nd->root.dentry->d_seq); } while (read_seqcount_retry(&fs->seq, seq)); } else { get_fs_root(fs, &nd->root); nd->state |= ND_ROOT_GRABBED; } return 0; } static int nd_jump_root(struct nameidata *nd) { if (unlikely(nd->flags & LOOKUP_BENEATH)) return -EXDEV; if (unlikely(nd->flags & LOOKUP_NO_XDEV)) { /* Absolute path arguments to path_init() are allowed. */ if (nd->path.mnt != NULL && nd->path.mnt != nd->root.mnt) return -EXDEV; } if (!nd->root.mnt) { int error = set_root(nd); if (error) return error; } if (nd->flags & LOOKUP_RCU) { struct dentry *d; nd->path = nd->root; d = nd->path.dentry; nd->inode = d->d_inode; nd->seq = nd->root_seq; if (read_seqcount_retry(&d->d_seq, nd->seq)) return -ECHILD; } else { path_put(&nd->path); nd->path = nd->root; path_get(&nd->path); nd->inode = nd->path.dentry->d_inode; } nd->state |= ND_JUMPED; return 0; } /* * Helper to directly jump to a known parsed path from ->get_link, * caller must have taken a reference to path beforehand. */ int nd_jump_link(const struct path *path) { int error = -ELOOP; struct nameidata *nd = current->nameidata; if (unlikely(nd->flags & LOOKUP_NO_MAGICLINKS)) goto err; error = -EXDEV; if (unlikely(nd->flags & LOOKUP_NO_XDEV)) { if (nd->path.mnt != path->mnt) goto err; } /* Not currently safe for scoped-lookups. 
*/ if (unlikely(nd->flags & LOOKUP_IS_SCOPED)) goto err; path_put(&nd->path); nd->path = *path; nd->inode = nd->path.dentry->d_inode; nd->state |= ND_JUMPED; return 0; err: path_put(path); return error; } static inline void put_link(struct nameidata *nd) { struct saved *last = nd->stack + --nd->depth; do_delayed_call(&last->done); if (!(nd->flags & LOOKUP_RCU)) path_put(&last->link); } static int sysctl_protected_symlinks __read_mostly; static int sysctl_protected_hardlinks __read_mostly; static int sysctl_protected_fifos __read_mostly; static int sysctl_protected_regular __read_mostly; #ifdef CONFIG_SYSCTL static const struct ctl_table namei_sysctls[] = { { .procname = "protected_symlinks", .data = &sysctl_protected_symlinks, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, { .procname = "protected_hardlinks", .data = &sysctl_protected_hardlinks, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, { .procname = "protected_fifos", .data = &sysctl_protected_fifos, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_TWO, }, { .procname = "protected_regular", .data = &sysctl_protected_regular, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_TWO, }, }; static int __init init_fs_namei_sysctls(void) { register_sysctl_init("fs", namei_sysctls); return 0; } fs_initcall(init_fs_namei_sysctls); #endif /* CONFIG_SYSCTL */ /** * may_follow_link - Check symlink following for unsafe situations * @nd: nameidata pathwalk data * @inode: Used for idmapping. * * In the case of the sysctl_protected_symlinks sysctl being enabled, * CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is * in a sticky world-writable directory. This is to protect privileged * processes from failing races against path names that may change out * from under them by way of other users creating malicious symlinks. * It will permit symlinks to be followed only when outside a sticky * world-writable directory, or when the uid of the symlink and follower * match, or when the directory owner matches the symlink's owner. * * Returns 0 if following the symlink is allowed, -ve on error. */ static inline int may_follow_link(struct nameidata *nd, const struct inode *inode) { struct mnt_idmap *idmap; vfsuid_t vfsuid; if (!sysctl_protected_symlinks) return 0; idmap = mnt_idmap(nd->path.mnt); vfsuid = i_uid_into_vfsuid(idmap, inode); /* Allowed if owner and follower match. */ if (vfsuid_eq_kuid(vfsuid, current_fsuid())) return 0; /* Allowed if parent directory not sticky and world-writable. */ if ((nd->dir_mode & (S_ISVTX|S_IWOTH)) != (S_ISVTX|S_IWOTH)) return 0; /* Allowed if parent directory and link owner match. */ if (vfsuid_valid(nd->dir_vfsuid) && vfsuid_eq(nd->dir_vfsuid, vfsuid)) return 0; if (nd->flags & LOOKUP_RCU) return -ECHILD; audit_inode(nd->name, nd->stack[0].link.dentry, 0); audit_log_path_denied(AUDIT_ANOM_LINK, "follow_link"); return -EACCES; } /** * safe_hardlink_source - Check for safe hardlink conditions * @idmap: idmap of the mount the inode was found from * @inode: the source inode to hardlink from * * Return false if at least one of the following conditions: * - inode is not a regular file * - inode is setuid * - inode is setgid and group-exec * - access failure for read and write * * Otherwise returns true. 
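 *
 * (Editorial aside, not part of the original kerneldoc: for instance a
 * plain 0644 regular file that the caller could open read-write is a
 * "safe" source, while a 04755 setuid binary or a 02755 setgid executable
 * is not, regardless of who owns it.)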
*/ static bool safe_hardlink_source(struct mnt_idmap *idmap, struct inode *inode) { umode_t mode = inode->i_mode; /* Special files should not get pinned to the filesystem. */ if (!S_ISREG(mode)) return false; /* Setuid files should not get pinned to the filesystem. */ if (mode & S_ISUID) return false; /* Executable setgid files should not get pinned to the filesystem. */ if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) return false; /* Hardlinking to unreadable or unwritable sources is dangerous. */ if (inode_permission(idmap, inode, MAY_READ | MAY_WRITE)) return false; return true; } /** * may_linkat - Check permissions for creating a hardlink * @idmap: idmap of the mount the inode was found from * @link: the source to hardlink from * * Block hardlink when all of: * - sysctl_protected_hardlinks enabled * - fsuid does not match inode * - hardlink source is unsafe (see safe_hardlink_source() above) * - not CAP_FOWNER in a namespace with the inode owner uid mapped * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. * * Returns 0 if successful, -ve on error. */ int may_linkat(struct mnt_idmap *idmap, const struct path *link) { struct inode *inode = link->dentry->d_inode; /* Inode writeback is not safe when the uid or gid are invalid. */ if (!vfsuid_valid(i_uid_into_vfsuid(idmap, inode)) || !vfsgid_valid(i_gid_into_vfsgid(idmap, inode))) return -EOVERFLOW; if (!sysctl_protected_hardlinks) return 0; /* Source inode owner (or CAP_FOWNER) can hardlink all they like, * otherwise, it must be a safe source. */ if (safe_hardlink_source(idmap, inode) || inode_owner_or_capable(idmap, inode)) return 0; audit_log_path_denied(AUDIT_ANOM_LINK, "linkat"); return -EPERM; } /** * may_create_in_sticky - Check whether an O_CREAT open in a sticky directory * should be allowed, or not, on files that already * exist. * @idmap: idmap of the mount the inode was found from * @nd: nameidata pathwalk data * @inode: the inode of the file to open * * Block an O_CREAT open of a FIFO (or a regular file) when: * - sysctl_protected_fifos (or sysctl_protected_regular) is enabled * - the file already exists * - we are in a sticky directory * - we don't own the file * - the owner of the directory doesn't own the file * - the directory is world writable * If the sysctl_protected_fifos (or sysctl_protected_regular) is set to 2 * the directory doesn't have to be world writable: being group writable will * be enough. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. * * Returns 0 if the open is allowed, -ve on error. 
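 *
 * (Editorial aside, not part of the original kerneldoc: the classic case is
 * an O_CREAT open, without O_EXCL, of an existing regular file in a 01777
 * directory such as /tmp that is owned by neither the caller nor the
 * directory owner; with sysctl_protected_regular enabled this is refused
 * with -EACCES instead of opening an attacker-planted file.)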
*/ static int may_create_in_sticky(struct mnt_idmap *idmap, struct nameidata *nd, struct inode *const inode) { umode_t dir_mode = nd->dir_mode; vfsuid_t dir_vfsuid = nd->dir_vfsuid, i_vfsuid; if (likely(!(dir_mode & S_ISVTX))) return 0; if (S_ISREG(inode->i_mode) && !sysctl_protected_regular) return 0; if (S_ISFIFO(inode->i_mode) && !sysctl_protected_fifos) return 0; i_vfsuid = i_uid_into_vfsuid(idmap, inode); if (vfsuid_eq(i_vfsuid, dir_vfsuid)) return 0; if (vfsuid_eq_kuid(i_vfsuid, current_fsuid())) return 0; if (likely(dir_mode & 0002)) { audit_log_path_denied(AUDIT_ANOM_CREAT, "sticky_create"); return -EACCES; } if (dir_mode & 0020) { if (sysctl_protected_fifos >= 2 && S_ISFIFO(inode->i_mode)) { audit_log_path_denied(AUDIT_ANOM_CREAT, "sticky_create_fifo"); return -EACCES; } if (sysctl_protected_regular >= 2 && S_ISREG(inode->i_mode)) { audit_log_path_denied(AUDIT_ANOM_CREAT, "sticky_create_regular"); return -EACCES; } } return 0; } /* * follow_up - Find the mountpoint of path's vfsmount * * Given a path, find the mountpoint of its source file system. * Replace @path with the path of the mountpoint in the parent mount. * Up is towards /. * * Return 1 if we went up a level and 0 if we were already at the * root. */ int follow_up(struct path *path) { struct mount *mnt = real_mount(path->mnt); struct mount *parent; struct dentry *mountpoint; read_seqlock_excl(&mount_lock); parent = mnt->mnt_parent; if (parent == mnt) { read_sequnlock_excl(&mount_lock); return 0; } mntget(&parent->mnt); mountpoint = dget(mnt->mnt_mountpoint); read_sequnlock_excl(&mount_lock); dput(path->dentry); path->dentry = mountpoint; mntput(path->mnt); path->mnt = &parent->mnt; return 1; } EXPORT_SYMBOL(follow_up); static bool choose_mountpoint_rcu(struct mount *m, const struct path *root, struct path *path, unsigned *seqp) { while (mnt_has_parent(m)) { struct dentry *mountpoint = m->mnt_mountpoint; m = m->mnt_parent; if (unlikely(root->dentry == mountpoint && root->mnt == &m->mnt)) break; if (mountpoint != m->mnt.mnt_root) { path->mnt = &m->mnt; path->dentry = mountpoint; *seqp = read_seqcount_begin(&mountpoint->d_seq); return true; } } return false; } static bool choose_mountpoint(struct mount *m, const struct path *root, struct path *path) { bool found; rcu_read_lock(); while (1) { unsigned seq, mseq = read_seqbegin(&mount_lock); found = choose_mountpoint_rcu(m, root, path, &seq); if (unlikely(!found)) { if (!read_seqretry(&mount_lock, mseq)) break; } else { if (likely(__legitimize_path(path, seq, mseq))) break; rcu_read_unlock(); path_put(path); rcu_read_lock(); } } rcu_read_unlock(); return found; } /* * Perform an automount * - return -EISDIR to tell follow_managed() to stop and return the path we * were called with. */ static int follow_automount(struct path *path, int *count, unsigned lookup_flags) { struct dentry *dentry = path->dentry; /* We don't want to mount if someone's just doing a stat - * unless they're stat'ing a directory and appended a '/' to * the name. * * We do, however, want to mount if someone wants to open or * create a file of any type under the mountpoint, wants to * traverse through the mountpoint or wants to open the * mounted directory. Also, autofs may mark negative dentries * as being automount points. These will need the attentions * of the daemon to instantiate them before they can be used. 
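 *
 * For instance, a plain stat() of an automount point (say, a
 * hypothetical /net/host managed by an automount daemon) does not
 * trigger the mount and just reports the underlying directory, while
 * stat() of "/net/host/" or an open of something beneath it does,
 * because those set LOOKUP_DIRECTORY, LOOKUP_OPEN or LOOKUP_CREATE.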
*/ if (!(lookup_flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY | LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) && dentry->d_inode) return -EISDIR; if (count && (*count)++ >= MAXSYMLINKS) return -ELOOP; return finish_automount(dentry->d_op->d_automount(path), path); } /* * mount traversal - out-of-line part. One note on ->d_flags accesses - * dentries are pinned but not locked here, so negative dentry can go * positive right under us. Use of smp_load_acquire() provides a barrier * sufficient for ->d_inode and ->d_flags consistency. */ static int __traverse_mounts(struct path *path, unsigned flags, bool *jumped, int *count, unsigned lookup_flags) { struct vfsmount *mnt = path->mnt; bool need_mntput = false; int ret = 0; while (flags & DCACHE_MANAGED_DENTRY) { /* Allow the filesystem to manage the transit without i_mutex * being held. */ if (flags & DCACHE_MANAGE_TRANSIT) { ret = path->dentry->d_op->d_manage(path, false); flags = smp_load_acquire(&path->dentry->d_flags); if (ret < 0) break; } if (flags & DCACHE_MOUNTED) { // something's mounted on it.. struct vfsmount *mounted = lookup_mnt(path); if (mounted) { // ... in our namespace dput(path->dentry); if (need_mntput) mntput(path->mnt); path->mnt = mounted; path->dentry = dget(mounted->mnt_root); // here we know it's positive flags = path->dentry->d_flags; need_mntput = true; continue; } } if (!(flags & DCACHE_NEED_AUTOMOUNT)) break; // uncovered automount point ret = follow_automount(path, count, lookup_flags); flags = smp_load_acquire(&path->dentry->d_flags); if (ret < 0) break; } if (ret == -EISDIR) ret = 0; // possible if you race with several mount --move if (need_mntput && path->mnt == mnt) mntput(path->mnt); if (!ret && unlikely(d_flags_negative(flags))) ret = -ENOENT; *jumped = need_mntput; return ret; } static inline int traverse_mounts(struct path *path, bool *jumped, int *count, unsigned lookup_flags) { unsigned flags = smp_load_acquire(&path->dentry->d_flags); /* fastpath */ if (likely(!(flags & DCACHE_MANAGED_DENTRY))) { *jumped = false; if (unlikely(d_flags_negative(flags))) return -ENOENT; return 0; } return __traverse_mounts(path, flags, jumped, count, lookup_flags); } int follow_down_one(struct path *path) { struct vfsmount *mounted; mounted = lookup_mnt(path); if (mounted) { dput(path->dentry); mntput(path->mnt); path->mnt = mounted; path->dentry = dget(mounted->mnt_root); return 1; } return 0; } EXPORT_SYMBOL(follow_down_one); /* * Follow down to the covering mount currently visible to userspace. At each * point, the filesystem owning that dentry may be queried as to whether the * caller is permitted to proceed or not. */ int follow_down(struct path *path, unsigned int flags) { struct vfsmount *mnt = path->mnt; bool jumped; int ret = traverse_mounts(path, &jumped, NULL, flags); if (path->mnt != mnt) mntput(mnt); return ret; } EXPORT_SYMBOL(follow_down); /* * Try to skip to top of mountpoint pile in rcuwalk mode. Fail if * we meet a managed dentry that would need blocking. */ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path) { struct dentry *dentry = path->dentry; unsigned int flags = dentry->d_flags; if (likely(!(flags & DCACHE_MANAGED_DENTRY))) return true; if (unlikely(nd->flags & LOOKUP_NO_XDEV)) return false; for (;;) { /* * Don't forget we might have a non-mountpoint managed dentry * that wants to block transit. 
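 *
 * Here d_manage() is called with rcu_walk set: a return of -EISDIR
 * means "stop traversing and use this dentry as-is", while any other
 * non-zero value makes us give up on the lazy walk so the caller can
 * redo this step in ref-walk mode.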
*/ if (unlikely(flags & DCACHE_MANAGE_TRANSIT)) { int res = dentry->d_op->d_manage(path, true); if (res) return res == -EISDIR; flags = dentry->d_flags; } if (flags & DCACHE_MOUNTED) { struct mount *mounted = __lookup_mnt(path->mnt, dentry); if (mounted) { path->mnt = &mounted->mnt; dentry = path->dentry = mounted->mnt.mnt_root; nd->state |= ND_JUMPED; nd->next_seq = read_seqcount_begin(&dentry->d_seq); flags = dentry->d_flags; // makes sure that non-RCU pathwalk could reach // this state. if (read_seqretry(&mount_lock, nd->m_seq)) return false; continue; } if (read_seqretry(&mount_lock, nd->m_seq)) return false; } return !(flags & DCACHE_NEED_AUTOMOUNT); } } static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry, struct path *path) { bool jumped; int ret; path->mnt = nd->path.mnt; path->dentry = dentry; if (nd->flags & LOOKUP_RCU) { unsigned int seq = nd->next_seq; if (likely(__follow_mount_rcu(nd, path))) return 0; // *path and nd->next_seq might've been clobbered path->mnt = nd->path.mnt; path->dentry = dentry; nd->next_seq = seq; if (!try_to_unlazy_next(nd, dentry)) return -ECHILD; } ret = traverse_mounts(path, &jumped, &nd->total_link_count, nd->flags); if (jumped) { if (unlikely(nd->flags & LOOKUP_NO_XDEV)) ret = -EXDEV; else nd->state |= ND_JUMPED; } if (unlikely(ret)) { dput(path->dentry); if (path->mnt != nd->path.mnt) mntput(path->mnt); } return ret; } /* * This looks up the name in dcache and possibly revalidates the found dentry. * NULL is returned if the dentry does not exist in the cache. */ static struct dentry *lookup_dcache(const struct qstr *name, struct dentry *dir, unsigned int flags) { struct dentry *dentry = d_lookup(dir, name); if (dentry) { int error = d_revalidate(dir->d_inode, name, dentry, flags); if (unlikely(error <= 0)) { if (!error) d_invalidate(dentry); dput(dentry); return ERR_PTR(error); } } return dentry; } /* * Parent directory has inode locked exclusive. This is one * and only case when ->lookup() gets called on non in-lookup * dentries - as the matter of fact, this only gets called * when directory is guaranteed to have no in-lookup children * at all. * Will return -ENOENT if name isn't found and LOOKUP_CREATE wasn't passed. * Will return -EEXIST if name is found and LOOKUP_EXCL was passed. */ struct dentry *lookup_one_qstr_excl(const struct qstr *name, struct dentry *base, unsigned int flags) { struct dentry *dentry = lookup_dcache(name, base, flags); struct dentry *old; struct inode *dir = base->d_inode; if (dentry) goto found; /* Don't create child dentry for a dead directory. */ if (unlikely(IS_DEADDIR(dir))) return ERR_PTR(-ENOENT); dentry = d_alloc(base, name); if (unlikely(!dentry)) return ERR_PTR(-ENOMEM); old = dir->i_op->lookup(dir, dentry, flags); if (unlikely(old)) { dput(dentry); dentry = old; } found: if (IS_ERR(dentry)) return dentry; if (d_is_negative(dentry) && !(flags & LOOKUP_CREATE)) { dput(dentry); return ERR_PTR(-ENOENT); } if (d_is_positive(dentry) && (flags & LOOKUP_EXCL)) { dput(dentry); return ERR_PTR(-EEXIST); } return dentry; } EXPORT_SYMBOL(lookup_one_qstr_excl); /** * lookup_fast - do fast lockless (but racy) lookup of a dentry * @nd: current nameidata * * Do a fast, but racy lookup in the dcache for the given dentry, and * revalidate it. Returns a valid dentry pointer or NULL if one wasn't * found. On error, an ERR_PTR will be returned. * * If this function returns a valid dentry and the walk is no longer * lazy, the dentry will carry a reference that must later be put. 
If * RCU mode is still in force, then this is not the case and the dentry * must be legitimized before use. If this returns NULL, then the walk * will no longer be in RCU mode. */ static struct dentry *lookup_fast(struct nameidata *nd) { struct dentry *dentry, *parent = nd->path.dentry; int status = 1; /* * Rename seqlock is not required here because in the off chance * of a false negative due to a concurrent rename, the caller is * going to fall back to non-racy lookup. */ if (nd->flags & LOOKUP_RCU) { dentry = __d_lookup_rcu(parent, &nd->last, &nd->next_seq); if (unlikely(!dentry)) { if (!try_to_unlazy(nd)) return ERR_PTR(-ECHILD); return NULL; } /* * This sequence count validates that the parent had no * changes while we did the lookup of the dentry above. */ if (read_seqcount_retry(&parent->d_seq, nd->seq)) return ERR_PTR(-ECHILD); status = d_revalidate(nd->inode, &nd->last, dentry, nd->flags); if (likely(status > 0)) return dentry; if (!try_to_unlazy_next(nd, dentry)) return ERR_PTR(-ECHILD); if (status == -ECHILD) /* we'd been told to redo it in non-rcu mode */ status = d_revalidate(nd->inode, &nd->last, dentry, nd->flags); } else { dentry = __d_lookup(parent, &nd->last); if (unlikely(!dentry)) return NULL; status = d_revalidate(nd->inode, &nd->last, dentry, nd->flags); } if (unlikely(status <= 0)) { if (!status) d_invalidate(dentry); dput(dentry); return ERR_PTR(status); } return dentry; } /* Fast lookup failed, do it the slow way */ static struct dentry *__lookup_slow(const struct qstr *name, struct dentry *dir, unsigned int flags) { struct dentry *dentry, *old; struct inode *inode = dir->d_inode; DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); /* Don't go there if it's already dead */ if (unlikely(IS_DEADDIR(inode))) return ERR_PTR(-ENOENT); again: dentry = d_alloc_parallel(dir, name, &wq); if (IS_ERR(dentry)) return dentry; if (unlikely(!d_in_lookup(dentry))) { int error = d_revalidate(inode, name, dentry, flags); if (unlikely(error <= 0)) { if (!error) { d_invalidate(dentry); dput(dentry); goto again; } dput(dentry); dentry = ERR_PTR(error); } } else { old = inode->i_op->lookup(inode, dentry, flags); d_lookup_done(dentry); if (unlikely(old)) { dput(dentry); dentry = old; } } return dentry; } static struct dentry *lookup_slow(const struct qstr *name, struct dentry *dir, unsigned int flags) { struct inode *inode = dir->d_inode; struct dentry *res; inode_lock_shared(inode); res = __lookup_slow(name, dir, flags); inode_unlock_shared(inode); return res; } static inline int may_lookup(struct mnt_idmap *idmap, struct nameidata *restrict nd) { int err, mask; mask = nd->flags & LOOKUP_RCU ? MAY_NOT_BLOCK : 0; err = inode_permission(idmap, nd->inode, mask | MAY_EXEC); if (likely(!err)) return 0; // If we failed, and we weren't in LOOKUP_RCU, it's final if (!(nd->flags & LOOKUP_RCU)) return err; // Drop out of RCU mode to make sure it wasn't transient if (!try_to_unlazy(nd)) return -ECHILD; // redo it all non-lazy if (err != -ECHILD) // hard error return err; return inode_permission(idmap, nd->inode, MAY_EXEC); } static int reserve_stack(struct nameidata *nd, struct path *link) { if (unlikely(nd->total_link_count++ >= MAXSYMLINKS)) return -ELOOP; if (likely(nd->depth != EMBEDDED_LEVELS)) return 0; if (likely(nd->stack != nd->internal)) return 0; if (likely(nd_alloc_stack(nd))) return 0; if (nd->flags & LOOKUP_RCU) { // we need to grab link before we do unlazy. 
And we can't skip // unlazy even if we fail to grab the link - cleanup needs it bool grabbed_link = legitimize_path(nd, link, nd->next_seq); if (!try_to_unlazy(nd) || !grabbed_link) return -ECHILD; if (nd_alloc_stack(nd)) return 0; } return -ENOMEM; } enum {WALK_TRAILING = 1, WALK_MORE = 2, WALK_NOFOLLOW = 4}; static const char *pick_link(struct nameidata *nd, struct path *link, struct inode *inode, int flags) { struct saved *last; const char *res; int error = reserve_stack(nd, link); if (unlikely(error)) { if (!(nd->flags & LOOKUP_RCU)) path_put(link); return ERR_PTR(error); } last = nd->stack + nd->depth++; last->link = *link; clear_delayed_call(&last->done); last->seq = nd->next_seq; if (flags & WALK_TRAILING) { error = may_follow_link(nd, inode); if (unlikely(error)) return ERR_PTR(error); } if (unlikely(nd->flags & LOOKUP_NO_SYMLINKS) || unlikely(link->mnt->mnt_flags & MNT_NOSYMFOLLOW)) return ERR_PTR(-ELOOP); if (!(nd->flags & LOOKUP_RCU)) { touch_atime(&last->link); cond_resched(); } else if (atime_needs_update(&last->link, inode)) { if (!try_to_unlazy(nd)) return ERR_PTR(-ECHILD); touch_atime(&last->link); } error = security_inode_follow_link(link->dentry, inode, nd->flags & LOOKUP_RCU); if (unlikely(error)) return ERR_PTR(error); res = READ_ONCE(inode->i_link); if (!res) { const char * (*get)(struct dentry *, struct inode *, struct delayed_call *); get = inode->i_op->get_link; if (nd->flags & LOOKUP_RCU) { res = get(NULL, inode, &last->done); if (res == ERR_PTR(-ECHILD) && try_to_unlazy(nd)) res = get(link->dentry, inode, &last->done); } else { res = get(link->dentry, inode, &last->done); } if (!res) goto all_done; if (IS_ERR(res)) return res; } if (*res == '/') { error = nd_jump_root(nd); if (unlikely(error)) return ERR_PTR(error); while (unlikely(*++res == '/')) ; } if (*res) return res; all_done: // pure jump put_link(nd); return NULL; } /* * Do we need to follow links? We _really_ want to be able * to do this check without having to look at inode->i_op, * so we keep a cache of "no, this doesn't need follow_link" * for the common case. * * NOTE: dentry must be what nd->next_seq had been sampled from. 
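 *
 * step_into() below first crosses any mounts stacked on the dentry,
 * then either installs the result as the new nd->path (not a symlink,
 * or a symlink we were told not to follow) or hands it to pick_link()
 * so the walk continues with the link body.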
*/ static const char *step_into(struct nameidata *nd, int flags, struct dentry *dentry) { struct path path; struct inode *inode; int err = handle_mounts(nd, dentry, &path); if (err < 0) return ERR_PTR(err); inode = path.dentry->d_inode; if (likely(!d_is_symlink(path.dentry)) || ((flags & WALK_TRAILING) && !(nd->flags & LOOKUP_FOLLOW)) || (flags & WALK_NOFOLLOW)) { /* not a symlink or should not follow */ if (nd->flags & LOOKUP_RCU) { if (read_seqcount_retry(&path.dentry->d_seq, nd->next_seq)) return ERR_PTR(-ECHILD); if (unlikely(!inode)) return ERR_PTR(-ENOENT); } else { dput(nd->path.dentry); if (nd->path.mnt != path.mnt) mntput(nd->path.mnt); } nd->path = path; nd->inode = inode; nd->seq = nd->next_seq; return NULL; } if (nd->flags & LOOKUP_RCU) { /* make sure that d_is_symlink above matches inode */ if (read_seqcount_retry(&path.dentry->d_seq, nd->next_seq)) return ERR_PTR(-ECHILD); } else { if (path.mnt == nd->path.mnt) mntget(path.mnt); } return pick_link(nd, &path, inode, flags); } static struct dentry *follow_dotdot_rcu(struct nameidata *nd) { struct dentry *parent, *old; if (path_equal(&nd->path, &nd->root)) goto in_root; if (unlikely(nd->path.dentry == nd->path.mnt->mnt_root)) { struct path path; unsigned seq; if (!choose_mountpoint_rcu(real_mount(nd->path.mnt), &nd->root, &path, &seq)) goto in_root; if (unlikely(nd->flags & LOOKUP_NO_XDEV)) return ERR_PTR(-ECHILD); nd->path = path; nd->inode = path.dentry->d_inode; nd->seq = seq; // makes sure that non-RCU pathwalk could reach this state if (read_seqretry(&mount_lock, nd->m_seq)) return ERR_PTR(-ECHILD); /* we know that mountpoint was pinned */ } old = nd->path.dentry; parent = old->d_parent; nd->next_seq = read_seqcount_begin(&parent->d_seq); // makes sure that non-RCU pathwalk could reach this state if (read_seqcount_retry(&old->d_seq, nd->seq)) return ERR_PTR(-ECHILD); if (unlikely(!path_connected(nd->path.mnt, parent))) return ERR_PTR(-ECHILD); return parent; in_root: if (read_seqretry(&mount_lock, nd->m_seq)) return ERR_PTR(-ECHILD); if (unlikely(nd->flags & LOOKUP_BENEATH)) return ERR_PTR(-ECHILD); nd->next_seq = nd->seq; return nd->path.dentry; } static struct dentry *follow_dotdot(struct nameidata *nd) { struct dentry *parent; if (path_equal(&nd->path, &nd->root)) goto in_root; if (unlikely(nd->path.dentry == nd->path.mnt->mnt_root)) { struct path path; if (!choose_mountpoint(real_mount(nd->path.mnt), &nd->root, &path)) goto in_root; path_put(&nd->path); nd->path = path; nd->inode = path.dentry->d_inode; if (unlikely(nd->flags & LOOKUP_NO_XDEV)) return ERR_PTR(-EXDEV); } /* rare case of legitimate dget_parent()... */ parent = dget_parent(nd->path.dentry); if (unlikely(!path_connected(nd->path.mnt, parent))) { dput(parent); return ERR_PTR(-ENOENT); } return parent; in_root: if (unlikely(nd->flags & LOOKUP_BENEATH)) return ERR_PTR(-EXDEV); return dget(nd->path.dentry); } static const char *handle_dots(struct nameidata *nd, int type) { if (type == LAST_DOTDOT) { const char *error = NULL; struct dentry *parent; if (!nd->root.mnt) { error = ERR_PTR(set_root(nd)); if (error) return error; } if (nd->flags & LOOKUP_RCU) parent = follow_dotdot_rcu(nd); else parent = follow_dotdot(nd); if (IS_ERR(parent)) return ERR_CAST(parent); error = step_into(nd, WALK_NOFOLLOW, parent); if (unlikely(error)) return error; if (unlikely(nd->flags & LOOKUP_IS_SCOPED)) { /* * If there was a racing rename or mount along our * path, then we can't be sure that ".." hasn't jumped * above nd->root (and so userspace should retry or use * some fallback). 
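 *
 * nd->m_seq and nd->r_seq were sampled from mount_lock and
 * rename_lock in path_init(); if either has changed we return
 * -EAGAIN so that userspace (e.g. an openat2() caller using
 * RESOLVE_BENEATH or RESOLVE_IN_ROOT) can retry the lookup.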
*/ smp_rmb(); if (__read_seqcount_retry(&mount_lock.seqcount, nd->m_seq)) return ERR_PTR(-EAGAIN); if (__read_seqcount_retry(&rename_lock.seqcount, nd->r_seq)) return ERR_PTR(-EAGAIN); } } return NULL; } static const char *walk_component(struct nameidata *nd, int flags) { struct dentry *dentry; /* * "." and ".." are special - ".." especially so because it has * to be able to know about the current root directory and * parent relationships. */ if (unlikely(nd->last_type != LAST_NORM)) { if (!(flags & WALK_MORE) && nd->depth) put_link(nd); return handle_dots(nd, nd->last_type); } dentry = lookup_fast(nd); if (IS_ERR(dentry)) return ERR_CAST(dentry); if (unlikely(!dentry)) { dentry = lookup_slow(&nd->last, nd->path.dentry, nd->flags); if (IS_ERR(dentry)) return ERR_CAST(dentry); } if (!(flags & WALK_MORE) && nd->depth) put_link(nd); return step_into(nd, flags, dentry); } /* * We can do the critical dentry name comparison and hashing * operations one word at a time, but we are limited to: * * - Architectures with fast unaligned word accesses. We could * do a "get_unaligned()" if this helps and is sufficiently * fast. * * - non-CONFIG_DEBUG_PAGEALLOC configurations (so that we * do not trap on the (extremely unlikely) case of a page * crossing operation. * * - Furthermore, we need an efficient 64-bit compile for the * 64-bit case in order to generate the "number of bytes in * the final mask". Again, that could be replaced with a * efficient population count instruction or similar. */ #ifdef CONFIG_DCACHE_WORD_ACCESS #include <asm/word-at-a-time.h> #ifdef HASH_MIX /* Architecture provides HASH_MIX and fold_hash() in <asm/hash.h> */ #elif defined(CONFIG_64BIT) /* * Register pressure in the mixing function is an issue, particularly * on 32-bit x86, but almost any function requires one state value and * one temporary. Instead, use a function designed for two state values * and no temporaries. * * This function cannot create a collision in only two iterations, so * we have two iterations to achieve avalanche. In those two iterations, * we have six layers of mixing, which is enough to spread one bit's * influence out to 2^6 = 64 state bits. * * Rotate constants are scored by considering either 64 one-bit input * deltas or 64*63/2 = 2016 two-bit input deltas, and finding the * probability of that delta causing a change to each of the 128 output * bits, using a sample of random initial states. * * The Shannon entropy of the computed probabilities is then summed * to produce a score. Ideally, any input change has a 50% chance of * toggling any given output bit. * * Mixing scores (in bits) for (12,45): * Input delta: 1-bit 2-bit * 1 round: 713.3 42542.6 * 2 rounds: 2753.7 140389.8 * 3 rounds: 5954.1 233458.2 * 4 rounds: 7862.6 256672.2 * Perfect: 8192 258048 * (64*128) (64*63/2 * 128) */ #define HASH_MIX(x, y, a) \ ( x ^= (a), \ y ^= x, x = rol64(x,12),\ x += y, y = rol64(y,45),\ y *= 9 ) /* * Fold two longs into one 32-bit hash value. This must be fast, but * latency isn't quite as critical, as there is a fair bit of additional * work done before the hash value is used. 
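 *
 * The 64-bit variant below keeps the upper half of a multiplication by
 * GOLDEN_RATIO_64, since those are the best-mixed bits, rather than
 * simply truncating the result.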
*/ static inline unsigned int fold_hash(unsigned long x, unsigned long y) { y ^= x * GOLDEN_RATIO_64; y *= GOLDEN_RATIO_64; return y >> 32; } #else /* 32-bit case */ /* * Mixing scores (in bits) for (7,20): * Input delta: 1-bit 2-bit * 1 round: 330.3 9201.6 * 2 rounds: 1246.4 25475.4 * 3 rounds: 1907.1 31295.1 * 4 rounds: 2042.3 31718.6 * Perfect: 2048 31744 * (32*64) (32*31/2 * 64) */ #define HASH_MIX(x, y, a) \ ( x ^= (a), \ y ^= x, x = rol32(x, 7),\ x += y, y = rol32(y,20),\ y *= 9 ) static inline unsigned int fold_hash(unsigned long x, unsigned long y) { /* Use arch-optimized multiply if one exists */ return __hash_32(y ^ __hash_32(x)); } #endif /* * Return the hash of a string of known length. This is carfully * designed to match hash_name(), which is the more critical function. * In particular, we must end by hashing a final word containing 0..7 * payload bytes, to match the way that hash_name() iterates until it * finds the delimiter after the name. */ unsigned int full_name_hash(const void *salt, const char *name, unsigned int len) { unsigned long a, x = 0, y = (unsigned long)salt; for (;;) { if (!len) goto done; a = load_unaligned_zeropad(name); if (len < sizeof(unsigned long)) break; HASH_MIX(x, y, a); name += sizeof(unsigned long); len -= sizeof(unsigned long); } x ^= a & bytemask_from_count(len); done: return fold_hash(x, y); } EXPORT_SYMBOL(full_name_hash); /* Return the "hash_len" (hash and length) of a null-terminated string */ u64 hashlen_string(const void *salt, const char *name) { unsigned long a = 0, x = 0, y = (unsigned long)salt; unsigned long adata, mask, len; const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; len = 0; goto inside; do { HASH_MIX(x, y, a); len += sizeof(unsigned long); inside: a = load_unaligned_zeropad(name+len); } while (!has_zero(a, &adata, &constants)); adata = prep_zero_mask(a, adata, &constants); mask = create_zero_mask(adata); x ^= a & zero_bytemask(mask); return hashlen_create(fold_hash(x, y), len + find_zero(mask)); } EXPORT_SYMBOL(hashlen_string); /* * Calculate the length and hash of the path component, and * return the length as the result. */ static inline const char *hash_name(struct nameidata *nd, const char *name, unsigned long *lastword) { unsigned long a, b, x, y = (unsigned long)nd->path.dentry; unsigned long adata, bdata, mask, len; const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; /* * The first iteration is special, because it can result in * '.' and '..' and has no mixing other than the final fold. 
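 *
 * The word-at-a-time trick: XORing the loaded word with
 * REPEAT_BYTE('/') turns every '/' byte into zero, so has_zero() on
 * the original word spots a NUL and has_zero() on the XORed copy spots
 * a slash. If a terminator sits in this first word, the masked word
 * goes into *lastword, where it can be compared directly against
 * LAST_WORD_IS_DOT and LAST_WORD_IS_DOTDOT.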
*/ a = load_unaligned_zeropad(name); b = a ^ REPEAT_BYTE('/'); if (has_zero(a, &adata, &constants) | has_zero(b, &bdata, &constants)) { adata = prep_zero_mask(a, adata, &constants); bdata = prep_zero_mask(b, bdata, &constants); mask = create_zero_mask(adata | bdata); a &= zero_bytemask(mask); *lastword = a; len = find_zero(mask); nd->last.hash = fold_hash(a, y); nd->last.len = len; return name + len; } len = 0; x = 0; do { HASH_MIX(x, y, a); len += sizeof(unsigned long); a = load_unaligned_zeropad(name+len); b = a ^ REPEAT_BYTE('/'); } while (!(has_zero(a, &adata, &constants) | has_zero(b, &bdata, &constants))); adata = prep_zero_mask(a, adata, &constants); bdata = prep_zero_mask(b, bdata, &constants); mask = create_zero_mask(adata | bdata); a &= zero_bytemask(mask); x ^= a; len += find_zero(mask); *lastword = 0; // Multi-word components cannot be DOT or DOTDOT nd->last.hash = fold_hash(x, y); nd->last.len = len; return name + len; } /* * Note that the 'last' word is always zero-masked, but * was loaded as a possibly big-endian word. */ #ifdef __BIG_ENDIAN #define LAST_WORD_IS_DOT (0x2eul << (BITS_PER_LONG-8)) #define LAST_WORD_IS_DOTDOT (0x2e2eul << (BITS_PER_LONG-16)) #endif #else /* !CONFIG_DCACHE_WORD_ACCESS: Slow, byte-at-a-time version */ /* Return the hash of a string of known length */ unsigned int full_name_hash(const void *salt, const char *name, unsigned int len) { unsigned long hash = init_name_hash(salt); while (len--) hash = partial_name_hash((unsigned char)*name++, hash); return end_name_hash(hash); } EXPORT_SYMBOL(full_name_hash); /* Return the "hash_len" (hash and length) of a null-terminated string */ u64 hashlen_string(const void *salt, const char *name) { unsigned long hash = init_name_hash(salt); unsigned long len = 0, c; c = (unsigned char)*name; while (c) { len++; hash = partial_name_hash(c, hash); c = (unsigned char)name[len]; } return hashlen_create(end_name_hash(hash), len); } EXPORT_SYMBOL(hashlen_string); /* * We know there's a real path component here of at least * one character. */ static inline const char *hash_name(struct nameidata *nd, const char *name, unsigned long *lastword) { unsigned long hash = init_name_hash(nd->path.dentry); unsigned long len = 0, c, last = 0; c = (unsigned char)*name; do { last = (last << 8) + c; len++; hash = partial_name_hash(c, hash); c = (unsigned char)name[len]; } while (c && c != '/'); // This is reliable for DOT or DOTDOT, since the component // cannot contain NUL characters - top bits being zero means // we cannot have had any other pathnames. *lastword = last; nd->last.hash = end_name_hash(hash); nd->last.len = len; return name + len; } #endif #ifndef LAST_WORD_IS_DOT #define LAST_WORD_IS_DOT 0x2e #define LAST_WORD_IS_DOTDOT 0x2e2e #endif /* * Name resolution. * This is the basic name resolution function, turning a pathname into * the final dentry. We expect 'base' to be positive and a directory. * * Returns 0 and nd will have valid dentry and mnt on success. * Returns error and drops reference to input namei data on failure. */ static int link_path_walk(const char *name, struct nameidata *nd) { int depth = 0; // depth <= nd->depth int err; nd->last_type = LAST_ROOT; nd->flags |= LOOKUP_PARENT; if (IS_ERR(name)) return PTR_ERR(name); while (*name=='/') name++; if (!*name) { nd->dir_mode = 0; // short-circuit the 'hardening' idiocy return 0; } /* At this point we know we have a real path component. 
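 *
 * Each pass of the loop below roughly does: check MAY_EXEC on the
 * current directory, hash the next component, classify it as ".",
 * ".." or a normal name (running the fs's d_hash() if it has one),
 * and then either stop -- the final component stays in nd->last for
 * the caller -- or walk into it, pushing the body of any symlink it
 * hits onto nd->stack and continuing with that as the new name.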
*/ for(;;) { struct mnt_idmap *idmap; const char *link; unsigned long lastword; idmap = mnt_idmap(nd->path.mnt); err = may_lookup(idmap, nd); if (err) return err; nd->last.name = name; name = hash_name(nd, name, &lastword); switch(lastword) { case LAST_WORD_IS_DOTDOT: nd->last_type = LAST_DOTDOT; nd->state |= ND_JUMPED; break; case LAST_WORD_IS_DOT: nd->last_type = LAST_DOT; break; default: nd->last_type = LAST_NORM; nd->state &= ~ND_JUMPED; struct dentry *parent = nd->path.dentry; if (unlikely(parent->d_flags & DCACHE_OP_HASH)) { err = parent->d_op->d_hash(parent, &nd->last); if (err < 0) return err; } } if (!*name) goto OK; /* * If it wasn't NUL, we know it was '/'. Skip that * slash, and continue until no more slashes. */ do { name++; } while (unlikely(*name == '/')); if (unlikely(!*name)) { OK: /* pathname or trailing symlink, done */ if (!depth) { nd->dir_vfsuid = i_uid_into_vfsuid(idmap, nd->inode); nd->dir_mode = nd->inode->i_mode; nd->flags &= ~LOOKUP_PARENT; return 0; } /* last component of nested symlink */ name = nd->stack[--depth].name; link = walk_component(nd, 0); } else { /* not the last component */ link = walk_component(nd, WALK_MORE); } if (unlikely(link)) { if (IS_ERR(link)) return PTR_ERR(link); /* a symlink to follow */ nd->stack[depth++].name = name; name = link; continue; } if (unlikely(!d_can_lookup(nd->path.dentry))) { if (nd->flags & LOOKUP_RCU) { if (!try_to_unlazy(nd)) return -ECHILD; } return -ENOTDIR; } } } /* must be paired with terminate_walk() */ static const char *path_init(struct nameidata *nd, unsigned flags) { int error; const char *s = nd->pathname; /* LOOKUP_CACHED requires RCU, ask caller to retry */ if ((flags & (LOOKUP_RCU | LOOKUP_CACHED)) == LOOKUP_CACHED) return ERR_PTR(-EAGAIN); if (!*s) flags &= ~LOOKUP_RCU; if (flags & LOOKUP_RCU) rcu_read_lock(); else nd->seq = nd->next_seq = 0; nd->flags = flags; nd->state |= ND_JUMPED; nd->m_seq = __read_seqcount_begin(&mount_lock.seqcount); nd->r_seq = __read_seqcount_begin(&rename_lock.seqcount); smp_rmb(); if (nd->state & ND_ROOT_PRESET) { struct dentry *root = nd->root.dentry; struct inode *inode = root->d_inode; if (*s && unlikely(!d_can_lookup(root))) return ERR_PTR(-ENOTDIR); nd->path = nd->root; nd->inode = inode; if (flags & LOOKUP_RCU) { nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); nd->root_seq = nd->seq; } else { path_get(&nd->path); } return s; } nd->root.mnt = NULL; /* Absolute pathname -- fetch the root (LOOKUP_IN_ROOT uses nd->dfd). */ if (*s == '/' && !(flags & LOOKUP_IN_ROOT)) { error = nd_jump_root(nd); if (unlikely(error)) return ERR_PTR(error); return s; } /* Relative pathname -- get the starting-point it is relative to. 
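 *
 * For AT_FDCWD the pwd is sampled under fs->seq in RCU mode or pinned
 * with get_fs_pwd(); for an explicit dfd the starting point is that
 * file's f_path, rejected with -ENOTDIR if there is anything left to
 * look up and it is not a directory.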
*/ if (nd->dfd == AT_FDCWD) { if (flags & LOOKUP_RCU) { struct fs_struct *fs = current->fs; unsigned seq; do { seq = read_seqcount_begin(&fs->seq); nd->path = fs->pwd; nd->inode = nd->path.dentry->d_inode; nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); } while (read_seqcount_retry(&fs->seq, seq)); } else { get_fs_pwd(current->fs, &nd->path); nd->inode = nd->path.dentry->d_inode; } } else { /* Caller must check execute permissions on the starting path component */ CLASS(fd_raw, f)(nd->dfd); struct dentry *dentry; if (fd_empty(f)) return ERR_PTR(-EBADF); if (flags & LOOKUP_LINKAT_EMPTY) { if (fd_file(f)->f_cred != current_cred() && !ns_capable(fd_file(f)->f_cred->user_ns, CAP_DAC_READ_SEARCH)) return ERR_PTR(-ENOENT); } dentry = fd_file(f)->f_path.dentry; if (*s && unlikely(!d_can_lookup(dentry))) return ERR_PTR(-ENOTDIR); nd->path = fd_file(f)->f_path; if (flags & LOOKUP_RCU) { nd->inode = nd->path.dentry->d_inode; nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); } else { path_get(&nd->path); nd->inode = nd->path.dentry->d_inode; } } /* For scoped-lookups we need to set the root to the dirfd as well. */ if (flags & LOOKUP_IS_SCOPED) { nd->root = nd->path; if (flags & LOOKUP_RCU) { nd->root_seq = nd->seq; } else { path_get(&nd->root); nd->state |= ND_ROOT_GRABBED; } } return s; } static inline const char *lookup_last(struct nameidata *nd) { if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len]) nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; return walk_component(nd, WALK_TRAILING); } static int handle_lookup_down(struct nameidata *nd) { if (!(nd->flags & LOOKUP_RCU)) dget(nd->path.dentry); nd->next_seq = nd->seq; return PTR_ERR(step_into(nd, WALK_NOFOLLOW, nd->path.dentry)); } /* Returns 0 and nd will be valid on success; Returns error, otherwise. */ static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path) { const char *s = path_init(nd, flags); int err; if (unlikely(flags & LOOKUP_DOWN) && !IS_ERR(s)) { err = handle_lookup_down(nd); if (unlikely(err < 0)) s = ERR_PTR(err); } while (!(err = link_path_walk(s, nd)) && (s = lookup_last(nd)) != NULL) ; if (!err && unlikely(nd->flags & LOOKUP_MOUNTPOINT)) { err = handle_lookup_down(nd); nd->state &= ~ND_JUMPED; // no d_weak_revalidate(), please... } if (!err) err = complete_walk(nd); if (!err && nd->flags & LOOKUP_DIRECTORY) if (!d_can_lookup(nd->path.dentry)) err = -ENOTDIR; if (!err) { *path = nd->path; nd->path.mnt = NULL; nd->path.dentry = NULL; } terminate_walk(nd); return err; } int filename_lookup(int dfd, struct filename *name, unsigned flags, struct path *path, struct path *root) { int retval; struct nameidata nd; if (IS_ERR(name)) return PTR_ERR(name); set_nameidata(&nd, dfd, name, root); retval = path_lookupat(&nd, flags | LOOKUP_RCU, path); if (unlikely(retval == -ECHILD)) retval = path_lookupat(&nd, flags, path); if (unlikely(retval == -ESTALE)) retval = path_lookupat(&nd, flags | LOOKUP_REVAL, path); if (likely(!retval)) audit_inode(name, path->dentry, flags & LOOKUP_MOUNTPOINT ? AUDIT_INODE_NOEVAL : 0); restore_nameidata(); return retval; } /* Returns 0 and nd will be valid on success; Returns error, otherwise. 
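 * Like path_lookupat() above, this is normally attempted up to three
 * times by its caller: first in RCU mode, again in ref-walk mode on
 * -ECHILD, and finally with LOOKUP_REVAL if the fs reported -ESTALE.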
*/ static int path_parentat(struct nameidata *nd, unsigned flags, struct path *parent) { const char *s = path_init(nd, flags); int err = link_path_walk(s, nd); if (!err) err = complete_walk(nd); if (!err) { *parent = nd->path; nd->path.mnt = NULL; nd->path.dentry = NULL; } terminate_walk(nd); return err; } /* Note: this does not consume "name" */ static int __filename_parentat(int dfd, struct filename *name, unsigned int flags, struct path *parent, struct qstr *last, int *type, const struct path *root) { int retval; struct nameidata nd; if (IS_ERR(name)) return PTR_ERR(name); set_nameidata(&nd, dfd, name, root); retval = path_parentat(&nd, flags | LOOKUP_RCU, parent); if (unlikely(retval == -ECHILD)) retval = path_parentat(&nd, flags, parent); if (unlikely(retval == -ESTALE)) retval = path_parentat(&nd, flags | LOOKUP_REVAL, parent); if (likely(!retval)) { *last = nd.last; *type = nd.last_type; audit_inode(name, parent->dentry, AUDIT_INODE_PARENT); } restore_nameidata(); return retval; } static int filename_parentat(int dfd, struct filename *name, unsigned int flags, struct path *parent, struct qstr *last, int *type) { return __filename_parentat(dfd, name, flags, parent, last, type, NULL); } /* does lookup, returns the object with parent locked */ static struct dentry *__kern_path_locked(int dfd, struct filename *name, struct path *path) { struct dentry *d; struct qstr last; int type, error; error = filename_parentat(dfd, name, 0, path, &last, &type); if (error) return ERR_PTR(error); if (unlikely(type != LAST_NORM)) { path_put(path); return ERR_PTR(-EINVAL); } inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT); d = lookup_one_qstr_excl(&last, path->dentry, 0); if (IS_ERR(d)) { inode_unlock(path->dentry->d_inode); path_put(path); } return d; } struct dentry *kern_path_locked(const char *name, struct path *path) { struct filename *filename = getname_kernel(name); struct dentry *res = __kern_path_locked(AT_FDCWD, filename, path); putname(filename); return res; } struct dentry *user_path_locked_at(int dfd, const char __user *name, struct path *path) { struct filename *filename = getname(name); struct dentry *res = __kern_path_locked(dfd, filename, path); putname(filename); return res; } EXPORT_SYMBOL(user_path_locked_at); int kern_path(const char *name, unsigned int flags, struct path *path) { struct filename *filename = getname_kernel(name); int ret = filename_lookup(AT_FDCWD, filename, flags, path, NULL); putname(filename); return ret; } EXPORT_SYMBOL(kern_path); /** * vfs_path_parent_lookup - lookup a parent path relative to a dentry-vfsmount pair * @filename: filename structure * @flags: lookup flags * @parent: pointer to struct path to fill * @last: last component * @type: type of the last component * @root: pointer to struct path of the base directory */ int vfs_path_parent_lookup(struct filename *filename, unsigned int flags, struct path *parent, struct qstr *last, int *type, const struct path *root) { return __filename_parentat(AT_FDCWD, filename, flags, parent, last, type, root); } EXPORT_SYMBOL(vfs_path_parent_lookup); /** * vfs_path_lookup - lookup a file path relative to a dentry-vfsmount pair * @dentry: pointer to dentry of the base directory * @mnt: pointer to vfs mount of the base directory * @name: pointer to file name * @flags: lookup flags * @path: pointer to struct path to fill */ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, const char *name, unsigned int flags, struct path *path) { struct filename *filename; struct path root = {.mnt = mnt, .dentry 
= dentry}; int ret; filename = getname_kernel(name); /* the first argument of filename_lookup() is ignored with root */ ret = filename_lookup(AT_FDCWD, filename, flags, path, &root); putname(filename); return ret; } EXPORT_SYMBOL(vfs_path_lookup); static int lookup_one_common(struct mnt_idmap *idmap, const char *name, struct dentry *base, int len, struct qstr *this) { this->name = name; this->len = len; this->hash = full_name_hash(base, name, len); if (!len) return -EACCES; if (is_dot_dotdot(name, len)) return -EACCES; while (len--) { unsigned int c = *(const unsigned char *)name++; if (c == '/' || c == '\0') return -EACCES; } /* * See if the low-level filesystem might want * to use its own hash.. */ if (base->d_flags & DCACHE_OP_HASH) { int err = base->d_op->d_hash(base, this); if (err < 0) return err; } return inode_permission(idmap, base->d_inode, MAY_EXEC); } /** * try_lookup_one_len - filesystem helper to lookup single pathname component * @name: pathname component to lookup * @base: base directory to lookup from * @len: maximum length @len should be interpreted to * * Look up a dentry by name in the dcache, returning NULL if it does not * currently exist. The function does not try to create a dentry. * * Note that this routine is purely a helper for filesystem usage and should * not be called by generic code. * * No locks need be held - only a counted reference to @base is needed. * */ struct dentry *try_lookup_one_len(const char *name, struct dentry *base, int len) { struct qstr this; int err; err = lookup_one_common(&nop_mnt_idmap, name, base, len, &this); if (err) return ERR_PTR(err); return lookup_dcache(&this, base, 0); } EXPORT_SYMBOL(try_lookup_one_len); /** * lookup_one_len - filesystem helper to lookup single pathname component * @name: pathname component to lookup * @base: base directory to lookup from * @len: maximum length @len should be interpreted to * * Note that this routine is purely a helper for filesystem usage and should * not be called by generic code. * * The caller must hold base->i_mutex. */ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) { struct dentry *dentry; struct qstr this; int err; WARN_ON_ONCE(!inode_is_locked(base->d_inode)); err = lookup_one_common(&nop_mnt_idmap, name, base, len, &this); if (err) return ERR_PTR(err); dentry = lookup_dcache(&this, base, 0); return dentry ? dentry : __lookup_slow(&this, base, 0); } EXPORT_SYMBOL(lookup_one_len); /** * lookup_one - filesystem helper to lookup single pathname component * @idmap: idmap of the mount the lookup is performed from * @name: pathname component to lookup * @base: base directory to lookup from * @len: maximum length @len should be interpreted to * * Note that this routine is purely a helper for filesystem usage and should * not be called by generic code. * * The caller must hold base->i_mutex. */ struct dentry *lookup_one(struct mnt_idmap *idmap, const char *name, struct dentry *base, int len) { struct dentry *dentry; struct qstr this; int err; WARN_ON_ONCE(!inode_is_locked(base->d_inode)); err = lookup_one_common(idmap, name, base, len, &this); if (err) return ERR_PTR(err); dentry = lookup_dcache(&this, base, 0); return dentry ? 
dentry : __lookup_slow(&this, base, 0); } EXPORT_SYMBOL(lookup_one); /** * lookup_one_unlocked - filesystem helper to lookup single pathname component * @idmap: idmap of the mount the lookup is performed from * @name: pathname component to lookup * @base: base directory to lookup from * @len: maximum length @len should be interpreted to * * Note that this routine is purely a helper for filesystem usage and should * not be called by generic code. * * Unlike lookup_one_len, it should be called without the parent * i_mutex held, and will take the i_mutex itself if necessary. */ struct dentry *lookup_one_unlocked(struct mnt_idmap *idmap, const char *name, struct dentry *base, int len) { struct qstr this; int err; struct dentry *ret; err = lookup_one_common(idmap, name, base, len, &this); if (err) return ERR_PTR(err); ret = lookup_dcache(&this, base, 0); if (!ret) ret = lookup_slow(&this, base, 0); return ret; } EXPORT_SYMBOL(lookup_one_unlocked); /** * lookup_one_positive_unlocked - filesystem helper to lookup single * pathname component * @idmap: idmap of the mount the lookup is performed from * @name: pathname component to lookup * @base: base directory to lookup from * @len: maximum length @len should be interpreted to * * This helper will yield ERR_PTR(-ENOENT) on negatives. The helper returns * known positive or ERR_PTR(). This is what most of the users want. * * Note that pinned negative with unlocked parent _can_ become positive at any * time, so callers of lookup_one_unlocked() need to be very careful; pinned * positives have >d_inode stable, so this one avoids such problems. * * Note that this routine is purely a helper for filesystem usage and should * not be called by generic code. * * The helper should be called without i_mutex held. */ struct dentry *lookup_one_positive_unlocked(struct mnt_idmap *idmap, const char *name, struct dentry *base, int len) { struct dentry *ret = lookup_one_unlocked(idmap, name, base, len); if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) { dput(ret); ret = ERR_PTR(-ENOENT); } return ret; } EXPORT_SYMBOL(lookup_one_positive_unlocked); /** * lookup_one_len_unlocked - filesystem helper to lookup single pathname component * @name: pathname component to lookup * @base: base directory to lookup from * @len: maximum length @len should be interpreted to * * Note that this routine is purely a helper for filesystem usage and should * not be called by generic code. * * Unlike lookup_one_len, it should be called without the parent * i_mutex held, and will take the i_mutex itself if necessary. */ struct dentry *lookup_one_len_unlocked(const char *name, struct dentry *base, int len) { return lookup_one_unlocked(&nop_mnt_idmap, name, base, len); } EXPORT_SYMBOL(lookup_one_len_unlocked); /* * Like lookup_one_len_unlocked(), except that it yields ERR_PTR(-ENOENT) * on negatives. Returns known positive or ERR_PTR(); that's what * most of the users want. Note that pinned negative with unlocked parent * _can_ become positive at any time, so callers of lookup_one_len_unlocked() * need to be very careful; pinned positives have ->d_inode stable, so * this one avoids such problems. */ struct dentry *lookup_positive_unlocked(const char *name, struct dentry *base, int len) { return lookup_one_positive_unlocked(&nop_mnt_idmap, name, base, len); } EXPORT_SYMBOL(lookup_positive_unlocked); #ifdef CONFIG_UNIX98_PTYS int path_pts(struct path *path) { /* Find something mounted on "pts" in the same directory as * the input path. 
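 * (E.g. this is how an open of /dev/ptmx locates the devpts instance
 * mounted on the adjacent "pts" directory.)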
*/ struct dentry *parent = dget_parent(path->dentry); struct dentry *child; struct qstr this = QSTR_INIT("pts", 3); if (unlikely(!path_connected(path->mnt, parent))) { dput(parent); return -ENOENT; } dput(path->dentry); path->dentry = parent; child = d_hash_and_lookup(parent, &this); if (IS_ERR_OR_NULL(child)) return -ENOENT; path->dentry = child; dput(parent); follow_down(path, 0); return 0; } #endif int user_path_at(int dfd, const char __user *name, unsigned flags, struct path *path) { struct filename *filename = getname_flags(name, flags); int ret = filename_lookup(dfd, filename, flags, path, NULL); putname(filename); return ret; } EXPORT_SYMBOL(user_path_at); int __check_sticky(struct mnt_idmap *idmap, struct inode *dir, struct inode *inode) { kuid_t fsuid = current_fsuid(); if (vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode), fsuid)) return 0; if (vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, dir), fsuid)) return 0; return !capable_wrt_inode_uidgid(idmap, inode, CAP_FOWNER); } EXPORT_SYMBOL(__check_sticky); /* * Check whether we can remove a link victim from directory dir, check * whether the type of victim is right. * 1. We can't do it if dir is read-only (done in permission()) * 2. We should have write and exec permissions on dir * 3. We can't remove anything from append-only dir * 4. We can't do anything with immutable dir (done in permission()) * 5. If the sticky bit on dir is set we should either * a. be owner of dir, or * b. be owner of victim, or * c. have CAP_FOWNER capability * 6. If the victim is append-only or immutable we can't do antyhing with * links pointing to it. * 7. If the victim has an unknown uid or gid we can't change the inode. * 8. If we were asked to remove a directory and victim isn't one - ENOTDIR. * 9. If we were asked to remove a non-directory and victim isn't one - EISDIR. * 10. We can't remove a root or mountpoint. * 11. We don't allow removal of NFS sillyrenamed files; it's handled by * nfs_async_unlink(). */ static int may_delete(struct mnt_idmap *idmap, struct inode *dir, struct dentry *victim, bool isdir) { struct inode *inode = d_backing_inode(victim); int error; if (d_is_negative(victim)) return -ENOENT; BUG_ON(!inode); BUG_ON(victim->d_parent->d_inode != dir); /* Inode writeback is not safe when the uid or gid are invalid. */ if (!vfsuid_valid(i_uid_into_vfsuid(idmap, inode)) || !vfsgid_valid(i_gid_into_vfsgid(idmap, inode))) return -EOVERFLOW; audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE); error = inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC); if (error) return error; if (IS_APPEND(dir)) return -EPERM; if (check_sticky(idmap, dir, inode) || IS_APPEND(inode) || IS_IMMUTABLE(inode) || IS_SWAPFILE(inode) || HAS_UNMAPPED_ID(idmap, inode)) return -EPERM; if (isdir) { if (!d_is_dir(victim)) return -ENOTDIR; if (IS_ROOT(victim)) return -EBUSY; } else if (d_is_dir(victim)) return -EISDIR; if (IS_DEADDIR(dir)) return -ENOENT; if (victim->d_flags & DCACHE_NFSFS_RENAMED) return -EBUSY; return 0; } /* Check whether we can create an object with dentry child in directory * dir. * 1. We can't do it if child already exists (open has special treatment for * this case, but since we are inlined it's OK) * 2. We can't do it if dir is read-only (done in permission()) * 3. We can't do it if the fs can't represent the fsuid or fsgid. * 4. We should have write and exec permissions on dir * 5. 
We can't do it if dir is immutable (done in permission()) */ static inline int may_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *child) { audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE); if (child->d_inode) return -EEXIST; if (IS_DEADDIR(dir)) return -ENOENT; if (!fsuidgid_has_mapping(dir->i_sb, idmap)) return -EOVERFLOW; return inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC); } // p1 != p2, both are on the same filesystem, ->s_vfs_rename_mutex is held static struct dentry *lock_two_directories(struct dentry *p1, struct dentry *p2) { struct dentry *p = p1, *q = p2, *r; while ((r = p->d_parent) != p2 && r != p) p = r; if (r == p2) { // p is a child of p2 and an ancestor of p1 or p1 itself inode_lock_nested(p2->d_inode, I_MUTEX_PARENT); inode_lock_nested(p1->d_inode, I_MUTEX_PARENT2); return p; } // p is the root of connected component that contains p1 // p2 does not occur on the path from p to p1 while ((r = q->d_parent) != p1 && r != p && r != q) q = r; if (r == p1) { // q is a child of p1 and an ancestor of p2 or p2 itself inode_lock_nested(p1->d_inode, I_MUTEX_PARENT); inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2); return q; } else if (likely(r == p)) { // both p2 and p1 are descendents of p inode_lock_nested(p1->d_inode, I_MUTEX_PARENT); inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2); return NULL; } else { // no common ancestor at the time we'd been called mutex_unlock(&p1->d_sb->s_vfs_rename_mutex); return ERR_PTR(-EXDEV); } } /* * p1 and p2 should be directories on the same fs. */ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) { if (p1 == p2) { inode_lock_nested(p1->d_inode, I_MUTEX_PARENT); return NULL; } mutex_lock(&p1->d_sb->s_vfs_rename_mutex); return lock_two_directories(p1, p2); } EXPORT_SYMBOL(lock_rename); /* * c1 and p2 should be on the same fs. */ struct dentry *lock_rename_child(struct dentry *c1, struct dentry *p2) { if (READ_ONCE(c1->d_parent) == p2) { /* * hopefully won't need to touch ->s_vfs_rename_mutex at all. */ inode_lock_nested(p2->d_inode, I_MUTEX_PARENT); /* * now that p2 is locked, nobody can move in or out of it, * so the test below is safe. */ if (likely(c1->d_parent == p2)) return NULL; /* * c1 got moved out of p2 while we'd been taking locks; * unlock and fall back to slow case. */ inode_unlock(p2->d_inode); } mutex_lock(&c1->d_sb->s_vfs_rename_mutex); /* * nobody can move out of any directories on this fs. */ if (likely(c1->d_parent != p2)) return lock_two_directories(c1->d_parent, p2); /* * c1 got moved into p2 while we were taking locks; * we need p2 locked and ->s_vfs_rename_mutex unlocked, * for consistency with lock_rename(). */ inode_lock_nested(p2->d_inode, I_MUTEX_PARENT); mutex_unlock(&c1->d_sb->s_vfs_rename_mutex); return NULL; } EXPORT_SYMBOL(lock_rename_child); void unlock_rename(struct dentry *p1, struct dentry *p2) { inode_unlock(p1->d_inode); if (p1 != p2) { inode_unlock(p2->d_inode); mutex_unlock(&p1->d_sb->s_vfs_rename_mutex); } } EXPORT_SYMBOL(unlock_rename); /** * vfs_prepare_mode - prepare the mode to be used for a new inode * @idmap: idmap of the mount the inode was found from * @dir: parent directory of the new inode * @mode: mode of the new inode * @mask_perms: allowed permission by the vfs * @type: type of file to be created * * This helper consolidates and enforces vfs restrictions on the @mode of a new * object to be created. * * Umask stripping depends on whether the filesystem supports POSIX ACLs (see * the kernel documentation for mode_strip_umask()). 
Moving umask stripping * after setgid stripping allows the same ordering for both non-POSIX ACL and * POSIX ACL supporting filesystems. * * Note that it's currently valid for @type to be 0 if a directory is created. * Filesystems raise that flag individually and we need to check whether each * filesystem can deal with receiving S_IFDIR from the vfs before we enforce a * non-zero type. * * Returns: mode to be passed to the filesystem */ static inline umode_t vfs_prepare_mode(struct mnt_idmap *idmap, const struct inode *dir, umode_t mode, umode_t mask_perms, umode_t type) { mode = mode_strip_sgid(idmap, dir, mode); mode = mode_strip_umask(dir, mode); /* * Apply the vfs mandated allowed permission mask and set the type of * file to be created before we call into the filesystem. */ mode &= (mask_perms & ~S_IFMT); mode |= (type & S_IFMT); return mode; } /** * vfs_create - create new file * @idmap: idmap of the mount the inode was found from * @dir: inode of the parent directory * @dentry: dentry of the child file * @mode: mode of the child file * @want_excl: whether the file must not yet exist * * Create a new file. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. */ int vfs_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool want_excl) { int error; error = may_create(idmap, dir, dentry); if (error) return error; if (!dir->i_op->create) return -EACCES; /* shouldn't it be ENOSYS? */ mode = vfs_prepare_mode(idmap, dir, mode, S_IALLUGO, S_IFREG); error = security_inode_create(dir, dentry, mode); if (error) return error; error = dir->i_op->create(idmap, dir, dentry, mode, want_excl); if (!error) fsnotify_create(dir, dentry); return error; } EXPORT_SYMBOL(vfs_create); int vfs_mkobj(struct dentry *dentry, umode_t mode, int (*f)(struct dentry *, umode_t, void *), void *arg) { struct inode *dir = dentry->d_parent->d_inode; int error = may_create(&nop_mnt_idmap, dir, dentry); if (error) return error; mode &= S_IALLUGO; mode |= S_IFREG; error = security_inode_create(dir, dentry, mode); if (error) return error; error = f(dentry, mode, arg); if (!error) fsnotify_create(dir, dentry); return error; } EXPORT_SYMBOL(vfs_mkobj); bool may_open_dev(const struct path *path) { return !(path->mnt->mnt_flags & MNT_NODEV) && !(path->mnt->mnt_sb->s_iflags & SB_I_NODEV); } static int may_open(struct mnt_idmap *idmap, const struct path *path, int acc_mode, int flag) { struct dentry *dentry = path->dentry; struct inode *inode = dentry->d_inode; int error; if (!inode) return -ENOENT; switch (inode->i_mode & S_IFMT) { case S_IFLNK: return -ELOOP; case S_IFDIR: if (acc_mode & MAY_WRITE) return -EISDIR; if (acc_mode & MAY_EXEC) return -EACCES; break; case S_IFBLK: case S_IFCHR: if (!may_open_dev(path)) return -EACCES; fallthrough; case S_IFIFO: case S_IFSOCK: if (acc_mode & MAY_EXEC) return -EACCES; flag &= ~O_TRUNC; break; case S_IFREG: if ((acc_mode & MAY_EXEC) && path_noexec(path)) return -EACCES; break; default: VFS_BUG_ON_INODE(1, inode); } error = inode_permission(idmap, inode, MAY_OPEN | acc_mode); if (error) return error; /* * An append-only file must be opened in append mode for writing. 
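 *
 * For example, on an append-only file:
 *
 *	open(file, O_WRONLY | O_APPEND)		is allowed here
 *	open(file, O_WRONLY)			fails with -EPERM
 *	open(file, O_RDWR | O_APPEND | O_TRUNC)	fails with -EPERM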
*/ if (IS_APPEND(inode)) { if ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND)) return -EPERM; if (flag & O_TRUNC) return -EPERM; } /* O_NOATIME can only be set by the owner or superuser */ if (flag & O_NOATIME && !inode_owner_or_capable(idmap, inode)) return -EPERM; return 0; } static int handle_truncate(struct mnt_idmap *idmap, struct file *filp) { const struct path *path = &filp->f_path; struct inode *inode = path->dentry->d_inode; int error = get_write_access(inode); if (error) return error; error = security_file_truncate(filp); if (!error) { error = do_truncate(idmap, path->dentry, 0, ATTR_MTIME|ATTR_CTIME|ATTR_OPEN, filp); } put_write_access(inode); return error; } static inline int open_to_namei_flags(int flag) { if ((flag & O_ACCMODE) == 3) flag--; return flag; } static int may_o_create(struct mnt_idmap *idmap, const struct path *dir, struct dentry *dentry, umode_t mode) { int error = security_path_mknod(dir, dentry, mode, 0); if (error) return error; if (!fsuidgid_has_mapping(dir->dentry->d_sb, idmap)) return -EOVERFLOW; error = inode_permission(idmap, dir->dentry->d_inode, MAY_WRITE | MAY_EXEC); if (error) return error; return security_inode_create(dir->dentry->d_inode, dentry, mode); } /* * Attempt to atomically look up, create and open a file from a negative * dentry. * * Returns 0 if successful. The file will have been created and attached to * @file by the filesystem calling finish_open(). * * If the file was looked up only or didn't need creating, FMODE_OPENED won't * be set. The caller will need to perform the open themselves. @path will * have been updated to point to the new dentry. This may be negative. * * Returns an error code otherwise. */ static struct dentry *atomic_open(struct nameidata *nd, struct dentry *dentry, struct file *file, int open_flag, umode_t mode) { struct dentry *const DENTRY_NOT_SET = (void *) -1UL; struct inode *dir = nd->path.dentry->d_inode; int error; if (nd->flags & LOOKUP_DIRECTORY) open_flag |= O_DIRECTORY; file->f_path.dentry = DENTRY_NOT_SET; file->f_path.mnt = nd->path.mnt; error = dir->i_op->atomic_open(dir, dentry, file, open_to_namei_flags(open_flag), mode); d_lookup_done(dentry); if (!error) { if (file->f_mode & FMODE_OPENED) { if (unlikely(dentry != file->f_path.dentry)) { dput(dentry); dentry = dget(file->f_path.dentry); } } else if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) { error = -EIO; } else { if (file->f_path.dentry) { dput(dentry); dentry = file->f_path.dentry; } if (unlikely(d_is_negative(dentry))) error = -ENOENT; } } if (error) { dput(dentry); dentry = ERR_PTR(error); } return dentry; } /* * Look up and maybe create and open the last component. * * Must be called with parent locked (exclusive in O_CREAT case). * * Returns 0 on success, that is, if * the file was successfully atomically created (if necessary) and opened, or * the file was not completely opened at this time, though lookups and * creations were performed. * These case are distinguished by presence of FMODE_OPENED on file->f_mode. * In the latter case dentry returned in @path might be negative if O_CREAT * hadn't been specified. * * An error code is returned on failure. 
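 *
 * Write access to the vfsmount is checked by the caller and passed in
 * as got_write: without it O_TRUNC is dropped and an O_CREAT request
 * becomes a pending -EROFS, reported only if the file turns out not to
 * exist.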
*/ static struct dentry *lookup_open(struct nameidata *nd, struct file *file, const struct open_flags *op, bool got_write) { struct mnt_idmap *idmap; struct dentry *dir = nd->path.dentry; struct inode *dir_inode = dir->d_inode; int open_flag = op->open_flag; struct dentry *dentry; int error, create_error = 0; umode_t mode = op->mode; DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); if (unlikely(IS_DEADDIR(dir_inode))) return ERR_PTR(-ENOENT); file->f_mode &= ~FMODE_CREATED; dentry = d_lookup(dir, &nd->last); for (;;) { if (!dentry) { dentry = d_alloc_parallel(dir, &nd->last, &wq); if (IS_ERR(dentry)) return dentry; } if (d_in_lookup(dentry)) break; error = d_revalidate(dir_inode, &nd->last, dentry, nd->flags); if (likely(error > 0)) break; if (error) goto out_dput; d_invalidate(dentry); dput(dentry); dentry = NULL; } if (dentry->d_inode) { /* Cached positive dentry: will open in f_op->open */ return dentry; } if (open_flag & O_CREAT) audit_inode(nd->name, dir, AUDIT_INODE_PARENT); /* * Checking write permission is tricky, because we don't know if we are * going to actually need it: O_CREAT opens should work as long as the * file exists. But checking existence breaks atomicity. The trick is * to check access and if not granted clear O_CREAT from the flags. * * Another problem is returning the "right" error value (e.g. for an * O_EXCL open we want to return EEXIST not EROFS). */ if (unlikely(!got_write)) open_flag &= ~O_TRUNC; idmap = mnt_idmap(nd->path.mnt); if (open_flag & O_CREAT) { if (open_flag & O_EXCL) open_flag &= ~O_TRUNC; mode = vfs_prepare_mode(idmap, dir->d_inode, mode, mode, mode); if (likely(got_write)) create_error = may_o_create(idmap, &nd->path, dentry, mode); else create_error = -EROFS; } if (create_error) open_flag &= ~O_CREAT; if (dir_inode->i_op->atomic_open) { dentry = atomic_open(nd, dentry, file, open_flag, mode); if (unlikely(create_error) && dentry == ERR_PTR(-ENOENT)) dentry = ERR_PTR(create_error); return dentry; } if (d_in_lookup(dentry)) { struct dentry *res = dir_inode->i_op->lookup(dir_inode, dentry, nd->flags); d_lookup_done(dentry); if (unlikely(res)) { if (IS_ERR(res)) { error = PTR_ERR(res); goto out_dput; } dput(dentry); dentry = res; } } /* Negative dentry, just create the file */ if (!dentry->d_inode && (open_flag & O_CREAT)) { file->f_mode |= FMODE_CREATED; audit_inode_child(dir_inode, dentry, AUDIT_TYPE_CHILD_CREATE); if (!dir_inode->i_op->create) { error = -EACCES; goto out_dput; } error = dir_inode->i_op->create(idmap, dir_inode, dentry, mode, open_flag & O_EXCL); if (error) goto out_dput; } if (unlikely(create_error) && !dentry->d_inode) { error = create_error; goto out_dput; } return dentry; out_dput: dput(dentry); return ERR_PTR(error); } static inline bool trailing_slashes(struct nameidata *nd) { return (bool)nd->last.name[nd->last.len]; } static struct dentry *lookup_fast_for_open(struct nameidata *nd, int open_flag) { struct dentry *dentry; if (open_flag & O_CREAT) { if (trailing_slashes(nd)) return ERR_PTR(-EISDIR); /* Don't bother on an O_EXCL create */ if (open_flag & O_EXCL) return NULL; } if (trailing_slashes(nd)) nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; dentry = lookup_fast(nd); if (IS_ERR_OR_NULL(dentry)) return dentry; if (open_flag & O_CREAT) { /* Discard negative dentries.
Need inode_lock to do the create */ if (!dentry->d_inode) { if (!(nd->flags & LOOKUP_RCU)) dput(dentry); dentry = NULL; } } return dentry; } static const char *open_last_lookups(struct nameidata *nd, struct file *file, const struct open_flags *op) { struct dentry *dir = nd->path.dentry; int open_flag = op->open_flag; bool got_write = false; struct dentry *dentry; const char *res; nd->flags |= op->intent; if (nd->last_type != LAST_NORM) { if (nd->depth) put_link(nd); return handle_dots(nd, nd->last_type); } /* We _can_ be in RCU mode here */ dentry = lookup_fast_for_open(nd, open_flag); if (IS_ERR(dentry)) return ERR_CAST(dentry); if (likely(dentry)) goto finish_lookup; if (!(open_flag & O_CREAT)) { if (WARN_ON_ONCE(nd->flags & LOOKUP_RCU)) return ERR_PTR(-ECHILD); } else { if (nd->flags & LOOKUP_RCU) { if (!try_to_unlazy(nd)) return ERR_PTR(-ECHILD); } } if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) { got_write = !mnt_want_write(nd->path.mnt); /* * do _not_ fail yet - we might not need that or fail with * a different error; let lookup_open() decide; we'll be * dropping this one anyway. */ } if (open_flag & O_CREAT) inode_lock(dir->d_inode); else inode_lock_shared(dir->d_inode); dentry = lookup_open(nd, file, op, got_write); if (!IS_ERR(dentry)) { if (file->f_mode & FMODE_CREATED) fsnotify_create(dir->d_inode, dentry); if (file->f_mode & FMODE_OPENED) fsnotify_open(file); } if (open_flag & O_CREAT) inode_unlock(dir->d_inode); else inode_unlock_shared(dir->d_inode); if (got_write) mnt_drop_write(nd->path.mnt); if (IS_ERR(dentry)) return ERR_CAST(dentry); if (file->f_mode & (FMODE_OPENED | FMODE_CREATED)) { dput(nd->path.dentry); nd->path.dentry = dentry; return NULL; } finish_lookup: if (nd->depth) put_link(nd); res = step_into(nd, WALK_TRAILING, dentry); if (unlikely(res)) nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL); return res; } /* * Handle the last step of open() */ static int do_open(struct nameidata *nd, struct file *file, const struct open_flags *op) { struct mnt_idmap *idmap; int open_flag = op->open_flag; bool do_truncate; int acc_mode; int error; if (!(file->f_mode & (FMODE_OPENED | FMODE_CREATED))) { error = complete_walk(nd); if (error) return error; } if (!(file->f_mode & FMODE_CREATED)) audit_inode(nd->name, nd->path.dentry, 0); idmap = mnt_idmap(nd->path.mnt); if (open_flag & O_CREAT) { if ((open_flag & O_EXCL) && !(file->f_mode & FMODE_CREATED)) return -EEXIST; if (d_is_dir(nd->path.dentry)) return -EISDIR; error = may_create_in_sticky(idmap, nd, d_backing_inode(nd->path.dentry)); if (unlikely(error)) return error; } if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry)) return -ENOTDIR; do_truncate = false; acc_mode = op->acc_mode; if (file->f_mode & FMODE_CREATED) { /* Don't check for write permission, don't truncate */ open_flag &= ~O_TRUNC; acc_mode = 0; } else if (d_is_reg(nd->path.dentry) && open_flag & O_TRUNC) { error = mnt_want_write(nd->path.mnt); if (error) return error; do_truncate = true; } error = may_open(idmap, &nd->path, acc_mode, open_flag); if (!error && !(file->f_mode & FMODE_OPENED)) error = vfs_open(&nd->path, file); if (!error) error = security_file_post_open(file, op->acc_mode); if (!error && do_truncate) error = handle_truncate(idmap, file); if (unlikely(error > 0)) { WARN_ON(1); error = -EINVAL; } if (do_truncate) mnt_drop_write(nd->path.mnt); return error; } /** * vfs_tmpfile - create tmpfile * @idmap: idmap of the mount the inode was found from * @parentpath: pointer to the path of the base directory * @file: 
file descriptor of the new tmpfile * @mode: mode of the new tmpfile * * Create a temporary file. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. */ int vfs_tmpfile(struct mnt_idmap *idmap, const struct path *parentpath, struct file *file, umode_t mode) { struct dentry *child; struct inode *dir = d_inode(parentpath->dentry); struct inode *inode; int error; int open_flag = file->f_flags; /* we want directory to be writable */ error = inode_permission(idmap, dir, MAY_WRITE | MAY_EXEC); if (error) return error; if (!dir->i_op->tmpfile) return -EOPNOTSUPP; child = d_alloc(parentpath->dentry, &slash_name); if (unlikely(!child)) return -ENOMEM; file->f_path.mnt = parentpath->mnt; file->f_path.dentry = child; mode = vfs_prepare_mode(idmap, dir, mode, mode, mode); error = dir->i_op->tmpfile(idmap, dir, file, mode); dput(child); if (file->f_mode & FMODE_OPENED) fsnotify_open(file); if (error) return error; /* Don't check for other permissions, the inode was just created */ error = may_open(idmap, &file->f_path, 0, file->f_flags); if (error) return error; inode = file_inode(file); if (!(open_flag & O_EXCL)) { spin_lock(&inode->i_lock); inode->i_state |= I_LINKABLE; spin_unlock(&inode->i_lock); } security_inode_post_create_tmpfile(idmap, inode); return 0; } /** * kernel_tmpfile_open - open a tmpfile for kernel internal use * @idmap: idmap of the mount the inode was found from * @parentpath: path of the base directory * @mode: mode of the new tmpfile * @open_flag: flags * @cred: credentials for open * * Create and open a temporary file. The file is not accounted in nr_files, * hence this is only for kernel internal use, and must not be installed into * file tables or such. 
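 *
 * Minimal usage sketch (assumes @parentpath was resolved beforehand, e.g.
 * with kern_path(), and that the caller takes mnt_want_write() where the
 * filesystem requires it, as do_tmpfile() below does):
 *
 *	struct file *f = kernel_tmpfile_open(mnt_idmap(parentpath->mnt),
 *					     parentpath, S_IFREG | 0600,
 *					     O_RDWR, current_cred());
 *	if (IS_ERR(f))
 *		return PTR_ERR(f);
 *	... kernel-internal I/O on f, then fput(f) ...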
*/ struct file *kernel_tmpfile_open(struct mnt_idmap *idmap, const struct path *parentpath, umode_t mode, int open_flag, const struct cred *cred) { struct file *file; int error; file = alloc_empty_file_noaccount(open_flag, cred); if (IS_ERR(file)) return file; error = vfs_tmpfile(idmap, parentpath, file, mode); if (error) { fput(file); file = ERR_PTR(error); } return file; } EXPORT_SYMBOL(kernel_tmpfile_open); static int do_tmpfile(struct nameidata *nd, unsigned flags, const struct open_flags *op, struct file *file) { struct path path; int error = path_lookupat(nd, flags | LOOKUP_DIRECTORY, &path); if (unlikely(error)) return error; error = mnt_want_write(path.mnt); if (unlikely(error)) goto out; error = vfs_tmpfile(mnt_idmap(path.mnt), &path, file, op->mode); if (error) goto out2; audit_inode(nd->name, file->f_path.dentry, 0); out2: mnt_drop_write(path.mnt); out: path_put(&path); return error; } static int do_o_path(struct nameidata *nd, unsigned flags, struct file *file) { struct path path; int error = path_lookupat(nd, flags, &path); if (!error) { audit_inode(nd->name, path.dentry, 0); error = vfs_open(&path, file); path_put(&path); } return error; } static struct file *path_openat(struct nameidata *nd, const struct open_flags *op, unsigned flags) { struct file *file; int error; file = alloc_empty_file(op->open_flag, current_cred()); if (IS_ERR(file)) return file; if (unlikely(file->f_flags & __O_TMPFILE)) { error = do_tmpfile(nd, flags, op, file); } else if (unlikely(file->f_flags & O_PATH)) { error = do_o_path(nd, flags, file); } else { const char *s = path_init(nd, flags); while (!(error = link_path_walk(s, nd)) && (s = open_last_lookups(nd, file, op)) != NULL) ; if (!error) error = do_open(nd, file, op); terminate_walk(nd); } if (likely(!error)) { if (likely(file->f_mode & FMODE_OPENED)) return file; WARN_ON(1); error = -EINVAL; } fput_close(file); if (error == -EOPENSTALE) { if (flags & LOOKUP_RCU) error = -ECHILD; else error = -ESTALE; } return ERR_PTR(error); } struct file *do_filp_open(int dfd, struct filename *pathname, const struct open_flags *op) { struct nameidata nd; int flags = op->lookup_flags; struct file *filp; set_nameidata(&nd, dfd, pathname, NULL); filp = path_openat(&nd, op, flags | LOOKUP_RCU); if (unlikely(filp == ERR_PTR(-ECHILD))) filp = path_openat(&nd, op, flags); if (unlikely(filp == ERR_PTR(-ESTALE))) filp = path_openat(&nd, op, flags | LOOKUP_REVAL); restore_nameidata(); return filp; } struct file *do_file_open_root(const struct path *root, const char *name, const struct open_flags *op) { struct nameidata nd; struct file *file; struct filename *filename; int flags = op->lookup_flags; if (d_is_symlink(root->dentry) && op->intent & LOOKUP_OPEN) return ERR_PTR(-ELOOP); filename = getname_kernel(name); if (IS_ERR(filename)) return ERR_CAST(filename); set_nameidata(&nd, -1, filename, root); file = path_openat(&nd, op, flags | LOOKUP_RCU); if (unlikely(file == ERR_PTR(-ECHILD))) file = path_openat(&nd, op, flags); if (unlikely(file == ERR_PTR(-ESTALE))) file = path_openat(&nd, op, flags | LOOKUP_REVAL); restore_nameidata(); putname(filename); return file; } static struct dentry *filename_create(int dfd, struct filename *name, struct path *path, unsigned int lookup_flags) { struct dentry *dentry = ERR_PTR(-EEXIST); struct qstr last; bool want_dir = lookup_flags & LOOKUP_DIRECTORY; unsigned int reval_flag = lookup_flags & LOOKUP_REVAL; unsigned int create_flags = LOOKUP_CREATE | LOOKUP_EXCL; int type; int err2; int error; error = filename_parentat(dfd, name, 
reval_flag, path, &last, &type); if (error) return ERR_PTR(error); /* * Yucky last component or no last component at all? * (foo/., foo/.., /////) */ if (unlikely(type != LAST_NORM)) goto out; /* don't fail immediately if it's r/o, at least try to report other errors */ err2 = mnt_want_write(path->mnt); /* * Do the final lookup. Suppress 'create' if there is a trailing * '/', and a directory wasn't requested. */ if (last.name[last.len] && !want_dir) create_flags &= ~LOOKUP_CREATE; inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT); dentry = lookup_one_qstr_excl(&last, path->dentry, reval_flag | create_flags); if (IS_ERR(dentry)) goto unlock; if (unlikely(err2)) { error = err2; goto fail; } return dentry; fail: dput(dentry); dentry = ERR_PTR(error); unlock: inode_unlock(path->dentry->d_inode); if (!err2) mnt_drop_write(path->mnt); out: path_put(path); return dentry; } struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path, unsigned int lookup_flags) { struct filename *filename = getname_kernel(pathname); struct dentry *res = filename_create(dfd, filename, path, lookup_flags); putname(filename); return res; } EXPORT_SYMBOL(kern_path_create); void done_path_create(struct path *path, struct dentry *dentry) { if (!IS_ERR(dentry)) dput(dentry); inode_unlock(path->dentry->d_inode); mnt_drop_write(path->mnt); path_put(path); } EXPORT_SYMBOL(done_path_create); inline struct dentry *user_path_create(int dfd, const char __user *pathname, struct path *path, unsigned int lookup_flags) { struct filename *filename = getname(pathname); struct dentry *res = filename_create(dfd, filename, path, lookup_flags); putname(filename); return res; } EXPORT_SYMBOL(user_path_create); /** * vfs_mknod - create device node or file * @idmap: idmap of the mount the inode was found from * @dir: inode of the parent directory * @dentry: dentry of the child device node * @mode: mode of the child device node * @dev: device number of device to create * * Create a device node or file. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. 
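 *
 * Illustrative sketch of an in-kernel caller (the path is only an example;
 * the security_path_mknod() hook that do_mknodat() below applies is omitted):
 *
 *	struct path path;
 *	struct dentry *dentry;
 *	int error;
 *
 *	dentry = kern_path_create(AT_FDCWD, "/dev/myfifo", &path, 0);
 *	if (!IS_ERR(dentry)) {
 *		error = vfs_mknod(mnt_idmap(path.mnt), d_inode(path.dentry),
 *				  dentry, S_IFIFO | 0600, 0);
 *		done_path_create(&path, dentry);
 *	}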
*/ int vfs_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) { bool is_whiteout = S_ISCHR(mode) && dev == WHITEOUT_DEV; int error = may_create(idmap, dir, dentry); if (error) return error; if ((S_ISCHR(mode) || S_ISBLK(mode)) && !is_whiteout && !capable(CAP_MKNOD)) return -EPERM; if (!dir->i_op->mknod) return -EPERM; mode = vfs_prepare_mode(idmap, dir, mode, mode, mode); error = devcgroup_inode_mknod(mode, dev); if (error) return error; error = security_inode_mknod(dir, dentry, mode, dev); if (error) return error; error = dir->i_op->mknod(idmap, dir, dentry, mode, dev); if (!error) fsnotify_create(dir, dentry); return error; } EXPORT_SYMBOL(vfs_mknod); static int may_mknod(umode_t mode) { switch (mode & S_IFMT) { case S_IFREG: case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: case 0: /* zero mode translates to S_IFREG */ return 0; case S_IFDIR: return -EPERM; default: return -EINVAL; } } static int do_mknodat(int dfd, struct filename *name, umode_t mode, unsigned int dev) { struct mnt_idmap *idmap; struct dentry *dentry; struct path path; int error; unsigned int lookup_flags = 0; error = may_mknod(mode); if (error) goto out1; retry: dentry = filename_create(dfd, name, &path, lookup_flags); error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out1; error = security_path_mknod(&path, dentry, mode_strip_umask(path.dentry->d_inode, mode), dev); if (error) goto out2; idmap = mnt_idmap(path.mnt); switch (mode & S_IFMT) { case 0: case S_IFREG: error = vfs_create(idmap, path.dentry->d_inode, dentry, mode, true); if (!error) security_path_post_mknod(idmap, dentry); break; case S_IFCHR: case S_IFBLK: error = vfs_mknod(idmap, path.dentry->d_inode, dentry, mode, new_decode_dev(dev)); break; case S_IFIFO: case S_IFSOCK: error = vfs_mknod(idmap, path.dentry->d_inode, dentry, mode, 0); break; } out2: done_path_create(&path, dentry); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } out1: putname(name); return error; } SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, unsigned int, dev) { return do_mknodat(dfd, getname(filename), mode, dev); } SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev) { return do_mknodat(AT_FDCWD, getname(filename), mode, dev); } /** * vfs_mkdir - create directory returning correct dentry if possible * @idmap: idmap of the mount the inode was found from * @dir: inode of the parent directory * @dentry: dentry of the child directory * @mode: mode of the child directory * * Create a directory. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. * * In the event that the filesystem does not use the *@dentry but leaves it * negative or unhashes it and possibly splices a different one returning it, * the original dentry is dput() and the alternate is returned. * * In case of an error the dentry is dput() and an ERR_PTR() is returned. 
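 *
 * Illustrative sketch (mirrors do_mkdirat() below; the security_path_mkdir()
 * hook is omitted and "/tmp/newdir" is only an example):
 *
 *	struct path path;
 *	struct dentry *dentry;
 *
 *	dentry = kern_path_create(AT_FDCWD, "/tmp/newdir", &path,
 *				  LOOKUP_DIRECTORY);
 *	if (!IS_ERR(dentry)) {
 *		dentry = vfs_mkdir(mnt_idmap(path.mnt), d_inode(path.dentry),
 *				   dentry, 0755);
 *		done_path_create(&path, dentry);
 *	}
 *
 * done_path_create() copes with vfs_mkdir() having returned an ERR_PTR().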
*/ struct dentry *vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { int error; unsigned max_links = dir->i_sb->s_max_links; struct dentry *de; error = may_create(idmap, dir, dentry); if (error) goto err; error = -EPERM; if (!dir->i_op->mkdir) goto err; mode = vfs_prepare_mode(idmap, dir, mode, S_IRWXUGO | S_ISVTX, 0); error = security_inode_mkdir(dir, dentry, mode); if (error) goto err; error = -EMLINK; if (max_links && dir->i_nlink >= max_links) goto err; de = dir->i_op->mkdir(idmap, dir, dentry, mode); error = PTR_ERR(de); if (IS_ERR(de)) goto err; if (de) { dput(dentry); dentry = de; } fsnotify_mkdir(dir, dentry); return dentry; err: dput(dentry); return ERR_PTR(error); } EXPORT_SYMBOL(vfs_mkdir); int do_mkdirat(int dfd, struct filename *name, umode_t mode) { struct dentry *dentry; struct path path; int error; unsigned int lookup_flags = LOOKUP_DIRECTORY; retry: dentry = filename_create(dfd, name, &path, lookup_flags); error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out_putname; error = security_path_mkdir(&path, dentry, mode_strip_umask(path.dentry->d_inode, mode)); if (!error) { dentry = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode, dentry, mode); if (IS_ERR(dentry)) error = PTR_ERR(dentry); } done_path_create(&path, dentry); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } out_putname: putname(name); return error; } SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode) { return do_mkdirat(dfd, getname(pathname), mode); } SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode) { return do_mkdirat(AT_FDCWD, getname(pathname), mode); } /** * vfs_rmdir - remove directory * @idmap: idmap of the mount the inode was found from * @dir: inode of the parent directory * @dentry: dentry of the child directory * * Remove a directory. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. 
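 *
 * Calling-convention sketch (condensed from do_rmdir() below; @parent, @last,
 * @idmap and the held write access on the mount are assumed to come from a
 * prior filename_parentat()/mnt_want_write()/mnt_idmap() sequence):
 *
 *	inode_lock_nested(d_inode(parent), I_MUTEX_PARENT);
 *	dentry = lookup_one_qstr_excl(&last, parent, 0);
 *	if (!IS_ERR(dentry)) {
 *		error = vfs_rmdir(idmap, d_inode(parent), dentry);
 *		dput(dentry);
 *	}
 *	inode_unlock(d_inode(parent));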
*/ int vfs_rmdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry) { int error = may_delete(idmap, dir, dentry, 1); if (error) return error; if (!dir->i_op->rmdir) return -EPERM; dget(dentry); inode_lock(dentry->d_inode); error = -EBUSY; if (is_local_mountpoint(dentry) || (dentry->d_inode->i_flags & S_KERNEL_FILE)) goto out; error = security_inode_rmdir(dir, dentry); if (error) goto out; error = dir->i_op->rmdir(dir, dentry); if (error) goto out; shrink_dcache_parent(dentry); dentry->d_inode->i_flags |= S_DEAD; dont_mount(dentry); detach_mounts(dentry); out: inode_unlock(dentry->d_inode); dput(dentry); if (!error) d_delete_notify(dir, dentry); return error; } EXPORT_SYMBOL(vfs_rmdir); int do_rmdir(int dfd, struct filename *name) { int error; struct dentry *dentry; struct path path; struct qstr last; int type; unsigned int lookup_flags = 0; retry: error = filename_parentat(dfd, name, lookup_flags, &path, &last, &type); if (error) goto exit1; switch (type) { case LAST_DOTDOT: error = -ENOTEMPTY; goto exit2; case LAST_DOT: error = -EINVAL; goto exit2; case LAST_ROOT: error = -EBUSY; goto exit2; } error = mnt_want_write(path.mnt); if (error) goto exit2; inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT); dentry = lookup_one_qstr_excl(&last, path.dentry, lookup_flags); error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto exit3; error = security_path_rmdir(&path, dentry); if (error) goto exit4; error = vfs_rmdir(mnt_idmap(path.mnt), path.dentry->d_inode, dentry); exit4: dput(dentry); exit3: inode_unlock(path.dentry->d_inode); mnt_drop_write(path.mnt); exit2: path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } exit1: putname(name); return error; } SYSCALL_DEFINE1(rmdir, const char __user *, pathname) { return do_rmdir(AT_FDCWD, getname(pathname)); } /** * vfs_unlink - unlink a filesystem object * @idmap: idmap of the mount the inode was found from * @dir: parent directory * @dentry: victim * @delegated_inode: returns victim inode, if the inode is delegated. * * The caller must hold dir->i_mutex. * * If vfs_unlink discovers a delegation, it will return -EWOULDBLOCK and * return a reference to the inode in delegated_inode. The caller * should then break the delegation on that inode and retry. Because * breaking a delegation may take a long time, the caller should drop * dir->i_mutex before doing so. * * Alternatively, a caller may pass NULL for delegated_inode. This may * be appropriate for callers that expect the underlying filesystem not * to be NFS exported. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. 
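 *
 * Sketch of the delegation retry pattern described above (condensed from
 * do_unlinkat() below; @parent, @last and @idmap are placeholders and the
 * security_path_unlink() hook is omitted):
 *
 *	struct inode *delegated_inode = NULL;
 *
 * retry_deleg:
 *	inode_lock_nested(d_inode(parent), I_MUTEX_PARENT);
 *	dentry = lookup_one_qstr_excl(&last, parent, 0);
 *	if (!IS_ERR(dentry)) {
 *		error = vfs_unlink(idmap, d_inode(parent), dentry,
 *				   &delegated_inode);
 *		dput(dentry);
 *	}
 *	inode_unlock(d_inode(parent));
 *	if (delegated_inode) {
 *		error = break_deleg_wait(&delegated_inode);
 *		if (!error)
 *			goto retry_deleg;
 *	}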
*/ int vfs_unlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, struct inode **delegated_inode) { struct inode *target = dentry->d_inode; int error = may_delete(idmap, dir, dentry, 0); if (error) return error; if (!dir->i_op->unlink) return -EPERM; inode_lock(target); if (IS_SWAPFILE(target)) error = -EPERM; else if (is_local_mountpoint(dentry)) error = -EBUSY; else { error = security_inode_unlink(dir, dentry); if (!error) { error = try_break_deleg(target, delegated_inode); if (error) goto out; error = dir->i_op->unlink(dir, dentry); if (!error) { dont_mount(dentry); detach_mounts(dentry); } } } out: inode_unlock(target); /* We don't d_delete() NFS sillyrenamed files--they still exist. */ if (!error && dentry->d_flags & DCACHE_NFSFS_RENAMED) { fsnotify_unlink(dir, dentry); } else if (!error) { fsnotify_link_count(target); d_delete_notify(dir, dentry); } return error; } EXPORT_SYMBOL(vfs_unlink); /* * Make sure that the actual truncation of the file will occur outside its * directory's i_mutex. Truncate can take a long time if there is a lot of * writeout happening, and we don't want to prevent access to the directory * while waiting on the I/O. */ int do_unlinkat(int dfd, struct filename *name) { int error; struct dentry *dentry; struct path path; struct qstr last; int type; struct inode *inode = NULL; struct inode *delegated_inode = NULL; unsigned int lookup_flags = 0; retry: error = filename_parentat(dfd, name, lookup_flags, &path, &last, &type); if (error) goto exit1; error = -EISDIR; if (type != LAST_NORM) goto exit2; error = mnt_want_write(path.mnt); if (error) goto exit2; retry_deleg: inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT); dentry = lookup_one_qstr_excl(&last, path.dentry, lookup_flags); error = PTR_ERR(dentry); if (!IS_ERR(dentry)) { /* Why not before? Because we want correct error value */ if (last.name[last.len]) goto slashes; inode = dentry->d_inode; ihold(inode); error = security_path_unlink(&path, dentry); if (error) goto exit3; error = vfs_unlink(mnt_idmap(path.mnt), path.dentry->d_inode, dentry, &delegated_inode); exit3: dput(dentry); } inode_unlock(path.dentry->d_inode); if (inode) iput(inode); /* truncate the inode here */ inode = NULL; if (delegated_inode) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; } mnt_drop_write(path.mnt); exit2: path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; inode = NULL; goto retry; } exit1: putname(name); return error; slashes: if (d_is_dir(dentry)) error = -EISDIR; else error = -ENOTDIR; goto exit3; } SYSCALL_DEFINE3(unlinkat, int, dfd, const char __user *, pathname, int, flag) { if ((flag & ~AT_REMOVEDIR) != 0) return -EINVAL; if (flag & AT_REMOVEDIR) return do_rmdir(dfd, getname(pathname)); return do_unlinkat(dfd, getname(pathname)); } SYSCALL_DEFINE1(unlink, const char __user *, pathname) { return do_unlinkat(AT_FDCWD, getname(pathname)); } /** * vfs_symlink - create symlink * @idmap: idmap of the mount the inode was found from * @dir: inode of the parent directory * @dentry: dentry of the child symlink file * @oldname: name of the file to link to * * Create a symlink. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. 
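 *
 * Illustrative sketch (mirrors do_symlinkat() below; the
 * security_path_symlink() hook is omitted and both paths are only examples):
 *
 *	struct path path;
 *	struct dentry *dentry;
 *	int error;
 *
 *	dentry = kern_path_create(AT_FDCWD, "/tmp/mylink", &path, 0);
 *	if (!IS_ERR(dentry)) {
 *		error = vfs_symlink(mnt_idmap(path.mnt), d_inode(path.dentry),
 *				    dentry, "/path/to/target");
 *		done_path_create(&path, dentry);
 *	}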
*/ int vfs_symlink(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *oldname) { int error; error = may_create(idmap, dir, dentry); if (error) return error; if (!dir->i_op->symlink) return -EPERM; error = security_inode_symlink(dir, dentry, oldname); if (error) return error; error = dir->i_op->symlink(idmap, dir, dentry, oldname); if (!error) fsnotify_create(dir, dentry); return error; } EXPORT_SYMBOL(vfs_symlink); int do_symlinkat(struct filename *from, int newdfd, struct filename *to) { int error; struct dentry *dentry; struct path path; unsigned int lookup_flags = 0; if (IS_ERR(from)) { error = PTR_ERR(from); goto out_putnames; } retry: dentry = filename_create(newdfd, to, &path, lookup_flags); error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out_putnames; error = security_path_symlink(&path, dentry, from->name); if (!error) error = vfs_symlink(mnt_idmap(path.mnt), path.dentry->d_inode, dentry, from->name); done_path_create(&path, dentry); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } out_putnames: putname(to); putname(from); return error; } SYSCALL_DEFINE3(symlinkat, const char __user *, oldname, int, newdfd, const char __user *, newname) { return do_symlinkat(getname(oldname), newdfd, getname(newname)); } SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newname) { return do_symlinkat(getname(oldname), AT_FDCWD, getname(newname)); } /** * vfs_link - create a new link * @old_dentry: object to be linked * @idmap: idmap of the mount * @dir: new parent * @new_dentry: where to create the new link * @delegated_inode: returns inode needing a delegation break * * The caller must hold dir->i_mutex * * If vfs_link discovers a delegation on the to-be-linked file in need * of breaking, it will return -EWOULDBLOCK and return a reference to the * inode in delegated_inode. The caller should then break the delegation * and retry. Because breaking a delegation may take a long time, the * caller should drop the i_mutex before doing so. * * Alternatively, a caller may pass NULL for delegated_inode. This may * be appropriate for callers that expect the underlying filesystem not * to be NFS exported. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then take * care to map the inode according to @idmap before checking permissions. * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. */ int vfs_link(struct dentry *old_dentry, struct mnt_idmap *idmap, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode) { struct inode *inode = old_dentry->d_inode; unsigned max_links = dir->i_sb->s_max_links; int error; if (!inode) return -ENOENT; error = may_create(idmap, dir, new_dentry); if (error) return error; if (dir->i_sb != inode->i_sb) return -EXDEV; /* * A link to an append-only or immutable file cannot be created. */ if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) return -EPERM; /* * Updating the link count will likely cause i_uid and i_gid to * be written back improperly if their true value is unknown to * the vfs.
*/ if (HAS_UNMAPPED_ID(idmap, inode)) return -EPERM; if (!dir->i_op->link) return -EPERM; if (S_ISDIR(inode->i_mode)) return -EPERM; error = security_inode_link(old_dentry, dir, new_dentry); if (error) return error; inode_lock(inode); /* Make sure we don't allow creating hardlink to an unlinked file */ if (inode->i_nlink == 0 && !(inode->i_state & I_LINKABLE)) error = -ENOENT; else if (max_links && inode->i_nlink >= max_links) error = -EMLINK; else { error = try_break_deleg(inode, delegated_inode); if (!error) error = dir->i_op->link(old_dentry, dir, new_dentry); } if (!error && (inode->i_state & I_LINKABLE)) { spin_lock(&inode->i_lock); inode->i_state &= ~I_LINKABLE; spin_unlock(&inode->i_lock); } inode_unlock(inode); if (!error) fsnotify_link(dir, inode, new_dentry); return error; } EXPORT_SYMBOL(vfs_link); /* * Hardlinks are often used in delicate situations. We avoid * security-related surprises by not following symlinks on the * newname. --KAB * * We don't follow them on the oldname either to be compatible * with linux 2.0, and to avoid hard-linking to directories * and other special files. --ADM */ int do_linkat(int olddfd, struct filename *old, int newdfd, struct filename *new, int flags) { struct mnt_idmap *idmap; struct dentry *new_dentry; struct path old_path, new_path; struct inode *delegated_inode = NULL; int how = 0; int error; if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0) { error = -EINVAL; goto out_putnames; } /* * To use null names we require CAP_DAC_READ_SEARCH or * that the open-time creds of the dfd matches current. * This ensures that not everyone will be able to create * a hardlink using the passed file descriptor. */ if (flags & AT_EMPTY_PATH) how |= LOOKUP_LINKAT_EMPTY; if (flags & AT_SYMLINK_FOLLOW) how |= LOOKUP_FOLLOW; retry: error = filename_lookup(olddfd, old, how, &old_path, NULL); if (error) goto out_putnames; new_dentry = filename_create(newdfd, new, &new_path, (how & LOOKUP_REVAL)); error = PTR_ERR(new_dentry); if (IS_ERR(new_dentry)) goto out_putpath; error = -EXDEV; if (old_path.mnt != new_path.mnt) goto out_dput; idmap = mnt_idmap(new_path.mnt); error = may_linkat(idmap, &old_path); if (unlikely(error)) goto out_dput; error = security_path_link(old_path.dentry, &new_path, new_dentry); if (error) goto out_dput; error = vfs_link(old_path.dentry, idmap, new_path.dentry->d_inode, new_dentry, &delegated_inode); out_dput: done_path_create(&new_path, new_dentry); if (delegated_inode) { error = break_deleg_wait(&delegated_inode); if (!error) { path_put(&old_path); goto retry; } } if (retry_estale(error, how)) { path_put(&old_path); how |= LOOKUP_REVAL; goto retry; } out_putpath: path_put(&old_path); out_putnames: putname(old); putname(new); return error; } SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, int, flags) { return do_linkat(olddfd, getname_uflags(oldname, flags), newdfd, getname(newname), flags); } SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname) { return do_linkat(AT_FDCWD, getname(oldname), AT_FDCWD, getname(newname), 0); } /** * vfs_rename - rename a filesystem object * @rd: pointer to &struct renamedata info * * The caller must hold multiple mutexes--see lock_rename()). * * If vfs_rename discovers a delegation in need of breaking at either * the source or destination, it will return -EWOULDBLOCK and return a * reference to the inode in delegated_inode. The caller should then * break the delegation and retry. 
Because breaking a delegation may * take a long time, the caller should drop all locks before doing * so. * * Alternatively, a caller may pass NULL for delegated_inode. This may * be appropriate for callers that expect the underlying filesystem not * to be NFS exported. * * The worst of all namespace operations - renaming directory. "Perverted" * doesn't even start to describe it. Somebody in UCB had a heck of a trip... * Problems: * * a) we can get into loop creation. * b) race potential - two innocent renames can create a loop together. * That's where 4.4BSD screws up. Current fix: serialization on * sb->s_vfs_rename_mutex. We might be more accurate, but that's another * story. * c) we may have to lock up to _four_ objects - parents and victim (if it exists), * and source (if it's a non-directory or a subdirectory that moves to * different parent). * And that - after we got ->i_mutex on parents (until then we don't know * whether the target exists). Solution: try to be smart with locking * order for inodes. We rely on the fact that tree topology may change * only under ->s_vfs_rename_mutex _and_ that parent of the object we * move will be locked. Thus we can rank directories by the tree * (ancestors first) and rank all non-directories after them. * That works since everybody except rename does "lock parent, lookup, * lock child" and rename is under ->s_vfs_rename_mutex. * HOWEVER, it relies on the assumption that any object with ->lookup() * has no more than 1 dentry. If "hybrid" objects will ever appear, * we'd better make sure that there's no link(2) for them. * d) conversion from fhandle to dentry may come in the wrong moment - when * we are removing the target. Solution: we will have to grab ->i_mutex * in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on * ->i_mutex on parents, which works but leads to some truly excessive * locking]. */ int vfs_rename(struct renamedata *rd) { int error; struct inode *old_dir = rd->old_dir, *new_dir = rd->new_dir; struct dentry *old_dentry = rd->old_dentry; struct dentry *new_dentry = rd->new_dentry; struct inode **delegated_inode = rd->delegated_inode; unsigned int flags = rd->flags; bool is_dir = d_is_dir(old_dentry); struct inode *source = old_dentry->d_inode; struct inode *target = new_dentry->d_inode; bool new_is_dir = false; unsigned max_links = new_dir->i_sb->s_max_links; struct name_snapshot old_name; bool lock_old_subdir, lock_new_subdir; if (source == target) return 0; error = may_delete(rd->old_mnt_idmap, old_dir, old_dentry, is_dir); if (error) return error; if (!target) { error = may_create(rd->new_mnt_idmap, new_dir, new_dentry); } else { new_is_dir = d_is_dir(new_dentry); if (!(flags & RENAME_EXCHANGE)) error = may_delete(rd->new_mnt_idmap, new_dir, new_dentry, is_dir); else error = may_delete(rd->new_mnt_idmap, new_dir, new_dentry, new_is_dir); } if (error) return error; if (!old_dir->i_op->rename) return -EPERM; /* * If we are going to change the parent - check write permissions, * we'll need to flip '..'. */ if (new_dir != old_dir) { if (is_dir) { error = inode_permission(rd->old_mnt_idmap, source, MAY_WRITE); if (error) return error; } if ((flags & RENAME_EXCHANGE) && new_is_dir) { error = inode_permission(rd->new_mnt_idmap, target, MAY_WRITE); if (error) return error; } } error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry, flags); if (error) return error; take_dentry_name_snapshot(&old_name, old_dentry); dget(new_dentry); /* * Lock children. 
* The source subdirectory needs to be locked on cross-directory * rename or cross-directory exchange since its parent changes. * The target subdirectory needs to be locked on cross-directory * exchange due to parent change and on any rename due to becoming * a victim. * Non-directories need locking in all cases (for NFS reasons); * they get locked after any subdirectories (in inode address order). * * NOTE: WE ONLY LOCK UNRELATED DIRECTORIES IN CROSS-DIRECTORY CASE. * NEVER, EVER DO THAT WITHOUT ->s_vfs_rename_mutex. */ lock_old_subdir = new_dir != old_dir; lock_new_subdir = new_dir != old_dir || !(flags & RENAME_EXCHANGE); if (is_dir) { if (lock_old_subdir) inode_lock_nested(source, I_MUTEX_CHILD); if (target && (!new_is_dir || lock_new_subdir)) inode_lock(target); } else if (new_is_dir) { if (lock_new_subdir) inode_lock_nested(target, I_MUTEX_CHILD); inode_lock(source); } else { lock_two_nondirectories(source, target); } error = -EPERM; if (IS_SWAPFILE(source) || (target && IS_SWAPFILE(target))) goto out; error = -EBUSY; if (is_local_mountpoint(old_dentry) || is_local_mountpoint(new_dentry)) goto out; if (max_links && new_dir != old_dir) { error = -EMLINK; if (is_dir && !new_is_dir && new_dir->i_nlink >= max_links) goto out; if ((flags & RENAME_EXCHANGE) && !is_dir && new_is_dir && old_dir->i_nlink >= max_links) goto out; } if (!is_dir) { error = try_break_deleg(source, delegated_inode); if (error) goto out; } if (target && !new_is_dir) { error = try_break_deleg(target, delegated_inode); if (error) goto out; } error = old_dir->i_op->rename(rd->new_mnt_idmap, old_dir, old_dentry, new_dir, new_dentry, flags); if (error) goto out; if (!(flags & RENAME_EXCHANGE) && target) { if (is_dir) { shrink_dcache_parent(new_dentry); target->i_flags |= S_DEAD; } dont_mount(new_dentry); detach_mounts(new_dentry); } if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) { if (!(flags & RENAME_EXCHANGE)) d_move(old_dentry, new_dentry); else d_exchange(old_dentry, new_dentry); } out: if (!is_dir || lock_old_subdir) inode_unlock(source); if (target && (!new_is_dir || lock_new_subdir)) inode_unlock(target); dput(new_dentry); if (!error) { fsnotify_move(old_dir, new_dir, &old_name.name, is_dir, !(flags & RENAME_EXCHANGE) ? 
target : NULL, old_dentry); if (flags & RENAME_EXCHANGE) { fsnotify_move(new_dir, old_dir, &old_dentry->d_name, new_is_dir, NULL, new_dentry); } } release_dentry_name_snapshot(&old_name); return error; } EXPORT_SYMBOL(vfs_rename); int do_renameat2(int olddfd, struct filename *from, int newdfd, struct filename *to, unsigned int flags) { struct renamedata rd; struct dentry *old_dentry, *new_dentry; struct dentry *trap; struct path old_path, new_path; struct qstr old_last, new_last; int old_type, new_type; struct inode *delegated_inode = NULL; unsigned int lookup_flags = 0, target_flags = LOOKUP_RENAME_TARGET | LOOKUP_CREATE; bool should_retry = false; int error = -EINVAL; if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) goto put_names; if ((flags & (RENAME_NOREPLACE | RENAME_WHITEOUT)) && (flags & RENAME_EXCHANGE)) goto put_names; if (flags & RENAME_EXCHANGE) target_flags = 0; if (flags & RENAME_NOREPLACE) target_flags |= LOOKUP_EXCL; retry: error = filename_parentat(olddfd, from, lookup_flags, &old_path, &old_last, &old_type); if (error) goto put_names; error = filename_parentat(newdfd, to, lookup_flags, &new_path, &new_last, &new_type); if (error) goto exit1; error = -EXDEV; if (old_path.mnt != new_path.mnt) goto exit2; error = -EBUSY; if (old_type != LAST_NORM) goto exit2; if (flags & RENAME_NOREPLACE) error = -EEXIST; if (new_type != LAST_NORM) goto exit2; error = mnt_want_write(old_path.mnt); if (error) goto exit2; retry_deleg: trap = lock_rename(new_path.dentry, old_path.dentry); if (IS_ERR(trap)) { error = PTR_ERR(trap); goto exit_lock_rename; } old_dentry = lookup_one_qstr_excl(&old_last, old_path.dentry, lookup_flags); error = PTR_ERR(old_dentry); if (IS_ERR(old_dentry)) goto exit3; new_dentry = lookup_one_qstr_excl(&new_last, new_path.dentry, lookup_flags | target_flags); error = PTR_ERR(new_dentry); if (IS_ERR(new_dentry)) goto exit4; if (flags & RENAME_EXCHANGE) { if (!d_is_dir(new_dentry)) { error = -ENOTDIR; if (new_last.name[new_last.len]) goto exit5; } } /* unless the source is a directory trailing slashes give -ENOTDIR */ if (!d_is_dir(old_dentry)) { error = -ENOTDIR; if (old_last.name[old_last.len]) goto exit5; if (!(flags & RENAME_EXCHANGE) && new_last.name[new_last.len]) goto exit5; } /* source should not be ancestor of target */ error = -EINVAL; if (old_dentry == trap) goto exit5; /* target should not be an ancestor of source */ if (!(flags & RENAME_EXCHANGE)) error = -ENOTEMPTY; if (new_dentry == trap) goto exit5; error = security_path_rename(&old_path, old_dentry, &new_path, new_dentry, flags); if (error) goto exit5; rd.old_dir = old_path.dentry->d_inode; rd.old_dentry = old_dentry; rd.old_mnt_idmap = mnt_idmap(old_path.mnt); rd.new_dir = new_path.dentry->d_inode; rd.new_dentry = new_dentry; rd.new_mnt_idmap = mnt_idmap(new_path.mnt); rd.delegated_inode = &delegated_inode; rd.flags = flags; error = vfs_rename(&rd); exit5: dput(new_dentry); exit4: dput(old_dentry); exit3: unlock_rename(new_path.dentry, old_path.dentry); exit_lock_rename: if (delegated_inode) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; } mnt_drop_write(old_path.mnt); exit2: if (retry_estale(error, lookup_flags)) should_retry = true; path_put(&new_path); exit1: path_put(&old_path); if (should_retry) { should_retry = false; lookup_flags |= LOOKUP_REVAL; goto retry; } put_names: putname(from); putname(to); return error; } SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, unsigned int, flags) { 
return do_renameat2(olddfd, getname(oldname), newdfd, getname(newname), flags); } SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname) { return do_renameat2(olddfd, getname(oldname), newdfd, getname(newname), 0); } SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newname) { return do_renameat2(AT_FDCWD, getname(oldname), AT_FDCWD, getname(newname), 0); } int readlink_copy(char __user *buffer, int buflen, const char *link, int linklen) { int copylen; copylen = linklen; if (unlikely(copylen > (unsigned) buflen)) copylen = buflen; if (copy_to_user(buffer, link, copylen)) copylen = -EFAULT; return copylen; } /** * vfs_readlink - copy symlink body into userspace buffer * @dentry: dentry on which to get symbolic link * @buffer: user memory pointer * @buflen: size of buffer * * Does not touch atime. That's up to the caller if necessary * * Does not call security hook. */ int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen) { struct inode *inode = d_inode(dentry); DEFINE_DELAYED_CALL(done); const char *link; int res; if (inode->i_opflags & IOP_CACHED_LINK) return readlink_copy(buffer, buflen, inode->i_link, inode->i_linklen); if (unlikely(!(inode->i_opflags & IOP_DEFAULT_READLINK))) { if (unlikely(inode->i_op->readlink)) return inode->i_op->readlink(dentry, buffer, buflen); if (!d_is_symlink(dentry)) return -EINVAL; spin_lock(&inode->i_lock); inode->i_opflags |= IOP_DEFAULT_READLINK; spin_unlock(&inode->i_lock); } link = READ_ONCE(inode->i_link); if (!link) { link = inode->i_op->get_link(dentry, inode, &done); if (IS_ERR(link)) return PTR_ERR(link); } res = readlink_copy(buffer, buflen, link, strlen(link)); do_delayed_call(&done); return res; } EXPORT_SYMBOL(vfs_readlink); /** * vfs_get_link - get symlink body * @dentry: dentry on which to get symbolic link * @done: caller needs to free returned data with this * * Calls security hook and i_op->get_link() on the supplied inode. * * It does not touch atime. That's up to the caller if necessary. 
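 *
 * Usage sketch: the returned link body stays valid until the delayed call is
 * run, so callers release it the same way vfs_readlink() above does:
 *
 *	DEFINE_DELAYED_CALL(done);
 *	const char *target = vfs_get_link(dentry, &done);
 *
 *	if (!IS_ERR(target)) {
 *		... use target ...
 *	}
 *	do_delayed_call(&done);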
* * Does not work on "special" symlinks like /proc/$$/fd/N */ const char *vfs_get_link(struct dentry *dentry, struct delayed_call *done) { const char *res = ERR_PTR(-EINVAL); struct inode *inode = d_inode(dentry); if (d_is_symlink(dentry)) { res = ERR_PTR(security_inode_readlink(dentry)); if (!res) res = inode->i_op->get_link(dentry, inode, done); } return res; } EXPORT_SYMBOL(vfs_get_link); /* get the link contents into pagecache */ static char *__page_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *callback) { struct page *page; struct address_space *mapping = inode->i_mapping; if (!dentry) { page = find_get_page(mapping, 0); if (!page) return ERR_PTR(-ECHILD); if (!PageUptodate(page)) { put_page(page); return ERR_PTR(-ECHILD); } } else { page = read_mapping_page(mapping, 0, NULL); if (IS_ERR(page)) return (char*)page; } set_delayed_call(callback, page_put_link, page); BUG_ON(mapping_gfp_mask(mapping) & __GFP_HIGHMEM); return page_address(page); } const char *page_get_link_raw(struct dentry *dentry, struct inode *inode, struct delayed_call *callback) { return __page_get_link(dentry, inode, callback); } EXPORT_SYMBOL_GPL(page_get_link_raw); const char *page_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *callback) { char *kaddr = __page_get_link(dentry, inode, callback); if (!IS_ERR(kaddr)) nd_terminate_link(kaddr, inode->i_size, PAGE_SIZE - 1); return kaddr; } EXPORT_SYMBOL(page_get_link); void page_put_link(void *arg) { put_page(arg); } EXPORT_SYMBOL(page_put_link); int page_readlink(struct dentry *dentry, char __user *buffer, int buflen) { const char *link; int res; DEFINE_DELAYED_CALL(done); link = page_get_link(dentry, d_inode(dentry), &done); res = PTR_ERR(link); if (!IS_ERR(link)) res = readlink_copy(buffer, buflen, link, strlen(link)); do_delayed_call(&done); return res; } EXPORT_SYMBOL(page_readlink); int page_symlink(struct inode *inode, const char *symname, int len) { struct address_space *mapping = inode->i_mapping; const struct address_space_operations *aops = mapping->a_ops; bool nofs = !mapping_gfp_constraint(mapping, __GFP_FS); struct folio *folio; void *fsdata = NULL; int err; unsigned int flags; retry: if (nofs) flags = memalloc_nofs_save(); err = aops->write_begin(NULL, mapping, 0, len-1, &folio, &fsdata); if (nofs) memalloc_nofs_restore(flags); if (err) goto fail; memcpy(folio_address(folio), symname, len - 1); err = aops->write_end(NULL, mapping, 0, len - 1, len - 1, folio, fsdata); if (err < 0) goto fail; if (err < len-1) goto retry; mark_inode_dirty(inode); return 0; fail: return err; } EXPORT_SYMBOL(page_symlink); const struct inode_operations page_symlink_inode_operations = { .get_link = page_get_link, }; EXPORT_SYMBOL(page_symlink_inode_operations); |
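/*
 * Illustrative sketch of how a simple pagecache-backed filesystem wires the
 * helpers above into its symlink support: the inode gets
 * page_symlink_inode_operations for ->get_link() and page_symlink() writes
 * the target into the pagecache. The myfs_* names and myfs_aops are
 * hypothetical placeholders, not part of this file:
 *
 *	static int myfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
 *				struct dentry *dentry, const char *symname)
 *	{
 *		int len = strlen(symname) + 1;	// length including the NUL
 *		struct inode *inode = myfs_new_inode(dir, S_IFLNK | 0777);
 *		int err;
 *
 *		if (IS_ERR(inode))
 *			return PTR_ERR(inode);
 *
 *		inode->i_op = &page_symlink_inode_operations;
 *		inode_nohighmem(inode);	// body is read via page_address()
 *		inode->i_mapping->a_ops = &myfs_aops;
 *
 *		err = page_symlink(inode, symname, len);
 *		if (err) {
 *			iput(inode);
 *			return err;
 *		}
 *		return myfs_add_link(dentry, inode);
 *	}
 */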
// SPDX-License-Identifier: GPL-2.0-or-later /* 6LoWPAN fragment reassembly * * Authors: * Alexander Aring <aar@pengutronix.de> * * Based on: net/ipv6/reassembly.c */ #define pr_fmt(fmt) "6LoWPAN: " fmt #include <linux/net.h> #include <linux/list.h> #include <linux/netdevice.h> #include <linux/random.h> #include <linux/jhash.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/export.h> #include <net/ieee802154_netdev.h> #include <net/6lowpan.h> #include <net/ipv6_frag.h> #include <net/inet_frag.h> #include <net/ip.h> #include "6lowpan_i.h" static const char lowpan_frags_cache_name[] = "lowpan-frags"; static struct inet_frags lowpan_frags; static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *skb, struct sk_buff *prev, struct net_device *ldev, int *refs); static void lowpan_frag_init(struct inet_frag_queue *q, const void *a) { const struct frag_lowpan_compare_key *key = a; BUILD_BUG_ON(sizeof(*key) > sizeof(q->key)); memcpy(&q->key, key, sizeof(*key)); } static void lowpan_frag_expire(struct timer_list *t) { struct inet_frag_queue *frag = from_timer(frag, t, timer); struct frag_queue *fq; int refs = 1; fq = container_of(frag, struct frag_queue, q); spin_lock(&fq->q.lock); if (fq->q.flags & INET_FRAG_COMPLETE) goto out; inet_frag_kill(&fq->q, &refs); out: spin_unlock(&fq->q.lock); inet_frag_putn(&fq->q, refs); } static inline struct lowpan_frag_queue *
fq_find(struct net *net, const struct lowpan_802154_cb *cb, const struct ieee802154_addr *src, const struct ieee802154_addr *dst) { struct netns_ieee802154_lowpan *ieee802154_lowpan = net_ieee802154_lowpan(net); struct frag_lowpan_compare_key key = {}; struct inet_frag_queue *q; key.tag = cb->d_tag; key.d_size = cb->d_size; key.src = *src; key.dst = *dst; q = inet_frag_find(ieee802154_lowpan->fqdir, &key); if (!q) return NULL; return container_of(q, struct lowpan_frag_queue, q); } static int lowpan_frag_queue(struct lowpan_frag_queue *fq, struct sk_buff *skb, u8 frag_type, int *refs) { struct sk_buff *prev_tail; struct net_device *ldev; int end, offset, err; /* inet_frag_queue_* functions use skb->cb; see struct ipfrag_skb_cb * in inet_fragment.c */ BUILD_BUG_ON(sizeof(struct lowpan_802154_cb) > sizeof(struct inet_skb_parm)); BUILD_BUG_ON(sizeof(struct lowpan_802154_cb) > sizeof(struct inet6_skb_parm)); if (fq->q.flags & INET_FRAG_COMPLETE) goto err; offset = lowpan_802154_cb(skb)->d_offset << 3; end = lowpan_802154_cb(skb)->d_size; /* Is this the final fragment? */ if (offset + skb->len == end) { /* If we already have some bits beyond end * or have different end, the segment is corrupted. */ if (end < fq->q.len || ((fq->q.flags & INET_FRAG_LAST_IN) && end != fq->q.len)) goto err; fq->q.flags |= INET_FRAG_LAST_IN; fq->q.len = end; } else { if (end > fq->q.len) { /* Some bits beyond end -> corruption. */ if (fq->q.flags & INET_FRAG_LAST_IN) goto err; fq->q.len = end; } } ldev = skb->dev; if (ldev) skb->dev = NULL; barrier(); prev_tail = fq->q.fragments_tail; err = inet_frag_queue_insert(&fq->q, skb, offset, end); if (err) goto err; fq->q.stamp = skb->tstamp; fq->q.tstamp_type = skb->tstamp_type; if (frag_type == LOWPAN_DISPATCH_FRAG1) fq->q.flags |= INET_FRAG_FIRST_IN; fq->q.meat += skb->len; add_frag_mem_limit(fq->q.fqdir, skb->truesize); if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && fq->q.meat == fq->q.len) { int res; unsigned long orefdst = skb->_skb_refdst; skb->_skb_refdst = 0UL; res = lowpan_frag_reasm(fq, skb, prev_tail, ldev, refs); skb->_skb_refdst = orefdst; return res; } skb_dst_drop(skb); return -1; err: kfree_skb(skb); return -1; } /* Check if this packet is complete. * * It is called with locked fq, and caller must check that * queue is eligible for reassembly i.e. it is not COMPLETE, * the last and the first frames arrived and all the bits are here. 
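 *
 * In lowpan_frag_queue() above that eligibility check is
 *
 *	fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
 *	fq->q.meat == fq->q.len
 *
 * i.e. both ends of the datagram have been seen and the accumulated payload
 * covers its full length.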
*/ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *skb, struct sk_buff *prev_tail, struct net_device *ldev, int *refs) { void *reasm_data; inet_frag_kill(&fq->q, refs); reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail); if (!reasm_data) goto out_oom; inet_frag_reasm_finish(&fq->q, skb, reasm_data, false); skb->dev = ldev; skb->tstamp = fq->q.stamp; fq->q.rb_fragments = RB_ROOT; fq->q.fragments_tail = NULL; fq->q.last_run_head = NULL; return 1; out_oom: net_dbg_ratelimited("lowpan_frag_reasm: no memory for reassembly\n"); return -1; } static int lowpan_frag_rx_handlers_result(struct sk_buff *skb, lowpan_rx_result res) { switch (res) { case RX_QUEUED: return NET_RX_SUCCESS; case RX_CONTINUE: /* nobody cared about this packet */ net_warn_ratelimited("%s: received unknown dispatch\n", __func__); fallthrough; default: /* all others failure */ return NET_RX_DROP; } } static lowpan_rx_result lowpan_frag_rx_h_iphc(struct sk_buff *skb) { int ret; if (!lowpan_is_iphc(*skb_network_header(skb))) return RX_CONTINUE; ret = lowpan_iphc_decompress(skb); if (ret < 0) return RX_DROP; return RX_QUEUED; } static int lowpan_invoke_frag_rx_handlers(struct sk_buff *skb) { lowpan_rx_result res; #define CALL_RXH(rxh) \ do { \ res = rxh(skb); \ if (res != RX_CONTINUE) \ goto rxh_next; \ } while (0) /* likely at first */ CALL_RXH(lowpan_frag_rx_h_iphc); CALL_RXH(lowpan_rx_h_ipv6); rxh_next: return lowpan_frag_rx_handlers_result(skb, res); #undef CALL_RXH } #define LOWPAN_FRAG_DGRAM_SIZE_HIGH_MASK 0x07 #define LOWPAN_FRAG_DGRAM_SIZE_HIGH_SHIFT 8 static int lowpan_get_cb(struct sk_buff *skb, u8 frag_type, struct lowpan_802154_cb *cb) { bool fail; u8 high = 0, low = 0; __be16 d_tag = 0; fail = lowpan_fetch_skb(skb, &high, 1); fail |= lowpan_fetch_skb(skb, &low, 1); /* remove the dispatch value and use first three bits as high value * for the datagram size */ cb->d_size = (high & LOWPAN_FRAG_DGRAM_SIZE_HIGH_MASK) << LOWPAN_FRAG_DGRAM_SIZE_HIGH_SHIFT | low; fail |= lowpan_fetch_skb(skb, &d_tag, 2); cb->d_tag = ntohs(d_tag); if (frag_type == LOWPAN_DISPATCH_FRAGN) { fail |= lowpan_fetch_skb(skb, &cb->d_offset, 1); } else { skb_reset_network_header(skb); cb->d_offset = 0; /* check if datagram_size has ipv6hdr on FRAG1 */ fail |= cb->d_size < sizeof(struct ipv6hdr); /* check if we can dereference the dispatch value */ fail |= !skb->len; } if (unlikely(fail)) return -EIO; return 0; } int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type) { struct lowpan_frag_queue *fq; struct net *net = dev_net(skb->dev); struct lowpan_802154_cb *cb = lowpan_802154_cb(skb); struct ieee802154_hdr hdr = {}; int err; if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0) goto err; err = lowpan_get_cb(skb, frag_type, cb); if (err < 0) goto err; if (frag_type == LOWPAN_DISPATCH_FRAG1) { err = lowpan_invoke_frag_rx_handlers(skb); if (err == NET_RX_DROP) goto err; } if (cb->d_size > IPV6_MIN_MTU) { net_warn_ratelimited("lowpan_frag_rcv: datagram size exceeds MTU\n"); goto err; } rcu_read_lock(); fq = fq_find(net, cb, &hdr.source, &hdr.dest); if (fq != NULL) { int ret, refs = 0; spin_lock(&fq->q.lock); ret = lowpan_frag_queue(fq, skb, frag_type, &refs); spin_unlock(&fq->q.lock); rcu_read_unlock(); inet_frag_putn(&fq->q, refs); return ret; } rcu_read_unlock(); err: kfree_skb(skb); return -1; } #ifdef CONFIG_SYSCTL static struct ctl_table lowpan_frags_ns_ctl_table[] = { { .procname = "6lowpanfrag_high_thresh", .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = proc_doulongvec_minmax, }, { .procname = 
"6lowpanfrag_low_thresh", .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = proc_doulongvec_minmax, }, { .procname = "6lowpanfrag_time", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, }; /* secret interval has been deprecated */ static int lowpan_frags_secret_interval_unused; static struct ctl_table lowpan_frags_ctl_table[] = { { .procname = "6lowpanfrag_secret_interval", .data = &lowpan_frags_secret_interval_unused, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, }; static int __net_init lowpan_frags_ns_sysctl_register(struct net *net) { struct ctl_table *table; struct ctl_table_header *hdr; struct netns_ieee802154_lowpan *ieee802154_lowpan = net_ieee802154_lowpan(net); size_t table_size = ARRAY_SIZE(lowpan_frags_ns_ctl_table); table = lowpan_frags_ns_ctl_table; if (!net_eq(net, &init_net)) { table = kmemdup(table, sizeof(lowpan_frags_ns_ctl_table), GFP_KERNEL); if (table == NULL) goto err_alloc; /* Don't export sysctls to unprivileged users */ if (net->user_ns != &init_user_ns) table_size = 0; } table[0].data = &ieee802154_lowpan->fqdir->high_thresh; table[0].extra1 = &ieee802154_lowpan->fqdir->low_thresh; table[1].data = &ieee802154_lowpan->fqdir->low_thresh; table[1].extra2 = &ieee802154_lowpan->fqdir->high_thresh; table[2].data = &ieee802154_lowpan->fqdir->timeout; hdr = register_net_sysctl_sz(net, "net/ieee802154/6lowpan", table, table_size); if (hdr == NULL) goto err_reg; ieee802154_lowpan->sysctl.frags_hdr = hdr; return 0; err_reg: if (!net_eq(net, &init_net)) kfree(table); err_alloc: return -ENOMEM; } static void __net_exit lowpan_frags_ns_sysctl_unregister(struct net *net) { const struct ctl_table *table; struct netns_ieee802154_lowpan *ieee802154_lowpan = net_ieee802154_lowpan(net); table = ieee802154_lowpan->sysctl.frags_hdr->ctl_table_arg; unregister_net_sysctl_table(ieee802154_lowpan->sysctl.frags_hdr); if (!net_eq(net, &init_net)) kfree(table); } static struct ctl_table_header *lowpan_ctl_header; static int __init lowpan_frags_sysctl_register(void) { lowpan_ctl_header = register_net_sysctl(&init_net, "net/ieee802154/6lowpan", lowpan_frags_ctl_table); return lowpan_ctl_header == NULL ? 
-ENOMEM : 0; } static void lowpan_frags_sysctl_unregister(void) { unregister_net_sysctl_table(lowpan_ctl_header); } #else static inline int lowpan_frags_ns_sysctl_register(struct net *net) { return 0; } static inline void lowpan_frags_ns_sysctl_unregister(struct net *net) { } static inline int __init lowpan_frags_sysctl_register(void) { return 0; } static inline void lowpan_frags_sysctl_unregister(void) { } #endif static int __net_init lowpan_frags_init_net(struct net *net) { struct netns_ieee802154_lowpan *ieee802154_lowpan = net_ieee802154_lowpan(net); int res; res = fqdir_init(&ieee802154_lowpan->fqdir, &lowpan_frags, net); if (res < 0) return res; ieee802154_lowpan->fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH; ieee802154_lowpan->fqdir->low_thresh = IPV6_FRAG_LOW_THRESH; ieee802154_lowpan->fqdir->timeout = IPV6_FRAG_TIMEOUT; res = lowpan_frags_ns_sysctl_register(net); if (res < 0) fqdir_exit(ieee802154_lowpan->fqdir); return res; } static void __net_exit lowpan_frags_pre_exit_net(struct net *net) { struct netns_ieee802154_lowpan *ieee802154_lowpan = net_ieee802154_lowpan(net); fqdir_pre_exit(ieee802154_lowpan->fqdir); } static void __net_exit lowpan_frags_exit_net(struct net *net) { struct netns_ieee802154_lowpan *ieee802154_lowpan = net_ieee802154_lowpan(net); lowpan_frags_ns_sysctl_unregister(net); fqdir_exit(ieee802154_lowpan->fqdir); } static struct pernet_operations lowpan_frags_ops = { .init = lowpan_frags_init_net, .pre_exit = lowpan_frags_pre_exit_net, .exit = lowpan_frags_exit_net, }; static u32 lowpan_key_hashfn(const void *data, u32 len, u32 seed) { return jhash2(data, sizeof(struct frag_lowpan_compare_key) / sizeof(u32), seed); } static u32 lowpan_obj_hashfn(const void *data, u32 len, u32 seed) { const struct inet_frag_queue *fq = data; return jhash2((const u32 *)&fq->key, sizeof(struct frag_lowpan_compare_key) / sizeof(u32), seed); } static int lowpan_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr) { const struct frag_lowpan_compare_key *key = arg->key; const struct inet_frag_queue *fq = ptr; return !!memcmp(&fq->key, key, sizeof(*key)); } static const struct rhashtable_params lowpan_rhash_params = { .head_offset = offsetof(struct inet_frag_queue, node), .hashfn = lowpan_key_hashfn, .obj_hashfn = lowpan_obj_hashfn, .obj_cmpfn = lowpan_obj_cmpfn, .automatic_shrinking = true, }; int __init lowpan_net_frag_init(void) { int ret; lowpan_frags.constructor = lowpan_frag_init; lowpan_frags.destructor = NULL; lowpan_frags.qsize = sizeof(struct frag_queue); lowpan_frags.frag_expire = lowpan_frag_expire; lowpan_frags.frags_cache_name = lowpan_frags_cache_name; lowpan_frags.rhash_params = lowpan_rhash_params; ret = inet_frags_init(&lowpan_frags); if (ret) goto out; ret = lowpan_frags_sysctl_register(); if (ret) goto err_sysctl; ret = register_pernet_subsys(&lowpan_frags_ops); if (ret) goto err_pernet; out: return ret; err_pernet: lowpan_frags_sysctl_unregister(); err_sysctl: inet_frags_fini(&lowpan_frags); return ret; } void lowpan_net_frag_exit(void) { lowpan_frags_sysctl_unregister(); unregister_pernet_subsys(&lowpan_frags_ops); inet_frags_fini(&lowpan_frags); } |
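/*
 * Illustrative sketch, not part of the module above: how the RFC 4944
 * fragmentation header maps onto the fields that lowpan_get_cb() fills in.
 * The first byte carries the FRAG1/FRAGN dispatch bits plus the top three
 * bits of datagram_size (masked with LOWPAN_FRAG_DGRAM_SIZE_HIGH_MASK),
 * the second byte holds the low eight bits, followed by a 16-bit
 * big-endian datagram_tag; FRAGN frames add one more byte holding the
 * offset in 8-octet units (hence the "d_offset << 3" in
 * lowpan_frag_queue()).  The decoder below is a hypothetical,
 * self-contained userspace helper written only to make that layout
 * explicit -- its names and return convention are assumptions, not
 * kernel API.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct lowpan_frag_hdr_sketch {
	uint16_t d_size;   /* total datagram size in octets (11 bits) */
	uint16_t d_tag;    /* datagram tag, converted to host order */
	uint16_t d_offset; /* fragment offset in octets, 0 for FRAG1 */
};

/* Decode a 4-byte FRAG1 or 5-byte FRAGN header; returns false if truncated. */
static bool lowpan_frag_hdr_decode_sketch(const uint8_t *buf, size_t len,
					  bool fragn,
					  struct lowpan_frag_hdr_sketch *out)
{
	if (len < (fragn ? 5u : 4u))
		return false;

	out->d_size   = (uint16_t)((buf[0] & 0x07) << 8) | buf[1];
	out->d_tag    = (uint16_t)(buf[2] << 8) | buf[3];
	out->d_offset = fragn ? (uint16_t)(buf[4] << 3) : 0;
	return true;
}
/*
 * For FRAG1 the (possibly IPHC-compressed) IPv6 header follows immediately,
 * which is why lowpan_get_cb() rejects a datagram_size smaller than
 * sizeof(struct ipv6hdr) for that dispatch type.
 */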
// SPDX-License-Identifier: GPL-2.0-only /* * GENEVE: Generic Network Virtualization Encapsulation * * Copyright (c) 2015 Red Hat, Inc. 
*/ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/ethtool.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/etherdevice.h> #include <linux/hash.h> #include <net/ipv6_stubs.h> #include <net/dst_metadata.h> #include <net/gro_cells.h> #include <net/rtnetlink.h> #include <net/geneve.h> #include <net/gro.h> #include <net/netdev_lock.h> #include <net/protocol.h> #define GENEVE_NETDEV_VER "0.6" #define GENEVE_N_VID (1u << 24) #define GENEVE_VID_MASK (GENEVE_N_VID - 1) #define VNI_HASH_BITS 10 #define VNI_HASH_SIZE (1<<VNI_HASH_BITS) static bool log_ecn_error = true; module_param(log_ecn_error, bool, 0644); MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); #define GENEVE_VER 0 #define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr)) #define GENEVE_IPV4_HLEN (ETH_HLEN + sizeof(struct iphdr) + GENEVE_BASE_HLEN) #define GENEVE_IPV6_HLEN (ETH_HLEN + sizeof(struct ipv6hdr) + GENEVE_BASE_HLEN) /* per-network namespace private data for this module */ struct geneve_net { struct list_head geneve_list; struct list_head sock_list; }; static unsigned int geneve_net_id; struct geneve_dev_node { struct hlist_node hlist; struct geneve_dev *geneve; }; struct geneve_config { struct ip_tunnel_info info; bool collect_md; bool use_udp6_rx_checksums; bool ttl_inherit; enum ifla_geneve_df df; bool inner_proto_inherit; u16 port_min; u16 port_max; }; /* Pseudo network device */ struct geneve_dev { struct geneve_dev_node hlist4; /* vni hash table for IPv4 socket */ #if IS_ENABLED(CONFIG_IPV6) struct geneve_dev_node hlist6; /* vni hash table for IPv6 socket */ #endif struct net *net; /* netns for packet i/o */ struct net_device *dev; /* netdev for geneve tunnel */ struct geneve_sock __rcu *sock4; /* IPv4 socket used for geneve tunnel */ #if IS_ENABLED(CONFIG_IPV6) struct geneve_sock __rcu *sock6; /* IPv6 socket used for geneve tunnel */ #endif struct list_head next; /* geneve's per namespace list */ struct gro_cells gro_cells; struct geneve_config cfg; }; struct geneve_sock { bool collect_md; struct list_head list; struct socket *sock; struct rcu_head rcu; int refcnt; struct hlist_head vni_list[VNI_HASH_SIZE]; }; static inline __u32 geneve_net_vni_hash(u8 vni[3]) { __u32 vnid; vnid = (vni[0] << 16) | (vni[1] << 8) | vni[2]; return hash_32(vnid, VNI_HASH_BITS); } static __be64 vni_to_tunnel_id(const __u8 *vni) { #ifdef __BIG_ENDIAN return (vni[0] << 16) | (vni[1] << 8) | vni[2]; #else return (__force __be64)(((__force u64)vni[0] << 40) | ((__force u64)vni[1] << 48) | ((__force u64)vni[2] << 56)); #endif } /* Convert 64 bit tunnel ID to 24 bit VNI. 
*/ static void tunnel_id_to_vni(__be64 tun_id, __u8 *vni) { #ifdef __BIG_ENDIAN vni[0] = (__force __u8)(tun_id >> 16); vni[1] = (__force __u8)(tun_id >> 8); vni[2] = (__force __u8)tun_id; #else vni[0] = (__force __u8)((__force u64)tun_id >> 40); vni[1] = (__force __u8)((__force u64)tun_id >> 48); vni[2] = (__force __u8)((__force u64)tun_id >> 56); #endif } static bool eq_tun_id_and_vni(u8 *tun_id, u8 *vni) { return !memcmp(vni, &tun_id[5], 3); } static sa_family_t geneve_get_sk_family(struct geneve_sock *gs) { return gs->sock->sk->sk_family; } static struct geneve_dev *geneve_lookup(struct geneve_sock *gs, __be32 addr, u8 vni[]) { struct hlist_head *vni_list_head; struct geneve_dev_node *node; __u32 hash; /* Find the device for this VNI */ hash = geneve_net_vni_hash(vni); vni_list_head = &gs->vni_list[hash]; hlist_for_each_entry_rcu(node, vni_list_head, hlist) { if (eq_tun_id_and_vni((u8 *)&node->geneve->cfg.info.key.tun_id, vni) && addr == node->geneve->cfg.info.key.u.ipv4.dst) return node->geneve; } return NULL; } #if IS_ENABLED(CONFIG_IPV6) static struct geneve_dev *geneve6_lookup(struct geneve_sock *gs, struct in6_addr addr6, u8 vni[]) { struct hlist_head *vni_list_head; struct geneve_dev_node *node; __u32 hash; /* Find the device for this VNI */ hash = geneve_net_vni_hash(vni); vni_list_head = &gs->vni_list[hash]; hlist_for_each_entry_rcu(node, vni_list_head, hlist) { if (eq_tun_id_and_vni((u8 *)&node->geneve->cfg.info.key.tun_id, vni) && ipv6_addr_equal(&addr6, &node->geneve->cfg.info.key.u.ipv6.dst)) return node->geneve; } return NULL; } #endif static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) { return (struct genevehdr *)(udp_hdr(skb) + 1); } static struct geneve_dev *geneve_lookup_skb(struct geneve_sock *gs, struct sk_buff *skb) { static u8 zero_vni[3]; u8 *vni; if (geneve_get_sk_family(gs) == AF_INET) { struct iphdr *iph; __be32 addr; iph = ip_hdr(skb); /* outer IP header... */ if (gs->collect_md) { vni = zero_vni; addr = 0; } else { vni = geneve_hdr(skb)->vni; addr = iph->saddr; } return geneve_lookup(gs, addr, vni); #if IS_ENABLED(CONFIG_IPV6) } else if (geneve_get_sk_family(gs) == AF_INET6) { static struct in6_addr zero_addr6; struct ipv6hdr *ip6h; struct in6_addr addr6; ip6h = ipv6_hdr(skb); /* outer IPv6 header... */ if (gs->collect_md) { vni = zero_vni; addr6 = zero_addr6; } else { vni = geneve_hdr(skb)->vni; addr6 = ip6h->saddr; } return geneve6_lookup(gs, addr6, vni); #endif } return NULL; } /* geneve receive/decap routine */ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, struct sk_buff *skb) { struct genevehdr *gnvh = geneve_hdr(skb); struct metadata_dst *tun_dst = NULL; unsigned int len; int nh, err = 0; void *oiph; if (ip_tunnel_collect_metadata() || gs->collect_md) { IP_TUNNEL_DECLARE_FLAGS(flags) = { }; __set_bit(IP_TUNNEL_KEY_BIT, flags); __assign_bit(IP_TUNNEL_OAM_BIT, flags, gnvh->oam); __assign_bit(IP_TUNNEL_CRIT_OPT_BIT, flags, gnvh->critical); tun_dst = udp_tun_rx_dst(skb, geneve_get_sk_family(gs), flags, vni_to_tunnel_id(gnvh->vni), gnvh->opt_len * 4); if (!tun_dst) { dev_dstats_rx_dropped(geneve->dev); goto drop; } /* Update tunnel dst according to Geneve options. */ ip_tunnel_flags_zero(flags); __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, flags); ip_tunnel_info_opts_set(&tun_dst->u.tun_info, gnvh->options, gnvh->opt_len * 4, flags); } else { /* Drop packets w/ critical options, * since we don't support any... 
*/ if (gnvh->critical) { DEV_STATS_INC(geneve->dev, rx_frame_errors); DEV_STATS_INC(geneve->dev, rx_errors); goto drop; } } if (tun_dst) skb_dst_set(skb, &tun_dst->dst); if (gnvh->proto_type == htons(ETH_P_TEB)) { skb_reset_mac_header(skb); skb->protocol = eth_type_trans(skb, geneve->dev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); /* Ignore packet loops (and multicast echo) */ if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr)) { DEV_STATS_INC(geneve->dev, rx_errors); goto drop; } } else { skb_reset_mac_header(skb); skb->dev = geneve->dev; skb->pkt_type = PACKET_HOST; } /* Save offset of outer header relative to skb->head, * because we are going to reset the network header to the inner header * and might change skb->head. */ nh = skb_network_header(skb) - skb->head; skb_reset_network_header(skb); if (!pskb_inet_may_pull(skb)) { DEV_STATS_INC(geneve->dev, rx_length_errors); DEV_STATS_INC(geneve->dev, rx_errors); goto drop; } /* Get the outer header. */ oiph = skb->head + nh; if (geneve_get_sk_family(gs) == AF_INET) err = IP_ECN_decapsulate(oiph, skb); #if IS_ENABLED(CONFIG_IPV6) else err = IP6_ECN_decapsulate(oiph, skb); #endif if (unlikely(err)) { if (log_ecn_error) { if (geneve_get_sk_family(gs) == AF_INET) net_info_ratelimited("non-ECT from %pI4 " "with TOS=%#x\n", &((struct iphdr *)oiph)->saddr, ((struct iphdr *)oiph)->tos); #if IS_ENABLED(CONFIG_IPV6) else net_info_ratelimited("non-ECT from %pI6\n", &((struct ipv6hdr *)oiph)->saddr); #endif } if (err > 1) { DEV_STATS_INC(geneve->dev, rx_frame_errors); DEV_STATS_INC(geneve->dev, rx_errors); goto drop; } } len = skb->len; err = gro_cells_receive(&geneve->gro_cells, skb); if (likely(err == NET_RX_SUCCESS)) dev_dstats_rx_add(geneve->dev, len); return; drop: /* Consume bad packet */ kfree_skb(skb); } /* Setup stats when device is created */ static int geneve_init(struct net_device *dev) { struct geneve_dev *geneve = netdev_priv(dev); int err; err = gro_cells_init(&geneve->gro_cells, dev); if (err) return err; err = dst_cache_init(&geneve->cfg.info.dst_cache, GFP_KERNEL); if (err) { gro_cells_destroy(&geneve->gro_cells); return err; } netdev_lockdep_set_classes(dev); return 0; } static void geneve_uninit(struct net_device *dev) { struct geneve_dev *geneve = netdev_priv(dev); dst_cache_destroy(&geneve->cfg.info.dst_cache); gro_cells_destroy(&geneve->gro_cells); } /* Callback from net/ipv4/udp.c to receive packets */ static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct genevehdr *geneveh; struct geneve_dev *geneve; struct geneve_sock *gs; __be16 inner_proto; int opts_len; /* Need UDP and Geneve header to be present */ if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN))) goto drop; /* Return packets with reserved bits set */ geneveh = geneve_hdr(skb); if (unlikely(geneveh->ver != GENEVE_VER)) goto drop; gs = rcu_dereference_sk_user_data(sk); if (!gs) goto drop; geneve = geneve_lookup_skb(gs, skb); if (!geneve) goto drop; inner_proto = geneveh->proto_type; if (unlikely((!geneve->cfg.inner_proto_inherit && inner_proto != htons(ETH_P_TEB)))) { dev_dstats_rx_dropped(geneve->dev); goto drop; } opts_len = geneveh->opt_len * 4; if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len, inner_proto, !net_eq(geneve->net, dev_net(geneve->dev)))) { dev_dstats_rx_dropped(geneve->dev); goto drop; } geneve_rx(geneve, gs, skb); return 0; drop: /* Consume bad packet */ kfree_skb(skb); return 0; } /* Callback from net/ipv{4,6}/udp.c to check that we have a tunnel for errors */ static int 
geneve_udp_encap_err_lookup(struct sock *sk, struct sk_buff *skb) { struct genevehdr *geneveh; struct geneve_sock *gs; u8 zero_vni[3] = { 0 }; u8 *vni = zero_vni; if (!pskb_may_pull(skb, skb_transport_offset(skb) + GENEVE_BASE_HLEN)) return -EINVAL; geneveh = geneve_hdr(skb); if (geneveh->ver != GENEVE_VER) return -EINVAL; if (geneveh->proto_type != htons(ETH_P_TEB)) return -EINVAL; gs = rcu_dereference_sk_user_data(sk); if (!gs) return -ENOENT; if (geneve_get_sk_family(gs) == AF_INET) { struct iphdr *iph = ip_hdr(skb); __be32 addr4 = 0; if (!gs->collect_md) { vni = geneve_hdr(skb)->vni; addr4 = iph->daddr; } return geneve_lookup(gs, addr4, vni) ? 0 : -ENOENT; } #if IS_ENABLED(CONFIG_IPV6) if (geneve_get_sk_family(gs) == AF_INET6) { struct ipv6hdr *ip6h = ipv6_hdr(skb); struct in6_addr addr6; memset(&addr6, 0, sizeof(struct in6_addr)); if (!gs->collect_md) { vni = geneve_hdr(skb)->vni; addr6 = ip6h->daddr; } return geneve6_lookup(gs, addr6, vni) ? 0 : -ENOENT; } #endif return -EPFNOSUPPORT; } static struct socket *geneve_create_sock(struct net *net, bool ipv6, __be16 port, bool ipv6_rx_csum) { struct socket *sock; struct udp_port_cfg udp_conf; int err; memset(&udp_conf, 0, sizeof(udp_conf)); if (ipv6) { udp_conf.family = AF_INET6; udp_conf.ipv6_v6only = 1; udp_conf.use_udp6_rx_checksums = ipv6_rx_csum; } else { udp_conf.family = AF_INET; udp_conf.local_ip.s_addr = htonl(INADDR_ANY); } udp_conf.local_udp_port = port; /* Open UDP socket */ err = udp_sock_create(net, &udp_conf, &sock); if (err < 0) return ERR_PTR(err); udp_allow_gso(sock->sk); return sock; } static int geneve_hlen(struct genevehdr *gh) { return sizeof(*gh) + gh->opt_len * 4; } static struct sk_buff *geneve_gro_receive(struct sock *sk, struct list_head *head, struct sk_buff *skb) { struct sk_buff *pp = NULL; struct sk_buff *p; struct genevehdr *gh, *gh2; unsigned int hlen, gh_len, off_gnv; const struct packet_offload *ptype; __be16 type; int flush = 1; off_gnv = skb_gro_offset(skb); hlen = off_gnv + sizeof(*gh); gh = skb_gro_header(skb, hlen, off_gnv); if (unlikely(!gh)) goto out; if (gh->ver != GENEVE_VER || gh->oam) goto out; gh_len = geneve_hlen(gh); hlen = off_gnv + gh_len; if (!skb_gro_may_pull(skb, hlen)) { gh = skb_gro_header_slow(skb, hlen, off_gnv); if (unlikely(!gh)) goto out; } list_for_each_entry(p, head, list) { if (!NAPI_GRO_CB(p)->same_flow) continue; gh2 = (struct genevehdr *)(p->data + off_gnv); if (gh->opt_len != gh2->opt_len || memcmp(gh, gh2, gh_len)) { NAPI_GRO_CB(p)->same_flow = 0; continue; } } skb_gro_pull(skb, gh_len); skb_gro_postpull_rcsum(skb, gh, gh_len); type = gh->proto_type; if (likely(type == htons(ETH_P_TEB))) return call_gro_receive(eth_gro_receive, head, skb); ptype = gro_find_receive_by_type(type); if (!ptype) goto out; pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb); flush = 0; out: skb_gro_flush_final(skb, pp, flush); return pp; } static int geneve_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff) { struct genevehdr *gh; struct packet_offload *ptype; __be16 type; int gh_len; int err = -ENOSYS; gh = (struct genevehdr *)(skb->data + nhoff); gh_len = geneve_hlen(gh); type = gh->proto_type; /* since skb->encapsulation is set, eth_gro_complete() sets the inner mac header */ if (likely(type == htons(ETH_P_TEB))) return eth_gro_complete(skb, nhoff + gh_len); ptype = gro_find_complete_by_type(type); if (ptype) err = ptype->callbacks.gro_complete(skb, nhoff + gh_len); skb_set_inner_mac_header(skb, nhoff + gh_len); return err; } /* Create new listen socket if needed */ 
static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, bool ipv6, bool ipv6_rx_csum) { struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_sock *gs; struct socket *sock; struct udp_tunnel_sock_cfg tunnel_cfg; int h; gs = kzalloc(sizeof(*gs), GFP_KERNEL); if (!gs) return ERR_PTR(-ENOMEM); sock = geneve_create_sock(net, ipv6, port, ipv6_rx_csum); if (IS_ERR(sock)) { kfree(gs); return ERR_CAST(sock); } gs->sock = sock; gs->refcnt = 1; for (h = 0; h < VNI_HASH_SIZE; ++h) INIT_HLIST_HEAD(&gs->vni_list[h]); /* Initialize the geneve udp offloads structure */ udp_tunnel_notify_add_rx_port(gs->sock, UDP_TUNNEL_TYPE_GENEVE); /* Mark socket as an encapsulation socket */ memset(&tunnel_cfg, 0, sizeof(tunnel_cfg)); tunnel_cfg.sk_user_data = gs; tunnel_cfg.encap_type = 1; tunnel_cfg.gro_receive = geneve_gro_receive; tunnel_cfg.gro_complete = geneve_gro_complete; tunnel_cfg.encap_rcv = geneve_udp_encap_recv; tunnel_cfg.encap_err_lookup = geneve_udp_encap_err_lookup; tunnel_cfg.encap_destroy = NULL; setup_udp_tunnel_sock(net, sock, &tunnel_cfg); list_add(&gs->list, &gn->sock_list); return gs; } static void __geneve_sock_release(struct geneve_sock *gs) { if (!gs || --gs->refcnt) return; list_del(&gs->list); udp_tunnel_notify_del_rx_port(gs->sock, UDP_TUNNEL_TYPE_GENEVE); udp_tunnel_sock_release(gs->sock); kfree_rcu(gs, rcu); } static void geneve_sock_release(struct geneve_dev *geneve) { struct geneve_sock *gs4 = rtnl_dereference(geneve->sock4); #if IS_ENABLED(CONFIG_IPV6) struct geneve_sock *gs6 = rtnl_dereference(geneve->sock6); rcu_assign_pointer(geneve->sock6, NULL); #endif rcu_assign_pointer(geneve->sock4, NULL); synchronize_net(); __geneve_sock_release(gs4); #if IS_ENABLED(CONFIG_IPV6) __geneve_sock_release(gs6); #endif } static struct geneve_sock *geneve_find_sock(struct geneve_net *gn, sa_family_t family, __be16 dst_port) { struct geneve_sock *gs; list_for_each_entry(gs, &gn->sock_list, list) { if (inet_sk(gs->sock->sk)->inet_sport == dst_port && geneve_get_sk_family(gs) == family) { return gs; } } return NULL; } static int geneve_sock_add(struct geneve_dev *geneve, bool ipv6) { struct net *net = geneve->net; struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_dev_node *node; struct geneve_sock *gs; __u8 vni[3]; __u32 hash; gs = geneve_find_sock(gn, ipv6 ? 
AF_INET6 : AF_INET, geneve->cfg.info.key.tp_dst); if (gs) { gs->refcnt++; goto out; } gs = geneve_socket_create(net, geneve->cfg.info.key.tp_dst, ipv6, geneve->cfg.use_udp6_rx_checksums); if (IS_ERR(gs)) return PTR_ERR(gs); out: gs->collect_md = geneve->cfg.collect_md; #if IS_ENABLED(CONFIG_IPV6) if (ipv6) { rcu_assign_pointer(geneve->sock6, gs); node = &geneve->hlist6; } else #endif { rcu_assign_pointer(geneve->sock4, gs); node = &geneve->hlist4; } node->geneve = geneve; tunnel_id_to_vni(geneve->cfg.info.key.tun_id, vni); hash = geneve_net_vni_hash(vni); hlist_add_head_rcu(&node->hlist, &gs->vni_list[hash]); return 0; } static int geneve_open(struct net_device *dev) { struct geneve_dev *geneve = netdev_priv(dev); bool metadata = geneve->cfg.collect_md; bool ipv4, ipv6; int ret = 0; ipv6 = geneve->cfg.info.mode & IP_TUNNEL_INFO_IPV6 || metadata; ipv4 = !ipv6 || metadata; #if IS_ENABLED(CONFIG_IPV6) if (ipv6) { ret = geneve_sock_add(geneve, true); if (ret < 0 && ret != -EAFNOSUPPORT) ipv4 = false; } #endif if (ipv4) ret = geneve_sock_add(geneve, false); if (ret < 0) geneve_sock_release(geneve); return ret; } static int geneve_stop(struct net_device *dev) { struct geneve_dev *geneve = netdev_priv(dev); hlist_del_init_rcu(&geneve->hlist4.hlist); #if IS_ENABLED(CONFIG_IPV6) hlist_del_init_rcu(&geneve->hlist6.hlist); #endif geneve_sock_release(geneve); return 0; } static void geneve_build_header(struct genevehdr *geneveh, const struct ip_tunnel_info *info, __be16 inner_proto) { geneveh->ver = GENEVE_VER; geneveh->opt_len = info->options_len / 4; geneveh->oam = test_bit(IP_TUNNEL_OAM_BIT, info->key.tun_flags); geneveh->critical = test_bit(IP_TUNNEL_CRIT_OPT_BIT, info->key.tun_flags); geneveh->rsvd1 = 0; tunnel_id_to_vni(info->key.tun_id, geneveh->vni); geneveh->proto_type = inner_proto; geneveh->rsvd2 = 0; if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags)) ip_tunnel_info_opts_get(geneveh->options, info); } static int geneve_build_skb(struct dst_entry *dst, struct sk_buff *skb, const struct ip_tunnel_info *info, bool xnet, int ip_hdr_len, bool inner_proto_inherit) { bool udp_sum = test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); struct genevehdr *gnvh; __be16 inner_proto; int min_headroom; int err; skb_reset_mac_header(skb); skb_scrub_packet(skb, xnet); min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len + GENEVE_BASE_HLEN + info->options_len + ip_hdr_len; err = skb_cow_head(skb, min_headroom); if (unlikely(err)) goto free_dst; err = udp_tunnel_handle_offloads(skb, udp_sum); if (err) goto free_dst; gnvh = __skb_push(skb, sizeof(*gnvh) + info->options_len); inner_proto = inner_proto_inherit ? 
skb->protocol : htons(ETH_P_TEB); geneve_build_header(gnvh, info, inner_proto); skb_set_inner_protocol(skb, inner_proto); return 0; free_dst: dst_release(dst); return err; } static u8 geneve_get_dsfield(struct sk_buff *skb, struct net_device *dev, const struct ip_tunnel_info *info, bool *use_cache) { struct geneve_dev *geneve = netdev_priv(dev); u8 dsfield; dsfield = info->key.tos; if (dsfield == 1 && !geneve->cfg.collect_md) { dsfield = ip_tunnel_get_dsfield(ip_hdr(skb), skb); *use_cache = false; } return dsfield; } static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, struct geneve_dev *geneve, const struct ip_tunnel_info *info) { bool inner_proto_inherit = geneve->cfg.inner_proto_inherit; bool xnet = !net_eq(geneve->net, dev_net(geneve->dev)); struct geneve_sock *gs4 = rcu_dereference(geneve->sock4); const struct ip_tunnel_key *key = &info->key; struct rtable *rt; bool use_cache; __u8 tos, ttl; __be16 df = 0; __be32 saddr; __be16 sport; int err; if (skb_vlan_inet_prepare(skb, inner_proto_inherit)) return -EINVAL; if (!gs4) return -EIO; use_cache = ip_tunnel_dst_cache_usable(skb, info); tos = geneve_get_dsfield(skb, dev, info, &use_cache); sport = udp_flow_src_port(geneve->net, skb, geneve->cfg.port_min, geneve->cfg.port_max, true); rt = udp_tunnel_dst_lookup(skb, dev, geneve->net, 0, &saddr, &info->key, sport, geneve->cfg.info.key.tp_dst, tos, use_cache ? (struct dst_cache *)&info->dst_cache : NULL); if (IS_ERR(rt)) return PTR_ERR(rt); err = skb_tunnel_check_pmtu(skb, &rt->dst, GENEVE_IPV4_HLEN + info->options_len, netif_is_any_bridge_port(dev)); if (err < 0) { dst_release(&rt->dst); return err; } else if (err) { struct ip_tunnel_info *info; info = skb_tunnel_info(skb); if (info) { struct ip_tunnel_info *unclone; unclone = skb_tunnel_info_unclone(skb); if (unlikely(!unclone)) { dst_release(&rt->dst); return -ENOMEM; } unclone->key.u.ipv4.dst = saddr; unclone->key.u.ipv4.src = info->key.u.ipv4.dst; } if (!pskb_may_pull(skb, ETH_HLEN)) { dst_release(&rt->dst); return -EINVAL; } skb->protocol = eth_type_trans(skb, geneve->dev); __netif_rx(skb); dst_release(&rt->dst); return -EMSGSIZE; } tos = ip_tunnel_ecn_encap(tos, ip_hdr(skb), skb); if (geneve->cfg.collect_md) { ttl = key->ttl; df = test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags) ? htons(IP_DF) : 0; } else { if (geneve->cfg.ttl_inherit) ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb); else ttl = key->ttl; ttl = ttl ? 
: ip4_dst_hoplimit(&rt->dst); if (geneve->cfg.df == GENEVE_DF_SET) { df = htons(IP_DF); } else if (geneve->cfg.df == GENEVE_DF_INHERIT) { struct ethhdr *eth = skb_eth_hdr(skb); if (ntohs(eth->h_proto) == ETH_P_IPV6) { df = htons(IP_DF); } else if (ntohs(eth->h_proto) == ETH_P_IP) { struct iphdr *iph = ip_hdr(skb); if (iph->frag_off & htons(IP_DF)) df = htons(IP_DF); } } } err = geneve_build_skb(&rt->dst, skb, info, xnet, sizeof(struct iphdr), inner_proto_inherit); if (unlikely(err)) return err; udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, saddr, info->key.u.ipv4.dst, tos, ttl, df, sport, geneve->cfg.info.key.tp_dst, !net_eq(geneve->net, dev_net(geneve->dev)), !test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags)); return 0; } #if IS_ENABLED(CONFIG_IPV6) static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, struct geneve_dev *geneve, const struct ip_tunnel_info *info) { bool inner_proto_inherit = geneve->cfg.inner_proto_inherit; bool xnet = !net_eq(geneve->net, dev_net(geneve->dev)); struct geneve_sock *gs6 = rcu_dereference(geneve->sock6); const struct ip_tunnel_key *key = &info->key; struct dst_entry *dst = NULL; struct in6_addr saddr; bool use_cache; __u8 prio, ttl; __be16 sport; int err; if (skb_vlan_inet_prepare(skb, inner_proto_inherit)) return -EINVAL; if (!gs6) return -EIO; use_cache = ip_tunnel_dst_cache_usable(skb, info); prio = geneve_get_dsfield(skb, dev, info, &use_cache); sport = udp_flow_src_port(geneve->net, skb, geneve->cfg.port_min, geneve->cfg.port_max, true); dst = udp_tunnel6_dst_lookup(skb, dev, geneve->net, gs6->sock, 0, &saddr, key, sport, geneve->cfg.info.key.tp_dst, prio, use_cache ? (struct dst_cache *)&info->dst_cache : NULL); if (IS_ERR(dst)) return PTR_ERR(dst); err = skb_tunnel_check_pmtu(skb, dst, GENEVE_IPV6_HLEN + info->options_len, netif_is_any_bridge_port(dev)); if (err < 0) { dst_release(dst); return err; } else if (err) { struct ip_tunnel_info *info = skb_tunnel_info(skb); if (info) { struct ip_tunnel_info *unclone; unclone = skb_tunnel_info_unclone(skb); if (unlikely(!unclone)) { dst_release(dst); return -ENOMEM; } unclone->key.u.ipv6.dst = saddr; unclone->key.u.ipv6.src = info->key.u.ipv6.dst; } if (!pskb_may_pull(skb, ETH_HLEN)) { dst_release(dst); return -EINVAL; } skb->protocol = eth_type_trans(skb, geneve->dev); __netif_rx(skb); dst_release(dst); return -EMSGSIZE; } prio = ip_tunnel_ecn_encap(prio, ip_hdr(skb), skb); if (geneve->cfg.collect_md) { ttl = key->ttl; } else { if (geneve->cfg.ttl_inherit) ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb); else ttl = key->ttl; ttl = ttl ? 
: ip6_dst_hoplimit(dst); } err = geneve_build_skb(dst, skb, info, xnet, sizeof(struct ipv6hdr), inner_proto_inherit); if (unlikely(err)) return err; udp_tunnel6_xmit_skb(dst, gs6->sock->sk, skb, dev, &saddr, &key->u.ipv6.dst, prio, ttl, info->key.label, sport, geneve->cfg.info.key.tp_dst, !test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags)); return 0; } #endif static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) { struct geneve_dev *geneve = netdev_priv(dev); struct ip_tunnel_info *info = NULL; int err; if (geneve->cfg.collect_md) { info = skb_tunnel_info(skb); if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) { netdev_dbg(dev, "no tunnel metadata\n"); dev_kfree_skb(skb); dev_dstats_tx_dropped(dev); return NETDEV_TX_OK; } } else { info = &geneve->cfg.info; } rcu_read_lock(); #if IS_ENABLED(CONFIG_IPV6) if (info->mode & IP_TUNNEL_INFO_IPV6) err = geneve6_xmit_skb(skb, dev, geneve, info); else #endif err = geneve_xmit_skb(skb, dev, geneve, info); rcu_read_unlock(); if (likely(!err)) return NETDEV_TX_OK; if (err != -EMSGSIZE) dev_kfree_skb(skb); if (err == -ELOOP) DEV_STATS_INC(dev, collisions); else if (err == -ENETUNREACH) DEV_STATS_INC(dev, tx_carrier_errors); DEV_STATS_INC(dev, tx_errors); return NETDEV_TX_OK; } static int geneve_change_mtu(struct net_device *dev, int new_mtu) { if (new_mtu > dev->max_mtu) new_mtu = dev->max_mtu; else if (new_mtu < dev->min_mtu) new_mtu = dev->min_mtu; WRITE_ONCE(dev->mtu, new_mtu); return 0; } static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) { struct ip_tunnel_info *info = skb_tunnel_info(skb); struct geneve_dev *geneve = netdev_priv(dev); __be16 sport; if (ip_tunnel_info_af(info) == AF_INET) { struct rtable *rt; struct geneve_sock *gs4 = rcu_dereference(geneve->sock4); bool use_cache; __be32 saddr; u8 tos; if (!gs4) return -EIO; use_cache = ip_tunnel_dst_cache_usable(skb, info); tos = geneve_get_dsfield(skb, dev, info, &use_cache); sport = udp_flow_src_port(geneve->net, skb, geneve->cfg.port_min, geneve->cfg.port_max, true); rt = udp_tunnel_dst_lookup(skb, dev, geneve->net, 0, &saddr, &info->key, sport, geneve->cfg.info.key.tp_dst, tos, use_cache ? &info->dst_cache : NULL); if (IS_ERR(rt)) return PTR_ERR(rt); ip_rt_put(rt); info->key.u.ipv4.src = saddr; #if IS_ENABLED(CONFIG_IPV6) } else if (ip_tunnel_info_af(info) == AF_INET6) { struct dst_entry *dst; struct geneve_sock *gs6 = rcu_dereference(geneve->sock6); struct in6_addr saddr; bool use_cache; u8 prio; if (!gs6) return -EIO; use_cache = ip_tunnel_dst_cache_usable(skb, info); prio = geneve_get_dsfield(skb, dev, info, &use_cache); sport = udp_flow_src_port(geneve->net, skb, geneve->cfg.port_min, geneve->cfg.port_max, true); dst = udp_tunnel6_dst_lookup(skb, dev, geneve->net, gs6->sock, 0, &saddr, &info->key, sport, geneve->cfg.info.key.tp_dst, prio, use_cache ? 
&info->dst_cache : NULL); if (IS_ERR(dst)) return PTR_ERR(dst); dst_release(dst); info->key.u.ipv6.src = saddr; #endif } else { return -EINVAL; } info->key.tp_src = sport; info->key.tp_dst = geneve->cfg.info.key.tp_dst; return 0; } static const struct net_device_ops geneve_netdev_ops = { .ndo_init = geneve_init, .ndo_uninit = geneve_uninit, .ndo_open = geneve_open, .ndo_stop = geneve_stop, .ndo_start_xmit = geneve_xmit, .ndo_change_mtu = geneve_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, .ndo_fill_metadata_dst = geneve_fill_metadata_dst, }; static void geneve_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) { strscpy(drvinfo->version, GENEVE_NETDEV_VER, sizeof(drvinfo->version)); strscpy(drvinfo->driver, "geneve", sizeof(drvinfo->driver)); } static const struct ethtool_ops geneve_ethtool_ops = { .get_drvinfo = geneve_get_drvinfo, .get_link = ethtool_op_get_link, }; /* Info for udev, that this is a virtual tunnel endpoint */ static const struct device_type geneve_type = { .name = "geneve", }; /* Calls the ndo_udp_tunnel_add of the caller in order to * supply the listening GENEVE udp ports. Callers are expected * to implement the ndo_udp_tunnel_add. */ static void geneve_offload_rx_ports(struct net_device *dev, bool push) { struct net *net = dev_net(dev); struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_sock *gs; rcu_read_lock(); list_for_each_entry_rcu(gs, &gn->sock_list, list) { if (push) { udp_tunnel_push_rx_port(dev, gs->sock, UDP_TUNNEL_TYPE_GENEVE); } else { udp_tunnel_drop_rx_port(dev, gs->sock, UDP_TUNNEL_TYPE_GENEVE); } } rcu_read_unlock(); } /* Initialize the device structure. */ static void geneve_setup(struct net_device *dev) { ether_setup(dev); dev->netdev_ops = &geneve_netdev_ops; dev->ethtool_ops = &geneve_ethtool_ops; dev->needs_free_netdev = true; SET_NETDEV_DEVTYPE(dev, &geneve_type); dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST; dev->features |= NETIF_F_RXCSUM; dev->features |= NETIF_F_GSO_SOFTWARE; dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST; dev->hw_features |= NETIF_F_RXCSUM; dev->hw_features |= NETIF_F_GSO_SOFTWARE; dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS; /* MTU range: 68 - (something less than 65535) */ dev->min_mtu = ETH_MIN_MTU; /* The max_mtu calculation does not take account of GENEVE * options, to avoid excluding potentially valid * configurations. This will be further reduced by IPvX hdr size. 
*/ dev->max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - dev->hard_header_len; netif_keep_dst(dev); dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE; dev->lltx = true; eth_hw_addr_random(dev); } static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = { [IFLA_GENEVE_UNSPEC] = { .strict_start_type = IFLA_GENEVE_INNER_PROTO_INHERIT }, [IFLA_GENEVE_ID] = { .type = NLA_U32 }, [IFLA_GENEVE_REMOTE] = { .len = sizeof_field(struct iphdr, daddr) }, [IFLA_GENEVE_REMOTE6] = { .len = sizeof(struct in6_addr) }, [IFLA_GENEVE_TTL] = { .type = NLA_U8 }, [IFLA_GENEVE_TOS] = { .type = NLA_U8 }, [IFLA_GENEVE_LABEL] = { .type = NLA_U32 }, [IFLA_GENEVE_PORT] = { .type = NLA_U16 }, [IFLA_GENEVE_COLLECT_METADATA] = { .type = NLA_FLAG }, [IFLA_GENEVE_UDP_CSUM] = { .type = NLA_U8 }, [IFLA_GENEVE_UDP_ZERO_CSUM6_TX] = { .type = NLA_U8 }, [IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 }, [IFLA_GENEVE_TTL_INHERIT] = { .type = NLA_U8 }, [IFLA_GENEVE_DF] = { .type = NLA_U8 }, [IFLA_GENEVE_INNER_PROTO_INHERIT] = { .type = NLA_FLAG }, [IFLA_GENEVE_PORT_RANGE] = NLA_POLICY_EXACT_LEN(sizeof(struct ifla_geneve_port_range)), }; static int geneve_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { if (tb[IFLA_ADDRESS]) { if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) { NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS], "Provided link layer address is not Ethernet"); return -EINVAL; } if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) { NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS], "Provided Ethernet address is not unicast"); return -EADDRNOTAVAIL; } } if (!data) { NL_SET_ERR_MSG(extack, "Not enough attributes provided to perform the operation"); return -EINVAL; } if (data[IFLA_GENEVE_ID]) { __u32 vni = nla_get_u32(data[IFLA_GENEVE_ID]); if (vni >= GENEVE_N_VID) { NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_ID], "Geneve ID must be lower than 16777216"); return -ERANGE; } } if (data[IFLA_GENEVE_DF]) { enum ifla_geneve_df df = nla_get_u8(data[IFLA_GENEVE_DF]); if (df < 0 || df > GENEVE_DF_MAX) { NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_DF], "Invalid DF attribute"); return -EINVAL; } } if (data[IFLA_GENEVE_PORT_RANGE]) { const struct ifla_geneve_port_range *p; p = nla_data(data[IFLA_GENEVE_PORT_RANGE]); if (ntohs(p->high) < ntohs(p->low)) { NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_PORT_RANGE], "Invalid source port range"); return -EINVAL; } } return 0; } static struct geneve_dev *geneve_find_dev(struct geneve_net *gn, const struct ip_tunnel_info *info, bool *tun_on_same_port, bool *tun_collect_md) { struct geneve_dev *geneve, *t = NULL; *tun_on_same_port = false; *tun_collect_md = false; list_for_each_entry(geneve, &gn->geneve_list, next) { if (info->key.tp_dst == geneve->cfg.info.key.tp_dst) { *tun_collect_md = geneve->cfg.collect_md; *tun_on_same_port = true; } if (info->key.tun_id == geneve->cfg.info.key.tun_id && info->key.tp_dst == geneve->cfg.info.key.tp_dst && !memcmp(&info->key.u, &geneve->cfg.info.key.u, sizeof(info->key.u))) t = geneve; } return t; } static bool is_tnl_info_zero(const struct ip_tunnel_info *info) { return !(info->key.tun_id || info->key.tos || !ip_tunnel_flags_empty(info->key.tun_flags) || info->key.ttl || info->key.label || info->key.tp_src || memchr_inv(&info->key.u, 0, sizeof(info->key.u))); } static bool geneve_dst_addr_equal(struct ip_tunnel_info *a, struct ip_tunnel_info *b) { if (ip_tunnel_info_af(a) == AF_INET) return a->key.u.ipv4.dst == b->key.u.ipv4.dst; else return ipv6_addr_equal(&a->key.u.ipv6.dst, 
&b->key.u.ipv6.dst); } static int geneve_configure(struct net *net, struct net_device *dev, struct netlink_ext_ack *extack, const struct geneve_config *cfg) { struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_dev *t, *geneve = netdev_priv(dev); const struct ip_tunnel_info *info = &cfg->info; bool tun_collect_md, tun_on_same_port; int err, encap_len; if (cfg->collect_md && !is_tnl_info_zero(info)) { NL_SET_ERR_MSG(extack, "Device is externally controlled, so attributes (VNI, Port, and so on) must not be specified"); return -EINVAL; } geneve->net = net; geneve->dev = dev; t = geneve_find_dev(gn, info, &tun_on_same_port, &tun_collect_md); if (t) return -EBUSY; /* make enough headroom for basic scenario */ encap_len = GENEVE_BASE_HLEN + ETH_HLEN; if (!cfg->collect_md && ip_tunnel_info_af(info) == AF_INET) { encap_len += sizeof(struct iphdr); dev->max_mtu -= sizeof(struct iphdr); } else { encap_len += sizeof(struct ipv6hdr); dev->max_mtu -= sizeof(struct ipv6hdr); } dev->needed_headroom = encap_len + ETH_HLEN; if (cfg->collect_md) { if (tun_on_same_port) { NL_SET_ERR_MSG(extack, "There can be only one externally controlled device on a destination port"); return -EPERM; } } else { if (tun_collect_md) { NL_SET_ERR_MSG(extack, "There already exists an externally controlled device on this destination port"); return -EPERM; } } dst_cache_reset(&geneve->cfg.info.dst_cache); memcpy(&geneve->cfg, cfg, sizeof(*cfg)); if (geneve->cfg.inner_proto_inherit) { dev->header_ops = NULL; dev->type = ARPHRD_NONE; dev->hard_header_len = 0; dev->addr_len = 0; dev->flags = IFF_POINTOPOINT | IFF_NOARP; } err = register_netdevice(dev); if (err) return err; list_add(&geneve->next, &gn->geneve_list); return 0; } static void init_tnl_info(struct ip_tunnel_info *info, __u16 dst_port) { memset(info, 0, sizeof(*info)); info->key.tp_dst = htons(dst_port); } static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack, struct geneve_config *cfg, bool changelink) { struct ip_tunnel_info *info = &cfg->info; int attrtype; if (data[IFLA_GENEVE_REMOTE] && data[IFLA_GENEVE_REMOTE6]) { NL_SET_ERR_MSG(extack, "Cannot specify both IPv4 and IPv6 Remote addresses"); return -EINVAL; } if (data[IFLA_GENEVE_REMOTE]) { if (changelink && (ip_tunnel_info_af(info) == AF_INET6)) { attrtype = IFLA_GENEVE_REMOTE; goto change_notsup; } info->key.u.ipv4.dst = nla_get_in_addr(data[IFLA_GENEVE_REMOTE]); if (ipv4_is_multicast(info->key.u.ipv4.dst)) { NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE], "Remote IPv4 address cannot be Multicast"); return -EINVAL; } } if (data[IFLA_GENEVE_REMOTE6]) { #if IS_ENABLED(CONFIG_IPV6) if (changelink && (ip_tunnel_info_af(info) == AF_INET)) { attrtype = IFLA_GENEVE_REMOTE6; goto change_notsup; } info->mode = IP_TUNNEL_INFO_IPV6; info->key.u.ipv6.dst = nla_get_in6_addr(data[IFLA_GENEVE_REMOTE6]); if (ipv6_addr_type(&info->key.u.ipv6.dst) & IPV6_ADDR_LINKLOCAL) { NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6], "Remote IPv6 address cannot be link-local"); return -EINVAL; } if (ipv6_addr_is_multicast(&info->key.u.ipv6.dst)) { NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6], "Remote IPv6 address cannot be Multicast"); return -EINVAL; } __set_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); cfg->use_udp6_rx_checksums = true; #else NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6], "IPv6 support not enabled in the kernel"); return -EPFNOSUPPORT; #endif } if (data[IFLA_GENEVE_ID]) { __u32 vni; __u8 tvni[3]; __be64 tunid; vni = 
nla_get_u32(data[IFLA_GENEVE_ID]); tvni[0] = (vni & 0x00ff0000) >> 16; tvni[1] = (vni & 0x0000ff00) >> 8; tvni[2] = vni & 0x000000ff; tunid = vni_to_tunnel_id(tvni); if (changelink && (tunid != info->key.tun_id)) { attrtype = IFLA_GENEVE_ID; goto change_notsup; } info->key.tun_id = tunid; } if (data[IFLA_GENEVE_TTL_INHERIT]) { if (nla_get_u8(data[IFLA_GENEVE_TTL_INHERIT])) cfg->ttl_inherit = true; else cfg->ttl_inherit = false; } else if (data[IFLA_GENEVE_TTL]) { info->key.ttl = nla_get_u8(data[IFLA_GENEVE_TTL]); cfg->ttl_inherit = false; } if (data[IFLA_GENEVE_TOS]) info->key.tos = nla_get_u8(data[IFLA_GENEVE_TOS]); if (data[IFLA_GENEVE_DF]) cfg->df = nla_get_u8(data[IFLA_GENEVE_DF]); if (data[IFLA_GENEVE_LABEL]) { info->key.label = nla_get_be32(data[IFLA_GENEVE_LABEL]) & IPV6_FLOWLABEL_MASK; if (info->key.label && (!(info->mode & IP_TUNNEL_INFO_IPV6))) { NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_LABEL], "Label attribute only applies for IPv6 Geneve devices"); return -EINVAL; } } if (data[IFLA_GENEVE_PORT]) { if (changelink) { attrtype = IFLA_GENEVE_PORT; goto change_notsup; } info->key.tp_dst = nla_get_be16(data[IFLA_GENEVE_PORT]); } if (data[IFLA_GENEVE_PORT_RANGE]) { const struct ifla_geneve_port_range *p; if (changelink) { attrtype = IFLA_GENEVE_PORT_RANGE; goto change_notsup; } p = nla_data(data[IFLA_GENEVE_PORT_RANGE]); cfg->port_min = ntohs(p->low); cfg->port_max = ntohs(p->high); } if (data[IFLA_GENEVE_COLLECT_METADATA]) { if (changelink) { attrtype = IFLA_GENEVE_COLLECT_METADATA; goto change_notsup; } cfg->collect_md = true; } if (data[IFLA_GENEVE_UDP_CSUM]) { if (changelink) { attrtype = IFLA_GENEVE_UDP_CSUM; goto change_notsup; } if (nla_get_u8(data[IFLA_GENEVE_UDP_CSUM])) __set_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); } if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]) { #if IS_ENABLED(CONFIG_IPV6) if (changelink) { attrtype = IFLA_GENEVE_UDP_ZERO_CSUM6_TX; goto change_notsup; } if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX])) __clear_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); #else NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX], "IPv6 support not enabled in the kernel"); return -EPFNOSUPPORT; #endif } if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]) { #if IS_ENABLED(CONFIG_IPV6) if (changelink) { attrtype = IFLA_GENEVE_UDP_ZERO_CSUM6_RX; goto change_notsup; } if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX])) cfg->use_udp6_rx_checksums = false; #else NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX], "IPv6 support not enabled in the kernel"); return -EPFNOSUPPORT; #endif } if (data[IFLA_GENEVE_INNER_PROTO_INHERIT]) { if (changelink) { attrtype = IFLA_GENEVE_INNER_PROTO_INHERIT; goto change_notsup; } cfg->inner_proto_inherit = true; } return 0; change_notsup: NL_SET_ERR_MSG_ATTR(extack, data[attrtype], "Changing VNI, Port, endpoint IP address family, external, inner_proto_inherit, and UDP checksum attributes are not supported"); return -EOPNOTSUPP; } static void geneve_link_config(struct net_device *dev, struct ip_tunnel_info *info, struct nlattr *tb[]) { struct geneve_dev *geneve = netdev_priv(dev); int ldev_mtu = 0; if (tb[IFLA_MTU]) { geneve_change_mtu(dev, nla_get_u32(tb[IFLA_MTU])); return; } switch (ip_tunnel_info_af(info)) { case AF_INET: { struct flowi4 fl4 = { .daddr = info->key.u.ipv4.dst }; struct rtable *rt = ip_route_output_key(geneve->net, &fl4); if (!IS_ERR(rt) && rt->dst.dev) { ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV4_HLEN; ip_rt_put(rt); } break; } #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: { struct rt6_info *rt; if 
(!__in6_dev_get(dev)) break; rt = rt6_lookup(geneve->net, &info->key.u.ipv6.dst, NULL, 0, NULL, 0); if (rt && rt->dst.dev) ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV6_HLEN; ip6_rt_put(rt); break; } #endif } if (ldev_mtu <= 0) return; geneve_change_mtu(dev, ldev_mtu - info->options_len); } static int geneve_newlink(struct net_device *dev, struct rtnl_newlink_params *params, struct netlink_ext_ack *extack) { struct net *link_net = rtnl_newlink_link_net(params); struct nlattr **data = params->data; struct nlattr **tb = params->tb; struct geneve_config cfg = { .df = GENEVE_DF_UNSET, .use_udp6_rx_checksums = false, .ttl_inherit = false, .collect_md = false, .port_min = 1, .port_max = USHRT_MAX, }; int err; init_tnl_info(&cfg.info, GENEVE_UDP_PORT); err = geneve_nl2info(tb, data, extack, &cfg, false); if (err) return err; err = geneve_configure(link_net, dev, extack, &cfg); if (err) return err; geneve_link_config(dev, &cfg.info, tb); return 0; } /* Quiesces the geneve device data path for both TX and RX. * * On transmit geneve checks for non-NULL geneve_sock before it proceeds. * So, if we set that socket to NULL under RCU and wait for synchronize_net() * to complete for the existing set of in-flight packets to be transmitted, * then we would have quiesced the transmit data path. All the future packets * will get dropped until we unquiesce the data path. * * On receive geneve dereference the geneve_sock stashed in the socket. So, * if we set that to NULL under RCU and wait for synchronize_net() to * complete, then we would have quiesced the receive data path. */ static void geneve_quiesce(struct geneve_dev *geneve, struct geneve_sock **gs4, struct geneve_sock **gs6) { *gs4 = rtnl_dereference(geneve->sock4); rcu_assign_pointer(geneve->sock4, NULL); if (*gs4) rcu_assign_sk_user_data((*gs4)->sock->sk, NULL); #if IS_ENABLED(CONFIG_IPV6) *gs6 = rtnl_dereference(geneve->sock6); rcu_assign_pointer(geneve->sock6, NULL); if (*gs6) rcu_assign_sk_user_data((*gs6)->sock->sk, NULL); #else *gs6 = NULL; #endif synchronize_net(); } /* Resumes the geneve device data path for both TX and RX. */ static void geneve_unquiesce(struct geneve_dev *geneve, struct geneve_sock *gs4, struct geneve_sock __maybe_unused *gs6) { rcu_assign_pointer(geneve->sock4, gs4); if (gs4) rcu_assign_sk_user_data(gs4->sock->sk, gs4); #if IS_ENABLED(CONFIG_IPV6) rcu_assign_pointer(geneve->sock6, gs6); if (gs6) rcu_assign_sk_user_data(gs6->sock->sk, gs6); #endif synchronize_net(); } static int geneve_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct geneve_dev *geneve = netdev_priv(dev); struct geneve_sock *gs4, *gs6; struct geneve_config cfg; int err; /* If the geneve device is configured for metadata (or externally * controlled, for example, OVS), then nothing can be changed. */ if (geneve->cfg.collect_md) return -EOPNOTSUPP; /* Start with the existing info. 
*/ memcpy(&cfg, &geneve->cfg, sizeof(cfg)); err = geneve_nl2info(tb, data, extack, &cfg, true); if (err) return err; if (!geneve_dst_addr_equal(&geneve->cfg.info, &cfg.info)) { dst_cache_reset(&cfg.info.dst_cache); geneve_link_config(dev, &cfg.info, tb); } geneve_quiesce(geneve, &gs4, &gs6); memcpy(&geneve->cfg, &cfg, sizeof(cfg)); geneve_unquiesce(geneve, gs4, gs6); return 0; } static void geneve_dellink(struct net_device *dev, struct list_head *head) { struct geneve_dev *geneve = netdev_priv(dev); list_del(&geneve->next); unregister_netdevice_queue(dev, head); } static size_t geneve_get_size(const struct net_device *dev) { return nla_total_size(sizeof(__u32)) + /* IFLA_GENEVE_ID */ nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GENEVE_REMOTE{6} */ nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL */ nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TOS */ nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_DF */ nla_total_size(sizeof(__be32)) + /* IFLA_GENEVE_LABEL */ nla_total_size(sizeof(__be16)) + /* IFLA_GENEVE_PORT */ nla_total_size(0) + /* IFLA_GENEVE_COLLECT_METADATA */ nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_CSUM */ nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_TX */ nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_RX */ nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL_INHERIT */ nla_total_size(0) + /* IFLA_GENEVE_INNER_PROTO_INHERIT */ nla_total_size(sizeof(struct ifla_geneve_port_range)) + /* IFLA_GENEVE_PORT_RANGE */ 0; } static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct geneve_dev *geneve = netdev_priv(dev); struct ip_tunnel_info *info = &geneve->cfg.info; bool ttl_inherit = geneve->cfg.ttl_inherit; bool metadata = geneve->cfg.collect_md; struct ifla_geneve_port_range ports = { .low = htons(geneve->cfg.port_min), .high = htons(geneve->cfg.port_max), }; __u8 tmp_vni[3]; __u32 vni; tunnel_id_to_vni(info->key.tun_id, tmp_vni); vni = (tmp_vni[0] << 16) | (tmp_vni[1] << 8) | tmp_vni[2]; if (nla_put_u32(skb, IFLA_GENEVE_ID, vni)) goto nla_put_failure; if (!metadata && ip_tunnel_info_af(info) == AF_INET) { if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE, info->key.u.ipv4.dst)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM, test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags))) goto nla_put_failure; #if IS_ENABLED(CONFIG_IPV6) } else if (!metadata) { if (nla_put_in6_addr(skb, IFLA_GENEVE_REMOTE6, &info->key.u.ipv6.dst)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX, !test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags))) goto nla_put_failure; #endif } if (nla_put_u8(skb, IFLA_GENEVE_TTL, info->key.ttl) || nla_put_u8(skb, IFLA_GENEVE_TOS, info->key.tos) || nla_put_be32(skb, IFLA_GENEVE_LABEL, info->key.label)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_GENEVE_DF, geneve->cfg.df)) goto nla_put_failure; if (nla_put_be16(skb, IFLA_GENEVE_PORT, info->key.tp_dst)) goto nla_put_failure; if (metadata && nla_put_flag(skb, IFLA_GENEVE_COLLECT_METADATA)) goto nla_put_failure; #if IS_ENABLED(CONFIG_IPV6) if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX, !geneve->cfg.use_udp6_rx_checksums)) goto nla_put_failure; #endif if (nla_put_u8(skb, IFLA_GENEVE_TTL_INHERIT, ttl_inherit)) goto nla_put_failure; if (geneve->cfg.inner_proto_inherit && nla_put_flag(skb, IFLA_GENEVE_INNER_PROTO_INHERIT)) goto nla_put_failure; if (nla_put(skb, IFLA_GENEVE_PORT_RANGE, sizeof(ports), &ports)) goto nla_put_failure; return 0; nla_put_failure: return -EMSGSIZE; } static struct rtnl_link_ops geneve_link_ops 
__read_mostly = { .kind = "geneve", .maxtype = IFLA_GENEVE_MAX, .policy = geneve_policy, .priv_size = sizeof(struct geneve_dev), .setup = geneve_setup, .validate = geneve_validate, .newlink = geneve_newlink, .changelink = geneve_changelink, .dellink = geneve_dellink, .get_size = geneve_get_size, .fill_info = geneve_fill_info, }; struct net_device *geneve_dev_create_fb(struct net *net, const char *name, u8 name_assign_type, u16 dst_port) { struct nlattr *tb[IFLA_MAX + 1]; struct net_device *dev; LIST_HEAD(list_kill); int err; struct geneve_config cfg = { .df = GENEVE_DF_UNSET, .use_udp6_rx_checksums = true, .ttl_inherit = false, .collect_md = true, .port_min = 1, .port_max = USHRT_MAX, }; memset(tb, 0, sizeof(tb)); dev = rtnl_create_link(net, name, name_assign_type, &geneve_link_ops, tb, NULL); if (IS_ERR(dev)) return dev; init_tnl_info(&cfg.info, dst_port); err = geneve_configure(net, dev, NULL, &cfg); if (err) { free_netdev(dev); return ERR_PTR(err); } /* openvswitch users expect packet sizes to be unrestricted, * so set the largest MTU we can. */ err = geneve_change_mtu(dev, IP_MAX_MTU); if (err) goto err; err = rtnl_configure_link(dev, NULL, 0, NULL); if (err < 0) goto err; return dev; err: geneve_dellink(dev, &list_kill); unregister_netdevice_many(&list_kill); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(geneve_dev_create_fb); static int geneve_netdevice_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (event == NETDEV_UDP_TUNNEL_PUSH_INFO) geneve_offload_rx_ports(dev, true); else if (event == NETDEV_UDP_TUNNEL_DROP_INFO) geneve_offload_rx_ports(dev, false); return NOTIFY_DONE; } static struct notifier_block geneve_notifier_block __read_mostly = { .notifier_call = geneve_netdevice_event, }; static __net_init int geneve_init_net(struct net *net) { struct geneve_net *gn = net_generic(net, geneve_net_id); INIT_LIST_HEAD(&gn->geneve_list); INIT_LIST_HEAD(&gn->sock_list); return 0; } static void geneve_destroy_tunnels(struct net *net, struct list_head *head) { struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_dev *geneve, *next; list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) geneve_dellink(geneve->dev, head); } static void __net_exit geneve_exit_batch_rtnl(struct list_head *net_list, struct list_head *dev_to_kill) { struct net *net; list_for_each_entry(net, net_list, exit_list) geneve_destroy_tunnels(net, dev_to_kill); } static void __net_exit geneve_exit_net(struct net *net) { const struct geneve_net *gn = net_generic(net, geneve_net_id); WARN_ON_ONCE(!list_empty(&gn->sock_list)); } static struct pernet_operations geneve_net_ops = { .init = geneve_init_net, .exit_batch_rtnl = geneve_exit_batch_rtnl, .exit = geneve_exit_net, .id = &geneve_net_id, .size = sizeof(struct geneve_net), }; static int __init geneve_init_module(void) { int rc; rc = register_pernet_subsys(&geneve_net_ops); if (rc) goto out1; rc = register_netdevice_notifier(&geneve_notifier_block); if (rc) goto out2; rc = rtnl_link_register(&geneve_link_ops); if (rc) goto out3; return 0; out3: unregister_netdevice_notifier(&geneve_notifier_block); out2: unregister_pernet_subsys(&geneve_net_ops); out1: return rc; } late_initcall(geneve_init_module); static void __exit geneve_cleanup_module(void) { rtnl_link_unregister(&geneve_link_ops); unregister_netdevice_notifier(&geneve_notifier_block); unregister_pernet_subsys(&geneve_net_ops); } module_exit(geneve_cleanup_module); MODULE_LICENSE("GPL"); 
MODULE_VERSION(GENEVE_NETDEV_VER); MODULE_AUTHOR("John W. Linville <linville@tuxdriver.com>"); MODULE_DESCRIPTION("Interface driver for GENEVE encapsulated traffic"); MODULE_ALIAS_RTNL_LINK("geneve"); |
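Illustration (not part of the driver above): a minimal, standalone sketch of the 24-bit VNI byte packing that geneve_nl2info() and geneve_fill_info() perform around IFLA_GENEVE_ID. The helper names vni_to_bytes() and bytes_to_vni() are invented for this example; the kernel's vni_to_tunnel_id()/tunnel_id_to_vni() additionally place these three bytes inside a 64-bit tunnel id, which is omitted here.

#include <stdint.h>
#include <stdio.h>

/*
 * Hypothetical helpers mirroring the byte packing done around
 * IFLA_GENEVE_ID above; only the 24-bit VNI <-> 3-byte array step
 * is shown, not the 64-bit tunnel id encoding.
 */
static void vni_to_bytes(uint32_t vni, uint8_t b[3])
{
	b[0] = (vni & 0x00ff0000) >> 16;
	b[1] = (vni & 0x0000ff00) >> 8;
	b[2] = vni & 0x000000ff;
}

static uint32_t bytes_to_vni(const uint8_t b[3])
{
	return ((uint32_t)b[0] << 16) | ((uint32_t)b[1] << 8) | b[2];
}

int main(void)
{
	uint32_t vni = 0xabcdef;	/* VNIs are 24 bits wide */
	uint8_t b[3];

	vni_to_bytes(vni, b);
	printf("vni=0x%06x bytes=%02x %02x %02x round-trip=0x%06x\n",
	       (unsigned int)vni, b[0], b[1], b[2],
	       (unsigned int)bytes_to_vni(b));
	return 0;
}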
5 5 5 5 5 5 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Scatterlist Cryptographic API. * * Procfs information. * * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> * Copyright (c) 2005 Herbert Xu <herbert@gondor.apana.org.au> */ #include <linux/atomic.h> #include <linux/init.h> #include <linux/crypto.h> #include <linux/fips.h> #include <linux/module.h> /* for module_name() */ #include <linux/rwsem.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include "internal.h" static void *c_start(struct seq_file *m, loff_t *pos) { down_read(&crypto_alg_sem); return seq_list_start(&crypto_alg_list, *pos); } static void *c_next(struct seq_file *m, void *p, loff_t *pos) { return seq_list_next(p, &crypto_alg_list, pos); } static void c_stop(struct seq_file *m, void *p) { up_read(&crypto_alg_sem); } static int c_show(struct seq_file *m, void *p) { struct crypto_alg *alg = list_entry(p, struct crypto_alg, cra_list); seq_printf(m, "name : %s\n", alg->cra_name); seq_printf(m, "driver : %s\n", alg->cra_driver_name); seq_printf(m, "module : %s\n", module_name(alg->cra_module)); seq_printf(m, "priority : %d\n", alg->cra_priority); seq_printf(m, "refcnt : %u\n", refcount_read(&alg->cra_refcnt)); seq_printf(m, "selftest : %s\n", (alg->cra_flags & CRYPTO_ALG_TESTED) ? "passed" : "unknown"); seq_printf(m, "internal : %s\n", str_yes_no(alg->cra_flags & CRYPTO_ALG_INTERNAL)); if (fips_enabled) seq_printf(m, "fips : %s\n", str_no_yes(alg->cra_flags & CRYPTO_ALG_FIPS_INTERNAL)); if (alg->cra_flags & CRYPTO_ALG_LARVAL) { seq_printf(m, "type : larval\n"); seq_printf(m, "flags : 0x%x\n", alg->cra_flags); goto out; } if (alg->cra_type && alg->cra_type->show) { alg->cra_type->show(m, alg); goto out; } switch (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) { case CRYPTO_ALG_TYPE_CIPHER: seq_printf(m, "type : cipher\n"); seq_printf(m, "blocksize : %u\n", alg->cra_blocksize); seq_printf(m, "min keysize : %u\n", alg->cra_cipher.cia_min_keysize); seq_printf(m, "max keysize : %u\n", alg->cra_cipher.cia_max_keysize); break; default: seq_printf(m, "type : unknown\n"); break; } out: seq_putc(m, '\n'); return 0; } static const struct seq_operations crypto_seq_ops = { .start = c_start, .next = c_next, .stop = c_stop, .show = c_show }; void __init crypto_init_proc(void) { proc_create_seq("crypto", 0, NULL, &crypto_seq_ops); } void __exit crypto_exit_proc(void) { remove_proc_entry("crypto", NULL); } |
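Illustration (not part of the file above): a small userspace reader for the /proc/crypto sequence produced by c_show(). It assumes only that the procfs entry exists (CONFIG_PROC_FS) and that each record uses the "key : value" lines emitted above; it filters for the "name" and "driver" fields.

#include <stdio.h>
#include <string.h>

/* Print only the "name" and "driver" lines of each /proc/crypto entry. */
int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/crypto", "r");

	if (!f) {
		perror("/proc/crypto");
		return 1;
	}
	while (fgets(line, sizeof(line), f)) {
		if (!strncmp(line, "name", 4) || !strncmp(line, "driver", 6))
			fputs(line, stdout);
	}
	fclose(f);
	return 0;
}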
11 1 11 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 | // SPDX-License-Identifier: GPL-2.0-only /* module that allows mangling of the arp payload */ #include <linux/module.h> #include <linux/netfilter.h> #include <linux/netfilter_arp/arpt_mangle.h> #include <net/sock.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>"); MODULE_DESCRIPTION("arptables arp payload mangle target"); static unsigned int target(struct sk_buff *skb, const struct xt_action_param *par) { const struct arpt_mangle *mangle = par->targinfo; const struct arphdr *arp; unsigned char *arpptr; int pln, hln; if (skb_ensure_writable(skb, skb->len)) return NF_DROP; arp = arp_hdr(skb); arpptr = skb_network_header(skb) + sizeof(*arp); pln = arp->ar_pln; hln = arp->ar_hln; /* We assume that pln and hln were checked in the match */ if (mangle->flags & ARPT_MANGLE_SDEV) { if (ARPT_DEV_ADDR_LEN_MAX < hln || (arpptr + hln > skb_tail_pointer(skb))) return NF_DROP; memcpy(arpptr, mangle->src_devaddr, hln); } arpptr += hln; if (mangle->flags & ARPT_MANGLE_SIP) { if (ARPT_MANGLE_ADDR_LEN_MAX < pln || (arpptr + pln > skb_tail_pointer(skb))) return NF_DROP; memcpy(arpptr, &mangle->u_s.src_ip, pln); } arpptr += pln; if (mangle->flags & ARPT_MANGLE_TDEV) { if (ARPT_DEV_ADDR_LEN_MAX < hln || (arpptr + hln > skb_tail_pointer(skb))) return NF_DROP; memcpy(arpptr, mangle->tgt_devaddr, hln); } arpptr += hln; if (mangle->flags & ARPT_MANGLE_TIP) { if (ARPT_MANGLE_ADDR_LEN_MAX < pln || (arpptr + pln > skb_tail_pointer(skb))) return NF_DROP; memcpy(arpptr, &mangle->u_t.tgt_ip, pln); } return mangle->target; } static int checkentry(const struct xt_tgchk_param *par) { const struct arpt_mangle *mangle = par->targinfo; if (mangle->flags & ~ARPT_MANGLE_MASK || !(mangle->flags & ARPT_MANGLE_MASK)) return -EINVAL; if (mangle->target != NF_DROP && mangle->target != NF_ACCEPT && mangle->target != XT_CONTINUE) return -EINVAL; return 0; } static struct xt_target arpt_mangle_reg __read_mostly = { .name = "mangle", .family = NFPROTO_ARP, .target = target, .targetsize = sizeof(struct arpt_mangle), .checkentry = checkentry, .me = THIS_MODULE, }; static int __init arpt_mangle_init(void) { return xt_register_target(&arpt_mangle_reg); } static void __exit arpt_mangle_fini(void) { xt_unregister_target(&arpt_mangle_reg); } module_init(arpt_mangle_init); module_exit(arpt_mangle_fini); |
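Illustration (not part of the module above): the ARP payload layout that target() walks with arpptr, shown for the common Ethernet/IPv4 case (hln = 6, pln = 4). The printed field names are descriptive only; the offsets are relative to the end of the fixed ARP header, exactly as the hln/pln increments in target() imply.

#include <stdio.h>

int main(void)
{
	unsigned int hln = 6, pln = 4;	/* Ethernet hw len, IPv4 proto len */
	unsigned int off = 0;

	printf("sender hw addr @ +%2u (%u bytes)\n", off, hln); off += hln;
	printf("sender IP addr @ +%2u (%u bytes)\n", off, pln); off += pln;
	printf("target hw addr @ +%2u (%u bytes)\n", off, hln); off += hln;
	printf("target IP addr @ +%2u (%u bytes)\n", off, pln); off += pln;
	printf("total payload    %u bytes\n", off);
	return 0;
}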
// SPDX-License-Identifier: GPL-2.0-or-later /* * History: * Started: Aug 9 by Lawrence Foard (entropy@world.std.com), * to allow user process control of SCSI devices. * Development Sponsored by Killy Corp. NY NY * * Original driver (sg.c): * Copyright (C) 1992 Lawrence Foard * Version 2 and 3 extensions to driver: * Copyright (C) 1998 - 2014 Douglas Gilbert */ static int sg_version_num = 30536; /* 2 digits for each component */ #define SG_VERSION_STR "3.5.36" /* * D. P. Gilbert (dgilbert@interlog.com), notes: * - scsi logging is available via SCSI_LOG_TIMEOUT macros. First * the kernel/module needs to be built with CONFIG_SCSI_LOGGING * (otherwise the macros compile to empty statements).
* */ #include <linux/module.h> #include <linux/fs.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/string.h> #include <linux/mm.h> #include <linux/errno.h> #include <linux/mtio.h> #include <linux/ioctl.h> #include <linux/major.h> #include <linux/slab.h> #include <linux/fcntl.h> #include <linux/init.h> #include <linux/poll.h> #include <linux/moduleparam.h> #include <linux/cdev.h> #include <linux/idr.h> #include <linux/seq_file.h> #include <linux/blkdev.h> #include <linux/delay.h> #include <linux/blktrace_api.h> #include <linux/mutex.h> #include <linux/atomic.h> #include <linux/ratelimit.h> #include <linux/uio.h> #include <linux/cred.h> /* for sg_check_file_access() */ #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> #include <scsi/scsi_dbg.h> #include <scsi/scsi_device.h> #include <scsi/scsi_driver.h> #include <scsi/scsi_eh.h> #include <scsi/scsi_host.h> #include <scsi/scsi_ioctl.h> #include <scsi/scsi_tcq.h> #include <scsi/sg.h> #include "scsi_logging.h" #ifdef CONFIG_SCSI_PROC_FS #include <linux/proc_fs.h> static char *sg_version_date = "20140603"; static int sg_proc_init(void); #endif #define SG_ALLOW_DIO_DEF 0 #define SG_MAX_DEVS (1 << MINORBITS) /* SG_MAX_CDB_SIZE should be 260 (spc4r37 section 3.1.30) however the type * of sg_io_hdr::cmd_len can only represent 255. All SCSI commands greater * than 16 bytes are "variable length" whose length is a multiple of 4 */ #define SG_MAX_CDB_SIZE 252 #define SG_DEFAULT_TIMEOUT mult_frac(SG_DEFAULT_TIMEOUT_USER, HZ, USER_HZ) static int sg_big_buff = SG_DEF_RESERVED_SIZE; /* N.B. This variable is readable and writeable via /proc/scsi/sg/def_reserved_size . Each time sg_open() is called a buffer of this size (or less if there is not enough memory) will be reserved for use by this file descriptor. [Deprecated usage: this variable is also readable via /proc/sys/kernel/sg-big-buff if the sg driver is built into the kernel (i.e. it is not a module).] */ static int def_reserved_size = -1; /* picks up init parameter */ static int sg_allow_dio = SG_ALLOW_DIO_DEF; static int scatter_elem_sz = SG_SCATTER_SZ; static int scatter_elem_sz_prev = SG_SCATTER_SZ; #define SG_SECTOR_SZ 512 static int sg_add_device(struct device *); static void sg_remove_device(struct device *); static DEFINE_IDR(sg_index_idr); static DEFINE_RWLOCK(sg_index_lock); /* Also used to lock file descriptor list for device */ static struct class_interface sg_interface = { .add_dev = sg_add_device, .remove_dev = sg_remove_device, }; typedef struct sg_scatter_hold { /* holding area for scsi scatter gather info */ unsigned short k_use_sg; /* Count of kernel scatter-gather pieces */ unsigned sglist_len; /* size of malloc'd scatter-gather list ++ */ unsigned bufflen; /* Size of (aggregate) data buffer */ struct page **pages; int page_order; char dio_in_use; /* 0->indirect IO (or mmap), 1->dio */ unsigned char cmd_opcode; /* first byte of command */ } Sg_scatter_hold; struct sg_device; /* forward declarations */ struct sg_fd; typedef struct sg_request { /* SG_MAX_QUEUE requests outstanding per file */ struct list_head entry; /* list entry */ struct sg_fd *parentfp; /* NULL -> not in use */ Sg_scatter_hold data; /* hold buffer, perhaps scatter list */ sg_io_hdr_t header; /* scsi command+info, see <scsi/sg.h> */ unsigned char sense_b[SCSI_SENSE_BUFFERSIZE]; char res_used; /* 1 -> using reserve buffer, 0 -> not ... 
*/ char orphan; /* 1 -> drop on sight, 0 -> normal */ char sg_io_owned; /* 1 -> packet belongs to SG_IO */ /* done protected by rq_list_lock */ char done; /* 0->before bh, 1->before read, 2->read */ struct request *rq; struct bio *bio; struct execute_work ew; } Sg_request; typedef struct sg_fd { /* holds the state of a file descriptor */ struct list_head sfd_siblings; /* protected by device's sfd_lock */ struct sg_device *parentdp; /* owning device */ wait_queue_head_t read_wait; /* queue read until command done */ rwlock_t rq_list_lock; /* protect access to list in req_arr */ struct mutex f_mutex; /* protect against changes in this fd */ int timeout; /* defaults to SG_DEFAULT_TIMEOUT */ int timeout_user; /* defaults to SG_DEFAULT_TIMEOUT_USER */ Sg_scatter_hold reserve; /* buffer held for this file descriptor */ struct list_head rq_list; /* head of request list */ struct fasync_struct *async_qp; /* used by asynchronous notification */ Sg_request req_arr[SG_MAX_QUEUE]; /* used as singly-linked list */ char force_packid; /* 1 -> pack_id input to read(), 0 -> ignored */ char cmd_q; /* 1 -> allow command queuing, 0 -> don't */ unsigned char next_cmd_len; /* 0: automatic, >0: use on next write() */ char keep_orphan; /* 0 -> drop orphan (def), 1 -> keep for read() */ char mmap_called; /* 0 -> mmap() never called on this fd */ char res_in_use; /* 1 -> 'reserve' array in use */ struct kref f_ref; struct execute_work ew; } Sg_fd; typedef struct sg_device { /* holds the state of each scsi generic device */ struct scsi_device *device; wait_queue_head_t open_wait; /* queue open() when O_EXCL present */ struct mutex open_rel_lock; /* held when in open() or release() */ int sg_tablesize; /* adapter's max scatter-gather table size */ u32 index; /* device index number */ struct list_head sfds; rwlock_t sfd_lock; /* protect access to sfd list */ atomic_t detaching; /* 0->device usable, 1->device detaching */ bool exclude; /* 1->open(O_EXCL) succeeded and is active */ int open_cnt; /* count of opens (perhaps < num(sfds) ) */ char sgdebug; /* 0->off, 1->sense, 9->dump dev, 10-> all devs */ char name[DISK_NAME_LEN]; struct cdev * cdev; /* char_dev [sysfs: /sys/cdev/major/sg<n>] */ struct kref d_ref; } Sg_device; /* tasklet or soft irq callback */ static enum rq_end_io_ret sg_rq_end_io(struct request *rq, blk_status_t status); static int sg_start_req(Sg_request *srp, unsigned char *cmd); static int sg_finish_rem_req(Sg_request * srp); static int sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size); static ssize_t sg_new_read(Sg_fd * sfp, char __user *buf, size_t count, Sg_request * srp); static ssize_t sg_new_write(Sg_fd *sfp, struct file *file, const char __user *buf, size_t count, int blocking, int read_only, int sg_io_owned, Sg_request **o_srp); static int sg_common_write(Sg_fd * sfp, Sg_request * srp, unsigned char *cmnd, int timeout, int blocking); static int sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer); static void sg_remove_scat(Sg_fd * sfp, Sg_scatter_hold * schp); static void sg_build_reserve(Sg_fd * sfp, int req_size); static void sg_link_reserve(Sg_fd * sfp, Sg_request * srp, int size); static void sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp); static Sg_fd *sg_add_sfp(Sg_device * sdp); static void sg_remove_sfp(struct kref *); static Sg_request *sg_get_rq_mark(Sg_fd * sfp, int pack_id, bool *busy); static Sg_request *sg_add_request(Sg_fd * sfp); static int sg_remove_request(Sg_fd * sfp, Sg_request * srp); static Sg_device *sg_get_dev(int dev); static 
void sg_device_destroy(struct kref *kref); #define SZ_SG_HEADER sizeof(struct sg_header) #define SZ_SG_IO_HDR sizeof(sg_io_hdr_t) #define SZ_SG_IOVEC sizeof(sg_iovec_t) #define SZ_SG_REQ_INFO sizeof(sg_req_info_t) #define sg_printk(prefix, sdp, fmt, a...) \ sdev_prefix_printk(prefix, (sdp)->device, (sdp)->name, fmt, ##a) /* * The SCSI interfaces that use read() and write() as an asynchronous variant of * ioctl(..., SG_IO, ...) are fundamentally unsafe, since there are lots of ways * to trigger read() and write() calls from various contexts with elevated * privileges. This can lead to kernel memory corruption (e.g. if these * interfaces are called through splice()) and privilege escalation inside * userspace (e.g. if a process with access to such a device passes a file * descriptor to a SUID binary as stdin/stdout/stderr). * * This function provides protection for the legacy API by restricting the * calling context. */ static int sg_check_file_access(struct file *filp, const char *caller) { if (filp->f_cred != current_real_cred()) { pr_err_once("%s: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n", caller, task_tgid_vnr(current), current->comm); return -EPERM; } return 0; } static int sg_allow_access(struct file *filp, unsigned char *cmd) { struct sg_fd *sfp = filp->private_data; if (sfp->parentdp->device->type == TYPE_SCANNER) return 0; if (!scsi_cmd_allowed(cmd, filp->f_mode & FMODE_WRITE)) return -EPERM; return 0; } static int open_wait(Sg_device *sdp, int flags) { int retval = 0; if (flags & O_EXCL) { while (sdp->open_cnt > 0) { mutex_unlock(&sdp->open_rel_lock); retval = wait_event_interruptible(sdp->open_wait, (atomic_read(&sdp->detaching) || !sdp->open_cnt)); mutex_lock(&sdp->open_rel_lock); if (retval) /* -ERESTARTSYS */ return retval; if (atomic_read(&sdp->detaching)) return -ENODEV; } } else { while (sdp->exclude) { mutex_unlock(&sdp->open_rel_lock); retval = wait_event_interruptible(sdp->open_wait, (atomic_read(&sdp->detaching) || !sdp->exclude)); mutex_lock(&sdp->open_rel_lock); if (retval) /* -ERESTARTSYS */ return retval; if (atomic_read(&sdp->detaching)) return -ENODEV; } } return retval; } /* Returns 0 on success, else a negated errno value */ static int sg_open(struct inode *inode, struct file *filp) { int dev = iminor(inode); int flags = filp->f_flags; struct request_queue *q; struct scsi_device *device; Sg_device *sdp; Sg_fd *sfp; int retval; nonseekable_open(inode, filp); if ((flags & O_EXCL) && (O_RDONLY == (flags & O_ACCMODE))) return -EPERM; /* Can't lock it with read only access */ sdp = sg_get_dev(dev); if (IS_ERR(sdp)) return PTR_ERR(sdp); SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sdp, "sg_open: flags=0x%x\n", flags)); /* This driver's module count bumped by fops_get in <linux/fs.h> */ /* Prevent the device driver from vanishing while we sleep */ device = sdp->device; retval = scsi_device_get(device); if (retval) goto sg_put; /* scsi_block_when_processing_errors() may block so bypass * check if O_NONBLOCK. Permits SCSI commands to be issued * during error recovery. Tread carefully. 
*/ if (!((flags & O_NONBLOCK) || scsi_block_when_processing_errors(device))) { retval = -ENXIO; /* we are in error recovery for this device */ goto sdp_put; } mutex_lock(&sdp->open_rel_lock); if (flags & O_NONBLOCK) { if (flags & O_EXCL) { if (sdp->open_cnt > 0) { retval = -EBUSY; goto error_mutex_locked; } } else { if (sdp->exclude) { retval = -EBUSY; goto error_mutex_locked; } } } else { retval = open_wait(sdp, flags); if (retval) /* -ERESTARTSYS or -ENODEV */ goto error_mutex_locked; } /* N.B. at this point we are holding the open_rel_lock */ if (flags & O_EXCL) sdp->exclude = true; if (sdp->open_cnt < 1) { /* no existing opens */ sdp->sgdebug = 0; q = device->request_queue; sdp->sg_tablesize = queue_max_segments(q); } sfp = sg_add_sfp(sdp); if (IS_ERR(sfp)) { retval = PTR_ERR(sfp); goto out_undo; } filp->private_data = sfp; sdp->open_cnt++; mutex_unlock(&sdp->open_rel_lock); retval = 0; sg_put: kref_put(&sdp->d_ref, sg_device_destroy); return retval; out_undo: if (flags & O_EXCL) { sdp->exclude = false; /* undo if error */ wake_up_interruptible(&sdp->open_wait); } error_mutex_locked: mutex_unlock(&sdp->open_rel_lock); sdp_put: kref_put(&sdp->d_ref, sg_device_destroy); scsi_device_put(device); return retval; } /* Release resources associated with a successful sg_open() * Returns 0 on success, else a negated errno value */ static int sg_release(struct inode *inode, struct file *filp) { Sg_device *sdp; Sg_fd *sfp; if ((!(sfp = (Sg_fd *) filp->private_data)) || (!(sdp = sfp->parentdp))) return -ENXIO; SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sdp, "sg_release\n")); mutex_lock(&sdp->open_rel_lock); sdp->open_cnt--; /* possibly many open()s waiting on exlude clearing, start many; * only open(O_EXCL)s wait on 0==open_cnt so only start one */ if (sdp->exclude) { sdp->exclude = false; wake_up_interruptible_all(&sdp->open_wait); } else if (0 == sdp->open_cnt) { wake_up_interruptible(&sdp->open_wait); } mutex_unlock(&sdp->open_rel_lock); kref_put(&sfp->f_ref, sg_remove_sfp); return 0; } static int get_sg_io_pack_id(int *pack_id, void __user *buf, size_t count) { struct sg_header __user *old_hdr = buf; int reply_len; if (count >= SZ_SG_HEADER) { /* negative reply_len means v3 format, otherwise v1/v2 */ if (get_user(reply_len, &old_hdr->reply_len)) return -EFAULT; if (reply_len >= 0) return get_user(*pack_id, &old_hdr->pack_id); if (in_compat_syscall() && count >= sizeof(struct compat_sg_io_hdr)) { struct compat_sg_io_hdr __user *hp = buf; return get_user(*pack_id, &hp->pack_id); } if (count >= sizeof(struct sg_io_hdr)) { struct sg_io_hdr __user *hp = buf; return get_user(*pack_id, &hp->pack_id); } } /* no valid header was passed, so ignore the pack_id */ *pack_id = -1; return 0; } static ssize_t sg_read(struct file *filp, char __user *buf, size_t count, loff_t * ppos) { Sg_device *sdp; Sg_fd *sfp; Sg_request *srp; int req_pack_id = -1; bool busy; sg_io_hdr_t *hp; struct sg_header *old_hdr; int retval; /* * This could cause a response to be stranded. Close the associated * file descriptor to free up any resources being held. 
*/ retval = sg_check_file_access(filp, __func__); if (retval) return retval; if ((!(sfp = (Sg_fd *) filp->private_data)) || (!(sdp = sfp->parentdp))) return -ENXIO; SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sdp, "sg_read: count=%d\n", (int) count)); if (sfp->force_packid) retval = get_sg_io_pack_id(&req_pack_id, buf, count); if (retval) return retval; srp = sg_get_rq_mark(sfp, req_pack_id, &busy); if (!srp) { /* now wait on packet to arrive */ if (filp->f_flags & O_NONBLOCK) return -EAGAIN; retval = wait_event_interruptible(sfp->read_wait, ((srp = sg_get_rq_mark(sfp, req_pack_id, &busy)) || (!busy && atomic_read(&sdp->detaching)))); if (!srp) /* signal or detaching */ return retval ? retval : -ENODEV; } if (srp->header.interface_id != '\0') return sg_new_read(sfp, buf, count, srp); hp = &srp->header; old_hdr = kzalloc(SZ_SG_HEADER, GFP_KERNEL); if (!old_hdr) return -ENOMEM; old_hdr->reply_len = (int) hp->timeout; old_hdr->pack_len = old_hdr->reply_len; /* old, strange behaviour */ old_hdr->pack_id = hp->pack_id; old_hdr->twelve_byte = ((srp->data.cmd_opcode >= 0xc0) && (12 == hp->cmd_len)) ? 1 : 0; old_hdr->target_status = hp->masked_status; old_hdr->host_status = hp->host_status; old_hdr->driver_status = hp->driver_status; if ((CHECK_CONDITION & hp->masked_status) || (srp->sense_b[0] & 0x70) == 0x70) { old_hdr->driver_status = DRIVER_SENSE; memcpy(old_hdr->sense_buffer, srp->sense_b, sizeof (old_hdr->sense_buffer)); } switch (hp->host_status) { /* This setup of 'result' is for backward compatibility and is best ignored by the user who should use target, host + driver status */ case DID_OK: case DID_PASSTHROUGH: case DID_SOFT_ERROR: old_hdr->result = 0; break; case DID_NO_CONNECT: case DID_BUS_BUSY: case DID_TIME_OUT: old_hdr->result = EBUSY; break; case DID_BAD_TARGET: case DID_ABORT: case DID_PARITY: case DID_RESET: case DID_BAD_INTR: old_hdr->result = EIO; break; case DID_ERROR: old_hdr->result = (srp->sense_b[0] == 0 && hp->masked_status == GOOD) ? 0 : EIO; break; default: old_hdr->result = EIO; break; } /* Now copy the result back to the user buffer. */ if (count >= SZ_SG_HEADER) { if (copy_to_user(buf, old_hdr, SZ_SG_HEADER)) { retval = -EFAULT; goto free_old_hdr; } buf += SZ_SG_HEADER; if (count > old_hdr->reply_len) count = old_hdr->reply_len; if (count > SZ_SG_HEADER) { if (sg_read_oxfer(srp, buf, count - SZ_SG_HEADER)) { retval = -EFAULT; goto free_old_hdr; } } } else count = (old_hdr->result == 0) ? 0 : -EIO; sg_finish_rem_req(srp); sg_remove_request(sfp, srp); retval = count; free_old_hdr: kfree(old_hdr); return retval; } static ssize_t sg_new_read(Sg_fd * sfp, char __user *buf, size_t count, Sg_request * srp) { sg_io_hdr_t *hp = &srp->header; int err = 0, err2; int len; if (in_compat_syscall()) { if (count < sizeof(struct compat_sg_io_hdr)) { err = -EINVAL; goto err_out; } } else if (count < SZ_SG_IO_HDR) { err = -EINVAL; goto err_out; } hp->sb_len_wr = 0; if ((hp->mx_sb_len > 0) && hp->sbp) { if ((CHECK_CONDITION & hp->masked_status) || (srp->sense_b[0] & 0x70) == 0x70) { int sb_len = SCSI_SENSE_BUFFERSIZE; sb_len = (hp->mx_sb_len > sb_len) ? sb_len : hp->mx_sb_len; len = 8 + (int) srp->sense_b[7]; /* Additional sense length field */ len = (len > sb_len) ? 
sb_len : len; if (copy_to_user(hp->sbp, srp->sense_b, len)) { err = -EFAULT; goto err_out; } hp->driver_status = DRIVER_SENSE; hp->sb_len_wr = len; } } if (hp->masked_status || hp->host_status || hp->driver_status) hp->info |= SG_INFO_CHECK; err = put_sg_io_hdr(hp, buf); err_out: err2 = sg_finish_rem_req(srp); sg_remove_request(sfp, srp); return err ? : err2 ? : count; } static ssize_t sg_write(struct file *filp, const char __user *buf, size_t count, loff_t * ppos) { int mxsize, cmd_size, k; int input_size, blocking; unsigned char opcode; Sg_device *sdp; Sg_fd *sfp; Sg_request *srp; struct sg_header old_hdr; sg_io_hdr_t *hp; unsigned char cmnd[SG_MAX_CDB_SIZE]; int retval; retval = sg_check_file_access(filp, __func__); if (retval) return retval; if ((!(sfp = (Sg_fd *) filp->private_data)) || (!(sdp = sfp->parentdp))) return -ENXIO; SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sdp, "sg_write: count=%d\n", (int) count)); if (atomic_read(&sdp->detaching)) return -ENODEV; if (!((filp->f_flags & O_NONBLOCK) || scsi_block_when_processing_errors(sdp->device))) return -ENXIO; if (count < SZ_SG_HEADER) return -EIO; if (copy_from_user(&old_hdr, buf, SZ_SG_HEADER)) return -EFAULT; blocking = !(filp->f_flags & O_NONBLOCK); if (old_hdr.reply_len < 0) return sg_new_write(sfp, filp, buf, count, blocking, 0, 0, NULL); if (count < (SZ_SG_HEADER + 6)) return -EIO; /* The minimum scsi command length is 6 bytes. */ buf += SZ_SG_HEADER; if (get_user(opcode, buf)) return -EFAULT; if (!(srp = sg_add_request(sfp))) { SCSI_LOG_TIMEOUT(1, sg_printk(KERN_INFO, sdp, "sg_write: queue full\n")); return -EDOM; } mutex_lock(&sfp->f_mutex); if (sfp->next_cmd_len > 0) { cmd_size = sfp->next_cmd_len; sfp->next_cmd_len = 0; /* reset so only this write() effected */ } else { cmd_size = COMMAND_SIZE(opcode); /* based on SCSI command group */ if ((opcode >= 0xc0) && old_hdr.twelve_byte) cmd_size = 12; } mutex_unlock(&sfp->f_mutex); SCSI_LOG_TIMEOUT(4, sg_printk(KERN_INFO, sdp, "sg_write: scsi opcode=0x%02x, cmd_size=%d\n", (int) opcode, cmd_size)); /* Determine buffer size. */ input_size = count - cmd_size; mxsize = (input_size > old_hdr.reply_len) ? input_size : old_hdr.reply_len; mxsize -= SZ_SG_HEADER; input_size -= SZ_SG_HEADER; if (input_size < 0) { sg_remove_request(sfp, srp); return -EIO; /* User did not pass enough bytes for this command. */ } hp = &srp->header; hp->interface_id = '\0'; /* indicator of old interface tunnelled */ hp->cmd_len = (unsigned char) cmd_size; hp->iovec_count = 0; hp->mx_sb_len = 0; if (input_size > 0) hp->dxfer_direction = (old_hdr.reply_len > SZ_SG_HEADER) ? SG_DXFER_TO_FROM_DEV : SG_DXFER_TO_DEV; else hp->dxfer_direction = (mxsize > 0) ? SG_DXFER_FROM_DEV : SG_DXFER_NONE; hp->dxfer_len = mxsize; if ((hp->dxfer_direction == SG_DXFER_TO_DEV) || (hp->dxfer_direction == SG_DXFER_TO_FROM_DEV)) hp->dxferp = (char __user *)buf + cmd_size; else hp->dxferp = NULL; hp->sbp = NULL; hp->timeout = old_hdr.reply_len; /* structure abuse ... */ hp->flags = input_size; /* structure abuse ... */ hp->pack_id = old_hdr.pack_id; hp->usr_ptr = NULL; if (copy_from_user(cmnd, buf, cmd_size)) { sg_remove_request(sfp, srp); return -EFAULT; } /* * SG_DXFER_TO_FROM_DEV is functionally equivalent to SG_DXFER_FROM_DEV, * but is is possible that the app intended SG_DXFER_TO_DEV, because there * is a non-zero input_size, so emit a warning. 
*/ if (hp->dxfer_direction == SG_DXFER_TO_FROM_DEV) { printk_ratelimited(KERN_WARNING "sg_write: data in/out %d/%d bytes " "for SCSI command 0x%x-- guessing " "data in;\n program %s not setting " "count and/or reply_len properly\n", old_hdr.reply_len - (int)SZ_SG_HEADER, input_size, (unsigned int) cmnd[0], current->comm); } k = sg_common_write(sfp, srp, cmnd, sfp->timeout, blocking); return (k < 0) ? k : count; } static ssize_t sg_new_write(Sg_fd *sfp, struct file *file, const char __user *buf, size_t count, int blocking, int read_only, int sg_io_owned, Sg_request **o_srp) { int k; Sg_request *srp; sg_io_hdr_t *hp; unsigned char cmnd[SG_MAX_CDB_SIZE]; int timeout; unsigned long ul_timeout; if (count < SZ_SG_IO_HDR) return -EINVAL; sfp->cmd_q = 1; /* when sg_io_hdr seen, set command queuing on */ if (!(srp = sg_add_request(sfp))) { SCSI_LOG_TIMEOUT(1, sg_printk(KERN_INFO, sfp->parentdp, "sg_new_write: queue full\n")); return -EDOM; } srp->sg_io_owned = sg_io_owned; hp = &srp->header; if (get_sg_io_hdr(hp, buf)) { sg_remove_request(sfp, srp); return -EFAULT; } if (hp->interface_id != 'S') { sg_remove_request(sfp, srp); return -ENOSYS; } if (hp->flags & SG_FLAG_MMAP_IO) { if (hp->dxfer_len > sfp->reserve.bufflen) { sg_remove_request(sfp, srp); return -ENOMEM; /* MMAP_IO size must fit in reserve buffer */ } if (hp->flags & SG_FLAG_DIRECT_IO) { sg_remove_request(sfp, srp); return -EINVAL; /* either MMAP_IO or DIRECT_IO (not both) */ } if (sfp->res_in_use) { sg_remove_request(sfp, srp); return -EBUSY; /* reserve buffer already being used */ } } ul_timeout = msecs_to_jiffies(srp->header.timeout); timeout = (ul_timeout < INT_MAX) ? ul_timeout : INT_MAX; if ((!hp->cmdp) || (hp->cmd_len < 6) || (hp->cmd_len > sizeof (cmnd))) { sg_remove_request(sfp, srp); return -EMSGSIZE; } if (copy_from_user(cmnd, hp->cmdp, hp->cmd_len)) { sg_remove_request(sfp, srp); return -EFAULT; } if (read_only && sg_allow_access(file, cmnd)) { sg_remove_request(sfp, srp); return -EPERM; } k = sg_common_write(sfp, srp, cmnd, timeout, blocking); if (k < 0) return k; if (o_srp) *o_srp = srp; return count; } static int sg_common_write(Sg_fd * sfp, Sg_request * srp, unsigned char *cmnd, int timeout, int blocking) { int k, at_head; Sg_device *sdp = sfp->parentdp; sg_io_hdr_t *hp = &srp->header; srp->data.cmd_opcode = cmnd[0]; /* hold opcode of command */ hp->status = 0; hp->masked_status = 0; hp->msg_status = 0; hp->info = 0; hp->host_status = 0; hp->driver_status = 0; hp->resid = 0; SCSI_LOG_TIMEOUT(4, sg_printk(KERN_INFO, sfp->parentdp, "sg_common_write: scsi opcode=0x%02x, cmd_size=%d\n", (int) cmnd[0], (int) hp->cmd_len)); if (hp->dxfer_len >= SZ_256M) { sg_remove_request(sfp, srp); return -EINVAL; } k = sg_start_req(srp, cmnd); if (k) { SCSI_LOG_TIMEOUT(1, sg_printk(KERN_INFO, sfp->parentdp, "sg_common_write: start_req err=%d\n", k)); sg_finish_rem_req(srp); sg_remove_request(sfp, srp); return k; /* probably out of space --> ENOMEM */ } if (atomic_read(&sdp->detaching)) { if (srp->bio) { blk_mq_free_request(srp->rq); srp->rq = NULL; } sg_finish_rem_req(srp); sg_remove_request(sfp, srp); return -ENODEV; } hp->duration = jiffies_to_msecs(jiffies); if (hp->interface_id != '\0' && /* v3 (or later) interface */ (SG_FLAG_Q_AT_TAIL & hp->flags)) at_head = 0; else at_head = 1; srp->rq->timeout = timeout; kref_get(&sfp->f_ref); /* sg_rq_end_io() does kref_put(). 
*/ srp->rq->end_io = sg_rq_end_io; blk_execute_rq_nowait(srp->rq, at_head); return 0; } static int srp_done(Sg_fd *sfp, Sg_request *srp) { unsigned long flags; int ret; read_lock_irqsave(&sfp->rq_list_lock, flags); ret = srp->done; read_unlock_irqrestore(&sfp->rq_list_lock, flags); return ret; } static int max_sectors_bytes(struct request_queue *q) { unsigned int max_sectors = queue_max_sectors(q); max_sectors = min_t(unsigned int, max_sectors, INT_MAX >> 9); return max_sectors << 9; } static void sg_fill_request_table(Sg_fd *sfp, sg_req_info_t *rinfo) { Sg_request *srp; int val; unsigned int ms; val = 0; list_for_each_entry(srp, &sfp->rq_list, entry) { if (val >= SG_MAX_QUEUE) break; rinfo[val].req_state = srp->done + 1; rinfo[val].problem = srp->header.masked_status & srp->header.host_status & srp->header.driver_status; if (srp->done) rinfo[val].duration = srp->header.duration; else { ms = jiffies_to_msecs(jiffies); rinfo[val].duration = (ms > srp->header.duration) ? (ms - srp->header.duration) : 0; } rinfo[val].orphan = srp->orphan; rinfo[val].sg_io_owned = srp->sg_io_owned; rinfo[val].pack_id = srp->header.pack_id; rinfo[val].usr_ptr = srp->header.usr_ptr; val++; } } #ifdef CONFIG_COMPAT struct compat_sg_req_info { /* used by SG_GET_REQUEST_TABLE ioctl() */ char req_state; char orphan; char sg_io_owned; char problem; int pack_id; compat_uptr_t usr_ptr; unsigned int duration; int unused; }; static int put_compat_request_table(struct compat_sg_req_info __user *o, struct sg_req_info *rinfo) { int i; for (i = 0; i < SG_MAX_QUEUE; i++) { if (copy_to_user(o + i, rinfo + i, offsetof(sg_req_info_t, usr_ptr)) || put_user((uintptr_t)rinfo[i].usr_ptr, &o[i].usr_ptr) || put_user(rinfo[i].duration, &o[i].duration) || put_user(rinfo[i].unused, &o[i].unused)) return -EFAULT; } return 0; } #endif static long sg_ioctl_common(struct file *filp, Sg_device *sdp, Sg_fd *sfp, unsigned int cmd_in, void __user *p) { int __user *ip = p; int result, val, read_only; Sg_request *srp; unsigned long iflags; SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sdp, "sg_ioctl: cmd=0x%x\n", (int) cmd_in)); read_only = (O_RDWR != (filp->f_flags & O_ACCMODE)); switch (cmd_in) { case SG_IO: if (atomic_read(&sdp->detaching)) return -ENODEV; if (!scsi_block_when_processing_errors(sdp->device)) return -ENXIO; result = sg_new_write(sfp, filp, p, SZ_SG_IO_HDR, 1, read_only, 1, &srp); if (result < 0) return result; result = wait_event_interruptible(sfp->read_wait, srp_done(sfp, srp)); write_lock_irq(&sfp->rq_list_lock); if (srp->done) { srp->done = 2; write_unlock_irq(&sfp->rq_list_lock); result = sg_new_read(sfp, p, SZ_SG_IO_HDR, srp); return (result < 0) ? result : 0; } srp->orphan = 1; write_unlock_irq(&sfp->rq_list_lock); return result; /* -ERESTARTSYS because signal hit process */ case SG_SET_TIMEOUT: result = get_user(val, ip); if (result) return result; if (val < 0) return -EIO; if (val >= mult_frac((s64)INT_MAX, USER_HZ, HZ)) val = min_t(s64, mult_frac((s64)INT_MAX, USER_HZ, HZ), INT_MAX); sfp->timeout_user = val; sfp->timeout = mult_frac(val, HZ, USER_HZ); return 0; case SG_GET_TIMEOUT: /* N.B. User receives timeout as return value */ /* strange ..., for backward compatibility */ return sfp->timeout_user; case SG_SET_FORCE_LOW_DMA: /* * N.B. This ioctl never worked properly, but failed to * return an error value. So returning '0' to keep compability * with legacy applications. 
*/ return 0; case SG_GET_LOW_DMA: return put_user(0, ip); case SG_GET_SCSI_ID: { sg_scsi_id_t v; if (atomic_read(&sdp->detaching)) return -ENODEV; memset(&v, 0, sizeof(v)); v.host_no = sdp->device->host->host_no; v.channel = sdp->device->channel; v.scsi_id = sdp->device->id; v.lun = sdp->device->lun; v.scsi_type = sdp->device->type; v.h_cmd_per_lun = sdp->device->host->cmd_per_lun; v.d_queue_depth = sdp->device->queue_depth; if (copy_to_user(p, &v, sizeof(sg_scsi_id_t))) return -EFAULT; return 0; } case SG_SET_FORCE_PACK_ID: result = get_user(val, ip); if (result) return result; sfp->force_packid = val ? 1 : 0; return 0; case SG_GET_PACK_ID: read_lock_irqsave(&sfp->rq_list_lock, iflags); list_for_each_entry(srp, &sfp->rq_list, entry) { if ((1 == srp->done) && (!srp->sg_io_owned)) { read_unlock_irqrestore(&sfp->rq_list_lock, iflags); return put_user(srp->header.pack_id, ip); } } read_unlock_irqrestore(&sfp->rq_list_lock, iflags); return put_user(-1, ip); case SG_GET_NUM_WAITING: read_lock_irqsave(&sfp->rq_list_lock, iflags); val = 0; list_for_each_entry(srp, &sfp->rq_list, entry) { if ((1 == srp->done) && (!srp->sg_io_owned)) ++val; } read_unlock_irqrestore(&sfp->rq_list_lock, iflags); return put_user(val, ip); case SG_GET_SG_TABLESIZE: return put_user(sdp->sg_tablesize, ip); case SG_SET_RESERVED_SIZE: result = get_user(val, ip); if (result) return result; if (val < 0) return -EINVAL; val = min_t(int, val, max_sectors_bytes(sdp->device->request_queue)); mutex_lock(&sfp->f_mutex); if (val != sfp->reserve.bufflen) { if (sfp->mmap_called || sfp->res_in_use) { mutex_unlock(&sfp->f_mutex); return -EBUSY; } sg_remove_scat(sfp, &sfp->reserve); sg_build_reserve(sfp, val); } mutex_unlock(&sfp->f_mutex); return 0; case SG_GET_RESERVED_SIZE: val = min_t(int, sfp->reserve.bufflen, max_sectors_bytes(sdp->device->request_queue)); return put_user(val, ip); case SG_SET_COMMAND_Q: result = get_user(val, ip); if (result) return result; sfp->cmd_q = val ? 1 : 0; return 0; case SG_GET_COMMAND_Q: return put_user((int) sfp->cmd_q, ip); case SG_SET_KEEP_ORPHAN: result = get_user(val, ip); if (result) return result; sfp->keep_orphan = val; return 0; case SG_GET_KEEP_ORPHAN: return put_user((int) sfp->keep_orphan, ip); case SG_NEXT_CMD_LEN: result = get_user(val, ip); if (result) return result; if (val > SG_MAX_CDB_SIZE) return -ENOMEM; sfp->next_cmd_len = (val > 0) ? val : 0; return 0; case SG_GET_VERSION_NUM: return put_user(sg_version_num, ip); case SG_GET_ACCESS_COUNT: /* faked - we don't have a real access count anymore */ val = (sdp->device ? 1 : 0); return put_user(val, ip); case SG_GET_REQUEST_TABLE: { sg_req_info_t *rinfo; rinfo = kcalloc(SG_MAX_QUEUE, SZ_SG_REQ_INFO, GFP_KERNEL); if (!rinfo) return -ENOMEM; read_lock_irqsave(&sfp->rq_list_lock, iflags); sg_fill_request_table(sfp, rinfo); read_unlock_irqrestore(&sfp->rq_list_lock, iflags); #ifdef CONFIG_COMPAT if (in_compat_syscall()) result = put_compat_request_table(p, rinfo); else #endif result = copy_to_user(p, rinfo, SZ_SG_REQ_INFO * SG_MAX_QUEUE); result = result ? 
-EFAULT : 0; kfree(rinfo); return result; } case SG_EMULATED_HOST: if (atomic_read(&sdp->detaching)) return -ENODEV; return put_user(sdp->device->host->hostt->emulated, ip); case SCSI_IOCTL_SEND_COMMAND: if (atomic_read(&sdp->detaching)) return -ENODEV; return scsi_ioctl(sdp->device, filp->f_mode & FMODE_WRITE, cmd_in, p); case SG_SET_DEBUG: result = get_user(val, ip); if (result) return result; sdp->sgdebug = (char) val; return 0; case BLKSECTGET: return put_user(max_sectors_bytes(sdp->device->request_queue), ip); case BLKTRACESETUP: return blk_trace_setup(sdp->device->request_queue, sdp->name, MKDEV(SCSI_GENERIC_MAJOR, sdp->index), NULL, p); case BLKTRACESTART: return blk_trace_startstop(sdp->device->request_queue, 1); case BLKTRACESTOP: return blk_trace_startstop(sdp->device->request_queue, 0); case BLKTRACETEARDOWN: return blk_trace_remove(sdp->device->request_queue); case SCSI_IOCTL_GET_IDLUN: case SCSI_IOCTL_GET_BUS_NUMBER: case SCSI_IOCTL_PROBE_HOST: case SG_GET_TRANSFORM: case SG_SCSI_RESET: if (atomic_read(&sdp->detaching)) return -ENODEV; break; default: if (read_only) return -EPERM; /* don't know so take safe approach */ break; } result = scsi_ioctl_block_when_processing_errors(sdp->device, cmd_in, filp->f_flags & O_NDELAY); if (result) return result; return -ENOIOCTLCMD; } static long sg_ioctl(struct file *filp, unsigned int cmd_in, unsigned long arg) { void __user *p = (void __user *)arg; Sg_device *sdp; Sg_fd *sfp; int ret; if ((!(sfp = (Sg_fd *) filp->private_data)) || (!(sdp = sfp->parentdp))) return -ENXIO; ret = sg_ioctl_common(filp, sdp, sfp, cmd_in, p); if (ret != -ENOIOCTLCMD) return ret; return scsi_ioctl(sdp->device, filp->f_mode & FMODE_WRITE, cmd_in, p); } static __poll_t sg_poll(struct file *filp, poll_table * wait) { __poll_t res = 0; Sg_device *sdp; Sg_fd *sfp; Sg_request *srp; int count = 0; unsigned long iflags; sfp = filp->private_data; if (!sfp) return EPOLLERR; sdp = sfp->parentdp; if (!sdp) return EPOLLERR; poll_wait(filp, &sfp->read_wait, wait); read_lock_irqsave(&sfp->rq_list_lock, iflags); list_for_each_entry(srp, &sfp->rq_list, entry) { /* if any read waiting, flag it */ if ((0 == res) && (1 == srp->done) && (!srp->sg_io_owned)) res = EPOLLIN | EPOLLRDNORM; ++count; } read_unlock_irqrestore(&sfp->rq_list_lock, iflags); if (atomic_read(&sdp->detaching)) res |= EPOLLHUP; else if (!sfp->cmd_q) { if (0 == count) res |= EPOLLOUT | EPOLLWRNORM; } else if (count < SG_MAX_QUEUE) res |= EPOLLOUT | EPOLLWRNORM; SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sdp, "sg_poll: res=0x%x\n", (__force u32) res)); return res; } static int sg_fasync(int fd, struct file *filp, int mode) { Sg_device *sdp; Sg_fd *sfp; if ((!(sfp = (Sg_fd *) filp->private_data)) || (!(sdp = sfp->parentdp))) return -ENXIO; SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sdp, "sg_fasync: mode=%d\n", mode)); return fasync_helper(fd, filp, mode, &sfp->async_qp); } static vm_fault_t sg_vma_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; Sg_fd *sfp; unsigned long offset, len, sa; Sg_scatter_hold *rsv_schp; int k, length; if ((NULL == vma) || (!(sfp = (Sg_fd *) vma->vm_private_data))) return VM_FAULT_SIGBUS; rsv_schp = &sfp->reserve; offset = vmf->pgoff << PAGE_SHIFT; if (offset >= rsv_schp->bufflen) return VM_FAULT_SIGBUS; SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sfp->parentdp, "sg_vma_fault: offset=%lu, scatg=%d\n", offset, rsv_schp->k_use_sg)); sa = vma->vm_start; length = 1 << (PAGE_SHIFT + rsv_schp->page_order); for (k = 0; k < rsv_schp->k_use_sg && sa < vma->vm_end; k++) { len = 
vma->vm_end - sa; len = (len < length) ? len : length; if (offset < len) { struct page *page = nth_page(rsv_schp->pages[k], offset >> PAGE_SHIFT); get_page(page); /* increment page count */ vmf->page = page; return 0; /* success */ } sa += len; offset -= len; } return VM_FAULT_SIGBUS; } static const struct vm_operations_struct sg_mmap_vm_ops = { .fault = sg_vma_fault, }; static int sg_mmap(struct file *filp, struct vm_area_struct *vma) { Sg_fd *sfp; unsigned long req_sz, len, sa; Sg_scatter_hold *rsv_schp; int k, length; int ret = 0; if ((!filp) || (!vma) || (!(sfp = (Sg_fd *) filp->private_data))) return -ENXIO; req_sz = vma->vm_end - vma->vm_start; SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sfp->parentdp, "sg_mmap starting, vm_start=%p, len=%d\n", (void *) vma->vm_start, (int) req_sz)); if (vma->vm_pgoff) return -EINVAL; /* want no offset */ rsv_schp = &sfp->reserve; mutex_lock(&sfp->f_mutex); if (req_sz > rsv_schp->bufflen) { ret = -ENOMEM; /* cannot map more than reserved buffer */ goto out; } sa = vma->vm_start; length = 1 << (PAGE_SHIFT + rsv_schp->page_order); for (k = 0; k < rsv_schp->k_use_sg && sa < vma->vm_end; k++) { len = vma->vm_end - sa; len = (len < length) ? len : length; sa += len; } sfp->mmap_called = 1; vm_flags_set(vma, VM_IO | VM_DONTEXPAND | VM_DONTDUMP); vma->vm_private_data = sfp; vma->vm_ops = &sg_mmap_vm_ops; out: mutex_unlock(&sfp->f_mutex); return ret; } static void sg_rq_end_io_usercontext(struct work_struct *work) { struct sg_request *srp = container_of(work, struct sg_request, ew.work); struct sg_fd *sfp = srp->parentfp; sg_finish_rem_req(srp); sg_remove_request(sfp, srp); kref_put(&sfp->f_ref, sg_remove_sfp); } /* * This function is a "bottom half" handler that is called by the mid * level when a command is completed (or has failed). */ static enum rq_end_io_ret sg_rq_end_io(struct request *rq, blk_status_t status) { struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(rq); struct sg_request *srp = rq->end_io_data; Sg_device *sdp; Sg_fd *sfp; unsigned long iflags; unsigned int ms; char *sense; int result, resid, done = 1; if (WARN_ON(srp->done != 0)) return RQ_END_IO_NONE; sfp = srp->parentfp; if (WARN_ON(sfp == NULL)) return RQ_END_IO_NONE; sdp = sfp->parentdp; if (unlikely(atomic_read(&sdp->detaching))) pr_info("%s: device detaching\n", __func__); sense = scmd->sense_buffer; result = scmd->result; resid = scmd->resid_len; SCSI_LOG_TIMEOUT(4, sg_printk(KERN_INFO, sdp, "sg_cmd_done: pack_id=%d, res=0x%x\n", srp->header.pack_id, result)); srp->header.resid = resid; ms = jiffies_to_msecs(jiffies); srp->header.duration = (ms > srp->header.duration) ? (ms - srp->header.duration) : 0; if (0 != result) { struct scsi_sense_hdr sshdr; srp->header.status = 0xff & result; srp->header.masked_status = sg_status_byte(result); srp->header.msg_status = COMMAND_COMPLETE; srp->header.host_status = host_byte(result); srp->header.driver_status = driver_byte(result); if ((sdp->sgdebug > 0) && ((CHECK_CONDITION == srp->header.masked_status) || (COMMAND_TERMINATED == srp->header.masked_status))) __scsi_print_sense(sdp->device, __func__, sense, SCSI_SENSE_BUFFERSIZE); /* Following if statement is a patch supplied by Eric Youngdale */ if (driver_byte(result) != 0 && scsi_normalize_sense(sense, SCSI_SENSE_BUFFERSIZE, &sshdr) && !scsi_sense_is_deferred(&sshdr) && sshdr.sense_key == UNIT_ATTENTION && sdp->device->removable) { /* Detected possible disc change. 
Set the bit - this */ /* may be used if there are filesystems using this device */ sdp->device->changed = 1; } } if (scmd->sense_len) memcpy(srp->sense_b, scmd->sense_buffer, SCSI_SENSE_BUFFERSIZE); /* Rely on write phase to clean out srp status values, so no "else" */ /* * Free the request as soon as it is complete so that its resources * can be reused without waiting for userspace to read() the * result. But keep the associated bio (if any) around until * blk_rq_unmap_user() can be called from user context. */ srp->rq = NULL; blk_mq_free_request(rq); write_lock_irqsave(&sfp->rq_list_lock, iflags); if (unlikely(srp->orphan)) { if (sfp->keep_orphan) srp->sg_io_owned = 0; else done = 0; } srp->done = done; write_unlock_irqrestore(&sfp->rq_list_lock, iflags); if (likely(done)) { /* Now wake up any sg_read() that is waiting for this * packet. */ wake_up_interruptible(&sfp->read_wait); kill_fasync(&sfp->async_qp, SIGPOLL, POLL_IN); kref_put(&sfp->f_ref, sg_remove_sfp); } else { INIT_WORK(&srp->ew.work, sg_rq_end_io_usercontext); schedule_work(&srp->ew.work); } return RQ_END_IO_NONE; } static const struct file_operations sg_fops = { .owner = THIS_MODULE, .read = sg_read, .write = sg_write, .poll = sg_poll, .unlocked_ioctl = sg_ioctl, .compat_ioctl = compat_ptr_ioctl, .open = sg_open, .mmap = sg_mmap, .release = sg_release, .fasync = sg_fasync, }; static const struct class sg_sysfs_class = { .name = "scsi_generic" }; static int sg_sysfs_valid = 0; static Sg_device * sg_alloc(struct scsi_device *scsidp) { struct request_queue *q = scsidp->request_queue; Sg_device *sdp; unsigned long iflags; int error; u32 k; sdp = kzalloc(sizeof(Sg_device), GFP_KERNEL); if (!sdp) { sdev_printk(KERN_WARNING, scsidp, "%s: kmalloc Sg_device " "failure\n", __func__); return ERR_PTR(-ENOMEM); } idr_preload(GFP_KERNEL); write_lock_irqsave(&sg_index_lock, iflags); error = idr_alloc(&sg_index_idr, sdp, 0, SG_MAX_DEVS, GFP_NOWAIT); if (error < 0) { if (error == -ENOSPC) { sdev_printk(KERN_WARNING, scsidp, "Unable to attach sg device type=%d, minor number exceeds %d\n", scsidp->type, SG_MAX_DEVS - 1); error = -ENODEV; } else { sdev_printk(KERN_WARNING, scsidp, "%s: idr " "allocation Sg_device failure: %d\n", __func__, error); } goto out_unlock; } k = error; SCSI_LOG_TIMEOUT(3, sdev_printk(KERN_INFO, scsidp, "sg_alloc: dev=%d \n", k)); sprintf(sdp->name, "sg%d", k); sdp->device = scsidp; mutex_init(&sdp->open_rel_lock); INIT_LIST_HEAD(&sdp->sfds); init_waitqueue_head(&sdp->open_wait); atomic_set(&sdp->detaching, 0); rwlock_init(&sdp->sfd_lock); sdp->sg_tablesize = queue_max_segments(q); sdp->index = k; kref_init(&sdp->d_ref); error = 0; out_unlock: write_unlock_irqrestore(&sg_index_lock, iflags); idr_preload_end(); if (error) { kfree(sdp); return ERR_PTR(error); } return sdp; } static int sg_add_device(struct device *cl_dev) { struct scsi_device *scsidp = to_scsi_device(cl_dev->parent); Sg_device *sdp = NULL; struct cdev * cdev = NULL; int error; unsigned long iflags; if (!blk_get_queue(scsidp->request_queue)) { pr_warn("%s: get scsi_device queue failed\n", __func__); return -ENODEV; } error = -ENOMEM; cdev = cdev_alloc(); if (!cdev) { pr_warn("%s: cdev_alloc failed\n", __func__); goto out; } cdev->owner = THIS_MODULE; cdev->ops = &sg_fops; sdp = sg_alloc(scsidp); if (IS_ERR(sdp)) { pr_warn("%s: sg_alloc failed\n", __func__); error = PTR_ERR(sdp); goto out; } error = cdev_add(cdev, MKDEV(SCSI_GENERIC_MAJOR, sdp->index), 1); if (error) goto cdev_add_err; sdp->cdev = cdev; if (sg_sysfs_valid) { struct device *sg_class_member; 
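/* Register the sg<N> node with the "scsi_generic" class (exposed to udev as /dev/sg<N>) and add a "generic" symlink from the SCSI device back to it. */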
sg_class_member = device_create(&sg_sysfs_class, cl_dev->parent, MKDEV(SCSI_GENERIC_MAJOR, sdp->index), sdp, "%s", sdp->name); if (IS_ERR(sg_class_member)) { pr_err("%s: device_create failed\n", __func__); error = PTR_ERR(sg_class_member); goto cdev_add_err; } error = sysfs_create_link(&scsidp->sdev_gendev.kobj, &sg_class_member->kobj, "generic"); if (error) pr_err("%s: unable to make symlink 'generic' back " "to sg%d\n", __func__, sdp->index); } else pr_warn("%s: sg_sys Invalid\n", __func__); sdev_printk(KERN_NOTICE, scsidp, "Attached scsi generic sg%d " "type %d\n", sdp->index, scsidp->type); dev_set_drvdata(cl_dev, sdp); return 0; cdev_add_err: write_lock_irqsave(&sg_index_lock, iflags); idr_remove(&sg_index_idr, sdp->index); write_unlock_irqrestore(&sg_index_lock, iflags); kfree(sdp); out: if (cdev) cdev_del(cdev); blk_put_queue(scsidp->request_queue); return error; } static void sg_device_destroy(struct kref *kref) { struct sg_device *sdp = container_of(kref, struct sg_device, d_ref); struct request_queue *q = sdp->device->request_queue; unsigned long flags; /* CAUTION! Note that the device can still be found via idr_find() * even though the refcount is 0. Therefore, do idr_remove() BEFORE * any other cleanup. */ blk_trace_remove(q); blk_put_queue(q); write_lock_irqsave(&sg_index_lock, flags); idr_remove(&sg_index_idr, sdp->index); write_unlock_irqrestore(&sg_index_lock, flags); SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sdp, "sg_device_destroy\n")); kfree(sdp); } static void sg_remove_device(struct device *cl_dev) { struct scsi_device *scsidp = to_scsi_device(cl_dev->parent); Sg_device *sdp = dev_get_drvdata(cl_dev); unsigned long iflags; Sg_fd *sfp; int val; if (!sdp) return; /* want sdp->detaching non-zero as soon as possible */ val = atomic_inc_return(&sdp->detaching); if (val > 1) return; /* only want to do following once per device */ SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sdp, "%s\n", __func__)); read_lock_irqsave(&sdp->sfd_lock, iflags); list_for_each_entry(sfp, &sdp->sfds, sfd_siblings) { wake_up_interruptible_all(&sfp->read_wait); kill_fasync(&sfp->async_qp, SIGPOLL, POLL_HUP); } wake_up_interruptible_all(&sdp->open_wait); read_unlock_irqrestore(&sdp->sfd_lock, iflags); sysfs_remove_link(&scsidp->sdev_gendev.kobj, "generic"); device_destroy(&sg_sysfs_class, MKDEV(SCSI_GENERIC_MAJOR, sdp->index)); cdev_del(sdp->cdev); sdp->cdev = NULL; kref_put(&sdp->d_ref, sg_device_destroy); } module_param_named(scatter_elem_sz, scatter_elem_sz, int, S_IRUGO | S_IWUSR); module_param_named(def_reserved_size, def_reserved_size, int, S_IRUGO | S_IWUSR); module_param_named(allow_dio, sg_allow_dio, int, S_IRUGO | S_IWUSR); MODULE_AUTHOR("Douglas Gilbert"); MODULE_DESCRIPTION("SCSI generic (sg) driver"); MODULE_LICENSE("GPL"); MODULE_VERSION(SG_VERSION_STR); MODULE_ALIAS_CHARDEV_MAJOR(SCSI_GENERIC_MAJOR); MODULE_PARM_DESC(scatter_elem_sz, "scatter gather element " "size (default: max(SG_SCATTER_SZ, PAGE_SIZE))"); MODULE_PARM_DESC(def_reserved_size, "size of buffer reserved for each fd"); MODULE_PARM_DESC(allow_dio, "allow direct I/O (default: 0 (disallow))"); #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> static const struct ctl_table sg_sysctls[] = { { .procname = "sg-big-buff", .data = &sg_big_buff, .maxlen = sizeof(int), .mode = 0444, .proc_handler = proc_dointvec, }, }; static struct ctl_table_header *hdr; static void register_sg_sysctls(void) { if (!hdr) hdr = register_sysctl("kernel", sg_sysctls); } static void unregister_sg_sysctls(void) { if (hdr) unregister_sysctl_table(hdr); } #else 
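/* CONFIG_SYSCTL is not set: provide no-op stubs so that init_sg() and exit_sg() can call these helpers unconditionally. */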
#define register_sg_sysctls() do { } while (0) #define unregister_sg_sysctls() do { } while (0) #endif /* CONFIG_SYSCTL */ static int __init init_sg(void) { int rc; if (scatter_elem_sz < PAGE_SIZE) { scatter_elem_sz = PAGE_SIZE; scatter_elem_sz_prev = scatter_elem_sz; } if (def_reserved_size >= 0) sg_big_buff = def_reserved_size; else def_reserved_size = sg_big_buff; rc = register_chrdev_region(MKDEV(SCSI_GENERIC_MAJOR, 0), SG_MAX_DEVS, "sg"); if (rc) return rc; rc = class_register(&sg_sysfs_class); if (rc) goto err_out; sg_sysfs_valid = 1; rc = scsi_register_interface(&sg_interface); if (0 == rc) { #ifdef CONFIG_SCSI_PROC_FS sg_proc_init(); #endif /* CONFIG_SCSI_PROC_FS */ return 0; } class_unregister(&sg_sysfs_class); register_sg_sysctls(); err_out: unregister_chrdev_region(MKDEV(SCSI_GENERIC_MAJOR, 0), SG_MAX_DEVS); return rc; } static void __exit exit_sg(void) { unregister_sg_sysctls(); #ifdef CONFIG_SCSI_PROC_FS remove_proc_subtree("scsi/sg", NULL); #endif /* CONFIG_SCSI_PROC_FS */ scsi_unregister_interface(&sg_interface); class_unregister(&sg_sysfs_class); sg_sysfs_valid = 0; unregister_chrdev_region(MKDEV(SCSI_GENERIC_MAJOR, 0), SG_MAX_DEVS); idr_destroy(&sg_index_idr); } static int sg_start_req(Sg_request *srp, unsigned char *cmd) { int res; struct request *rq; Sg_fd *sfp = srp->parentfp; sg_io_hdr_t *hp = &srp->header; int dxfer_len = (int) hp->dxfer_len; int dxfer_dir = hp->dxfer_direction; unsigned int iov_count = hp->iovec_count; Sg_scatter_hold *req_schp = &srp->data; Sg_scatter_hold *rsv_schp = &sfp->reserve; struct request_queue *q = sfp->parentdp->device->request_queue; struct rq_map_data *md, map_data; int rw = hp->dxfer_direction == SG_DXFER_TO_DEV ? ITER_SOURCE : ITER_DEST; struct scsi_cmnd *scmd; SCSI_LOG_TIMEOUT(4, sg_printk(KERN_INFO, sfp->parentdp, "sg_start_req: dxfer_len=%d\n", dxfer_len)); /* * NOTE * * With scsi-mq enabled, there are a fixed number of preallocated * requests equal in number to shost->can_queue. If all of the * preallocated requests are already in use, then scsi_alloc_request() * will sleep until an active command completes, freeing up a request. * Although waiting in an asynchronous interface is less than ideal, we * do not want to use BLK_MQ_REQ_NOWAIT here because userspace might * not expect an EWOULDBLOCK from this condition. */ rq = scsi_alloc_request(q, hp->dxfer_direction == SG_DXFER_TO_DEV ? 
REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0); if (IS_ERR(rq)) return PTR_ERR(rq); scmd = blk_mq_rq_to_pdu(rq); if (hp->cmd_len > sizeof(scmd->cmnd)) { blk_mq_free_request(rq); return -EINVAL; } memcpy(scmd->cmnd, cmd, hp->cmd_len); scmd->cmd_len = hp->cmd_len; srp->rq = rq; rq->end_io_data = srp; scmd->allowed = SG_DEFAULT_RETRIES; if ((dxfer_len <= 0) || (dxfer_dir == SG_DXFER_NONE)) return 0; if (sg_allow_dio && hp->flags & SG_FLAG_DIRECT_IO && dxfer_dir != SG_DXFER_UNKNOWN && !iov_count && blk_rq_aligned(q, (unsigned long)hp->dxferp, dxfer_len)) md = NULL; else md = &map_data; if (md) { mutex_lock(&sfp->f_mutex); if (dxfer_len <= rsv_schp->bufflen && !sfp->res_in_use) { sfp->res_in_use = 1; sg_link_reserve(sfp, srp, dxfer_len); } else if (hp->flags & SG_FLAG_MMAP_IO) { res = -EBUSY; /* sfp->res_in_use == 1 */ if (dxfer_len > rsv_schp->bufflen) res = -ENOMEM; mutex_unlock(&sfp->f_mutex); return res; } else { res = sg_build_indirect(req_schp, sfp, dxfer_len); if (res) { mutex_unlock(&sfp->f_mutex); return res; } } mutex_unlock(&sfp->f_mutex); md->pages = req_schp->pages; md->page_order = req_schp->page_order; md->nr_entries = req_schp->k_use_sg; md->offset = 0; md->null_mapped = hp->dxferp ? 0 : 1; if (dxfer_dir == SG_DXFER_TO_FROM_DEV) md->from_user = 1; else md->from_user = 0; } res = blk_rq_map_user_io(rq, md, hp->dxferp, hp->dxfer_len, GFP_ATOMIC, iov_count, iov_count, 1, rw); if (!res) { srp->bio = rq->bio; if (!md) { req_schp->dio_in_use = 1; hp->info |= SG_INFO_DIRECT_IO; } } return res; } static int sg_finish_rem_req(Sg_request *srp) { int ret = 0; Sg_fd *sfp = srp->parentfp; Sg_scatter_hold *req_schp = &srp->data; SCSI_LOG_TIMEOUT(4, sg_printk(KERN_INFO, sfp->parentdp, "sg_finish_rem_req: res_used=%d\n", (int) srp->res_used)); if (srp->bio) ret = blk_rq_unmap_user(srp->bio); if (srp->rq) blk_mq_free_request(srp->rq); if (srp->res_used) sg_unlink_reserve(sfp, srp); else sg_remove_scat(sfp, req_schp); return ret; } static int sg_build_sgat(Sg_scatter_hold * schp, const Sg_fd * sfp, int tablesize) { int sg_bufflen = tablesize * sizeof(struct page *); gfp_t gfp_flags = GFP_ATOMIC | __GFP_NOWARN; schp->pages = kzalloc(sg_bufflen, gfp_flags); if (!schp->pages) return -ENOMEM; schp->sglist_len = sg_bufflen; return tablesize; /* number of scat_gath elements allocated */ } static int sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size) { int ret_sz = 0, i, k, rem_sz, num, mx_sc_elems; int sg_tablesize = sfp->parentdp->sg_tablesize; int blk_size = buff_size, order; gfp_t gfp_mask = GFP_ATOMIC | __GFP_COMP | __GFP_NOWARN | __GFP_ZERO; if (blk_size < 0) return -EFAULT; if (0 == blk_size) ++blk_size; /* don't know why */ /* round request up to next highest SG_SECTOR_SZ byte boundary */ blk_size = ALIGN(blk_size, SG_SECTOR_SZ); SCSI_LOG_TIMEOUT(4, sg_printk(KERN_INFO, sfp->parentdp, "sg_build_indirect: buff_size=%d, blk_size=%d\n", buff_size, blk_size)); /* N.B. ret_sz carried into this block ... */ mx_sc_elems = sg_build_sgat(schp, sfp, sg_tablesize); if (mx_sc_elems < 0) return mx_sc_elems; /* most likely -ENOMEM */ num = scatter_elem_sz; if (unlikely(num != scatter_elem_sz_prev)) { if (num < PAGE_SIZE) { scatter_elem_sz = PAGE_SIZE; scatter_elem_sz_prev = PAGE_SIZE; } else scatter_elem_sz_prev = num; } order = get_order(num); retry: ret_sz = 1 << (PAGE_SHIFT + order); for (k = 0, rem_sz = blk_size; rem_sz > 0 && k < mx_sc_elems; k++, rem_sz -= ret_sz) { num = (rem_sz > scatter_elem_sz_prev) ? 
scatter_elem_sz_prev : rem_sz; schp->pages[k] = alloc_pages(gfp_mask, order); if (!schp->pages[k]) goto out; if (num == scatter_elem_sz_prev) { if (unlikely(ret_sz > scatter_elem_sz_prev)) { scatter_elem_sz = ret_sz; scatter_elem_sz_prev = ret_sz; } } SCSI_LOG_TIMEOUT(5, sg_printk(KERN_INFO, sfp->parentdp, "sg_build_indirect: k=%d, num=%d, ret_sz=%d\n", k, num, ret_sz)); } /* end of for loop */ schp->page_order = order; schp->k_use_sg = k; SCSI_LOG_TIMEOUT(5, sg_printk(KERN_INFO, sfp->parentdp, "sg_build_indirect: k_use_sg=%d, rem_sz=%d\n", k, rem_sz)); schp->bufflen = blk_size; if (rem_sz > 0) /* must have failed */ return -ENOMEM; return 0; out: for (i = 0; i < k; i++) __free_pages(schp->pages[i], order); if (--order >= 0) goto retry; return -ENOMEM; } static void sg_remove_scat(Sg_fd * sfp, Sg_scatter_hold * schp) { SCSI_LOG_TIMEOUT(4, sg_printk(KERN_INFO, sfp->parentdp, "sg_remove_scat: k_use_sg=%d\n", schp->k_use_sg)); if (schp->pages && schp->sglist_len > 0) { if (!schp->dio_in_use) { int k; for (k = 0; k < schp->k_use_sg && schp->pages[k]; k++) { SCSI_LOG_TIMEOUT(5, sg_printk(KERN_INFO, sfp->parentdp, "sg_remove_scat: k=%d, pg=0x%p\n", k, schp->pages[k])); __free_pages(schp->pages[k], schp->page_order); } kfree(schp->pages); } } memset(schp, 0, sizeof (*schp)); } static int sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer) { Sg_scatter_hold *schp = &srp->data; int k, num; SCSI_LOG_TIMEOUT(4, sg_printk(KERN_INFO, srp->parentfp->parentdp, "sg_read_oxfer: num_read_xfer=%d\n", num_read_xfer)); if ((!outp) || (num_read_xfer <= 0)) return 0; num = 1 << (PAGE_SHIFT + schp->page_order); for (k = 0; k < schp->k_use_sg && schp->pages[k]; k++) { if (num > num_read_xfer) { if (copy_to_user(outp, page_address(schp->pages[k]), num_read_xfer)) return -EFAULT; break; } else { if (copy_to_user(outp, page_address(schp->pages[k]), num)) return -EFAULT; num_read_xfer -= num; if (num_read_xfer <= 0) break; outp += num; } } return 0; } static void sg_build_reserve(Sg_fd * sfp, int req_size) { Sg_scatter_hold *schp = &sfp->reserve; SCSI_LOG_TIMEOUT(4, sg_printk(KERN_INFO, sfp->parentdp, "sg_build_reserve: req_size=%d\n", req_size)); do { if (req_size < PAGE_SIZE) req_size = PAGE_SIZE; if (0 == sg_build_indirect(schp, sfp, req_size)) return; else sg_remove_scat(sfp, schp); req_size >>= 1; /* divide by 2 */ } while (req_size > (PAGE_SIZE / 2)); } static void sg_link_reserve(Sg_fd * sfp, Sg_request * srp, int size) { Sg_scatter_hold *req_schp = &srp->data; Sg_scatter_hold *rsv_schp = &sfp->reserve; int k, num, rem; srp->res_used = 1; SCSI_LOG_TIMEOUT(4, sg_printk(KERN_INFO, sfp->parentdp, "sg_link_reserve: size=%d\n", size)); rem = size; num = 1 << (PAGE_SHIFT + rsv_schp->page_order); for (k = 0; k < rsv_schp->k_use_sg; k++) { if (rem <= num) { req_schp->k_use_sg = k + 1; req_schp->sglist_len = rsv_schp->sglist_len; req_schp->pages = rsv_schp->pages; req_schp->bufflen = size; req_schp->page_order = rsv_schp->page_order; break; } else rem -= num; } if (k >= rsv_schp->k_use_sg) SCSI_LOG_TIMEOUT(1, sg_printk(KERN_INFO, sfp->parentdp, "sg_link_reserve: BAD size\n")); } static void sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp) { Sg_scatter_hold *req_schp = &srp->data; SCSI_LOG_TIMEOUT(4, sg_printk(KERN_INFO, srp->parentfp->parentdp, "sg_unlink_reserve: req->k_use_sg=%d\n", (int) req_schp->k_use_sg)); req_schp->k_use_sg = 0; req_schp->bufflen = 0; req_schp->pages = NULL; req_schp->page_order = 0; req_schp->sglist_len = 0; srp->res_used = 0; /* Called without mutex lock to avoid deadlock */ 
sfp->res_in_use = 0; } static Sg_request * sg_get_rq_mark(Sg_fd * sfp, int pack_id, bool *busy) { Sg_request *resp; unsigned long iflags; *busy = false; write_lock_irqsave(&sfp->rq_list_lock, iflags); list_for_each_entry(resp, &sfp->rq_list, entry) { /* look for requests that are not SG_IO owned */ if ((!resp->sg_io_owned) && ((-1 == pack_id) || (resp->header.pack_id == pack_id))) { switch (resp->done) { case 0: /* request active */ *busy = true; break; case 1: /* request done; response ready to return */ resp->done = 2; /* guard against other readers */ write_unlock_irqrestore(&sfp->rq_list_lock, iflags); return resp; case 2: /* response already being returned */ break; } } } write_unlock_irqrestore(&sfp->rq_list_lock, iflags); return NULL; } /* always adds to end of list */ static Sg_request * sg_add_request(Sg_fd * sfp) { int k; unsigned long iflags; Sg_request *rp = sfp->req_arr; write_lock_irqsave(&sfp->rq_list_lock, iflags); if (!list_empty(&sfp->rq_list)) { if (!sfp->cmd_q) goto out_unlock; for (k = 0; k < SG_MAX_QUEUE; ++k, ++rp) { if (!rp->parentfp) break; } if (k >= SG_MAX_QUEUE) goto out_unlock; } memset(rp, 0, sizeof (Sg_request)); rp->parentfp = sfp; rp->header.duration = jiffies_to_msecs(jiffies); list_add_tail(&rp->entry, &sfp->rq_list); write_unlock_irqrestore(&sfp->rq_list_lock, iflags); return rp; out_unlock: write_unlock_irqrestore(&sfp->rq_list_lock, iflags); return NULL; } /* Return of 1 for found; 0 for not found */ static int sg_remove_request(Sg_fd * sfp, Sg_request * srp) { unsigned long iflags; int res = 0; if (!sfp || !srp || list_empty(&sfp->rq_list)) return res; write_lock_irqsave(&sfp->rq_list_lock, iflags); if (!list_empty(&srp->entry)) { list_del(&srp->entry); srp->parentfp = NULL; res = 1; } write_unlock_irqrestore(&sfp->rq_list_lock, iflags); /* * If the device is detaching, wakeup any readers in case we just * removed the last response, which would leave nothing for them to * return other than -ENODEV. 
*/ if (unlikely(atomic_read(&sfp->parentdp->detaching))) wake_up_interruptible_all(&sfp->read_wait); return res; } static Sg_fd * sg_add_sfp(Sg_device * sdp) { Sg_fd *sfp; unsigned long iflags; int bufflen; sfp = kzalloc(sizeof(*sfp), GFP_ATOMIC | __GFP_NOWARN); if (!sfp) return ERR_PTR(-ENOMEM); init_waitqueue_head(&sfp->read_wait); rwlock_init(&sfp->rq_list_lock); INIT_LIST_HEAD(&sfp->rq_list); kref_init(&sfp->f_ref); mutex_init(&sfp->f_mutex); sfp->timeout = SG_DEFAULT_TIMEOUT; sfp->timeout_user = SG_DEFAULT_TIMEOUT_USER; sfp->force_packid = SG_DEF_FORCE_PACK_ID; sfp->cmd_q = SG_DEF_COMMAND_Q; sfp->keep_orphan = SG_DEF_KEEP_ORPHAN; sfp->parentdp = sdp; write_lock_irqsave(&sdp->sfd_lock, iflags); if (atomic_read(&sdp->detaching)) { write_unlock_irqrestore(&sdp->sfd_lock, iflags); kfree(sfp); return ERR_PTR(-ENODEV); } list_add_tail(&sfp->sfd_siblings, &sdp->sfds); write_unlock_irqrestore(&sdp->sfd_lock, iflags); SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sdp, "sg_add_sfp: sfp=0x%p\n", sfp)); if (unlikely(sg_big_buff != def_reserved_size)) sg_big_buff = def_reserved_size; bufflen = min_t(int, sg_big_buff, max_sectors_bytes(sdp->device->request_queue)); sg_build_reserve(sfp, bufflen); SCSI_LOG_TIMEOUT(3, sg_printk(KERN_INFO, sdp, "sg_add_sfp: bufflen=%d, k_use_sg=%d\n", sfp->reserve.bufflen, sfp->reserve.k_use_sg)); kref_get(&sdp->d_ref); __module_get(THIS_MODULE); return sfp; } static void sg_remove_sfp_usercontext(struct work_struct *work) { struct sg_fd *sfp = container_of(work, struct sg_fd, ew.work); struct sg_device *sdp = sfp->parentdp; struct scsi_device *device = sdp->device; Sg_request *srp; unsigned long iflags; /* Cleanup any responses which were never read(). */ write_lock_irqsave(&sfp->rq_list_lock, iflags); while (!list_empty(&sfp->rq_list)) { srp = list_first_entry(&sfp->rq_list, Sg_request, entry); sg_finish_rem_req(srp); list_del(&srp->entry); srp->parentfp = NULL; } write_unlock_irqrestore(&sfp->rq_list_lock, iflags); if (sfp->reserve.bufflen > 0) { SCSI_LOG_TIMEOUT(6, sg_printk(KERN_INFO, sdp, "sg_remove_sfp: bufflen=%d, k_use_sg=%d\n", (int) sfp->reserve.bufflen, (int) sfp->reserve.k_use_sg)); sg_remove_scat(sfp, &sfp->reserve); } SCSI_LOG_TIMEOUT(6, sg_printk(KERN_INFO, sdp, "sg_remove_sfp: sfp=0x%p\n", sfp)); kfree(sfp); kref_put(&sdp->d_ref, sg_device_destroy); scsi_device_put(device); module_put(THIS_MODULE); } static void sg_remove_sfp(struct kref *kref) { struct sg_fd *sfp = container_of(kref, struct sg_fd, f_ref); struct sg_device *sdp = sfp->parentdp; unsigned long iflags; write_lock_irqsave(&sdp->sfd_lock, iflags); list_del(&sfp->sfd_siblings); write_unlock_irqrestore(&sdp->sfd_lock, iflags); INIT_WORK(&sfp->ew.work, sg_remove_sfp_usercontext); schedule_work(&sfp->ew.work); } #ifdef CONFIG_SCSI_PROC_FS static int sg_idr_max_id(int id, void *p, void *data) { int *k = data; if (*k < id) *k = id; return 0; } static int sg_last_dev(void) { int k = -1; unsigned long iflags; read_lock_irqsave(&sg_index_lock, iflags); idr_for_each(&sg_index_idr, sg_idr_max_id, &k); read_unlock_irqrestore(&sg_index_lock, iflags); return k + 1; /* origin 1 */ } #endif /* must be called with sg_index_lock held */ static Sg_device *sg_lookup_dev(int dev) { return idr_find(&sg_index_idr, dev); } static Sg_device * sg_get_dev(int dev) { struct sg_device *sdp; unsigned long flags; read_lock_irqsave(&sg_index_lock, flags); sdp = sg_lookup_dev(dev); if (!sdp) sdp = ERR_PTR(-ENXIO); else if (atomic_read(&sdp->detaching)) { /* If sdp->detaching, then the refcount may already be 0, in * which case 
it would be a bug to do kref_get(). */ sdp = ERR_PTR(-ENODEV); } else kref_get(&sdp->d_ref); read_unlock_irqrestore(&sg_index_lock, flags); return sdp; } #ifdef CONFIG_SCSI_PROC_FS static int sg_proc_seq_show_int(struct seq_file *s, void *v); static int sg_proc_single_open_adio(struct inode *inode, struct file *file); static ssize_t sg_proc_write_adio(struct file *filp, const char __user *buffer, size_t count, loff_t *off); static const struct proc_ops adio_proc_ops = { .proc_open = sg_proc_single_open_adio, .proc_read = seq_read, .proc_lseek = seq_lseek, .proc_write = sg_proc_write_adio, .proc_release = single_release, }; static int sg_proc_single_open_dressz(struct inode *inode, struct file *file); static ssize_t sg_proc_write_dressz(struct file *filp, const char __user *buffer, size_t count, loff_t *off); static const struct proc_ops dressz_proc_ops = { .proc_open = sg_proc_single_open_dressz, .proc_read = seq_read, .proc_lseek = seq_lseek, .proc_write = sg_proc_write_dressz, .proc_release = single_release, }; static int sg_proc_seq_show_version(struct seq_file *s, void *v); static int sg_proc_seq_show_devhdr(struct seq_file *s, void *v); static int sg_proc_seq_show_dev(struct seq_file *s, void *v); static void * dev_seq_start(struct seq_file *s, loff_t *pos); static void * dev_seq_next(struct seq_file *s, void *v, loff_t *pos); static void dev_seq_stop(struct seq_file *s, void *v); static const struct seq_operations dev_seq_ops = { .start = dev_seq_start, .next = dev_seq_next, .stop = dev_seq_stop, .show = sg_proc_seq_show_dev, }; static int sg_proc_seq_show_devstrs(struct seq_file *s, void *v); static const struct seq_operations devstrs_seq_ops = { .start = dev_seq_start, .next = dev_seq_next, .stop = dev_seq_stop, .show = sg_proc_seq_show_devstrs, }; static int sg_proc_seq_show_debug(struct seq_file *s, void *v); static const struct seq_operations debug_seq_ops = { .start = dev_seq_start, .next = dev_seq_next, .stop = dev_seq_stop, .show = sg_proc_seq_show_debug, }; static int sg_proc_init(void) { struct proc_dir_entry *p; p = proc_mkdir("scsi/sg", NULL); if (!p) return 1; proc_create("allow_dio", S_IRUGO | S_IWUSR, p, &adio_proc_ops); proc_create_seq("debug", S_IRUGO, p, &debug_seq_ops); proc_create("def_reserved_size", S_IRUGO | S_IWUSR, p, &dressz_proc_ops); proc_create_single("device_hdr", S_IRUGO, p, sg_proc_seq_show_devhdr); proc_create_seq("devices", S_IRUGO, p, &dev_seq_ops); proc_create_seq("device_strs", S_IRUGO, p, &devstrs_seq_ops); proc_create_single("version", S_IRUGO, p, sg_proc_seq_show_version); return 0; } static int sg_proc_seq_show_int(struct seq_file *s, void *v) { seq_printf(s, "%d\n", *((int *)s->private)); return 0; } static int sg_proc_single_open_adio(struct inode *inode, struct file *file) { return single_open(file, sg_proc_seq_show_int, &sg_allow_dio); } static ssize_t sg_proc_write_adio(struct file *filp, const char __user *buffer, size_t count, loff_t *off) { int err; unsigned long num; if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) return -EACCES; err = kstrtoul_from_user(buffer, count, 0, &num); if (err) return err; sg_allow_dio = num ? 
1 : 0; return count; } static int sg_proc_single_open_dressz(struct inode *inode, struct file *file) { return single_open(file, sg_proc_seq_show_int, &sg_big_buff); } static ssize_t sg_proc_write_dressz(struct file *filp, const char __user *buffer, size_t count, loff_t *off) { int err; unsigned long k = ULONG_MAX; if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) return -EACCES; err = kstrtoul_from_user(buffer, count, 0, &k); if (err) return err; if (k <= 1048576) { /* limit "big buff" to 1 MB */ sg_big_buff = k; return count; } return -ERANGE; } static int sg_proc_seq_show_version(struct seq_file *s, void *v) { seq_printf(s, "%d\t%s [%s]\n", sg_version_num, SG_VERSION_STR, sg_version_date); return 0; } static int sg_proc_seq_show_devhdr(struct seq_file *s, void *v) { seq_puts(s, "host\tchan\tid\tlun\ttype\topens\tqdepth\tbusy\tonline\n"); return 0; } struct sg_proc_deviter { loff_t index; size_t max; }; static void * dev_seq_start(struct seq_file *s, loff_t *pos) { struct sg_proc_deviter * it = kmalloc(sizeof(*it), GFP_KERNEL); s->private = it; if (! it) return NULL; it->index = *pos; it->max = sg_last_dev(); if (it->index >= it->max) return NULL; return it; } static void * dev_seq_next(struct seq_file *s, void *v, loff_t *pos) { struct sg_proc_deviter * it = s->private; *pos = ++it->index; return (it->index < it->max) ? it : NULL; } static void dev_seq_stop(struct seq_file *s, void *v) { kfree(s->private); } static int sg_proc_seq_show_dev(struct seq_file *s, void *v) { struct sg_proc_deviter * it = (struct sg_proc_deviter *) v; Sg_device *sdp; struct scsi_device *scsidp; unsigned long iflags; read_lock_irqsave(&sg_index_lock, iflags); sdp = it ? sg_lookup_dev(it->index) : NULL; if ((NULL == sdp) || (NULL == sdp->device) || (atomic_read(&sdp->detaching))) seq_puts(s, "-1\t-1\t-1\t-1\t-1\t-1\t-1\t-1\t-1\n"); else { scsidp = sdp->device; seq_printf(s, "%d\t%d\t%d\t%llu\t%d\t%d\t%d\t%d\t%d\n", scsidp->host->host_no, scsidp->channel, scsidp->id, scsidp->lun, (int) scsidp->type, 1, (int) scsidp->queue_depth, (int) scsi_device_busy(scsidp), (int) scsi_device_online(scsidp)); } read_unlock_irqrestore(&sg_index_lock, iflags); return 0; } static int sg_proc_seq_show_devstrs(struct seq_file *s, void *v) { struct sg_proc_deviter * it = (struct sg_proc_deviter *) v; Sg_device *sdp; struct scsi_device *scsidp; unsigned long iflags; read_lock_irqsave(&sg_index_lock, iflags); sdp = it ? sg_lookup_dev(it->index) : NULL; scsidp = sdp ? sdp->device : NULL; if (sdp && scsidp && (!atomic_read(&sdp->detaching))) seq_printf(s, "%8.8s\t%16.16s\t%4.4s\n", scsidp->vendor, scsidp->model, scsidp->rev); else seq_puts(s, "<no active device>\n"); read_unlock_irqrestore(&sg_index_lock, iflags); return 0; } /* must be called while holding sg_index_lock */ static void sg_proc_debug_helper(struct seq_file *s, Sg_device * sdp) { int k, new_interface, blen, usg; Sg_request *srp; Sg_fd *fp; const sg_io_hdr_t *hp; const char * cp; unsigned int ms; k = 0; list_for_each_entry(fp, &sdp->sfds, sfd_siblings) { k++; read_lock(&fp->rq_list_lock); /* irqs already disabled */ seq_printf(s, " FD(%d): timeout=%dms bufflen=%d " "(res)sgat=%d low_dma=%d\n", k, jiffies_to_msecs(fp->timeout), fp->reserve.bufflen, (int) fp->reserve.k_use_sg, 0); seq_printf(s, " cmd_q=%d f_packid=%d k_orphan=%d closed=0\n", (int) fp->cmd_q, (int) fp->force_packid, (int) fp->keep_orphan); list_for_each_entry(srp, &fp->rq_list, entry) { hp = &srp->header; new_interface = (hp->interface_id == '\0') ? 
0 : 1; if (srp->res_used) { if (new_interface && (SG_FLAG_MMAP_IO & hp->flags)) cp = " mmap>> "; else cp = " rb>> "; } else { if (SG_INFO_DIRECT_IO_MASK & hp->info) cp = " dio>> "; else cp = " "; } seq_puts(s, cp); blen = srp->data.bufflen; usg = srp->data.k_use_sg; seq_puts(s, srp->done ? ((1 == srp->done) ? "rcv:" : "fin:") : "act:"); seq_printf(s, " id=%d blen=%d", srp->header.pack_id, blen); if (srp->done) seq_printf(s, " dur=%d", hp->duration); else { ms = jiffies_to_msecs(jiffies); seq_printf(s, " t_o/elap=%d/%d", (new_interface ? hp->timeout : jiffies_to_msecs(fp->timeout)), (ms > hp->duration ? ms - hp->duration : 0)); } seq_printf(s, "ms sgat=%d op=0x%02x\n", usg, (int) srp->data.cmd_opcode); } if (list_empty(&fp->rq_list)) seq_puts(s, " No requests active\n"); read_unlock(&fp->rq_list_lock); } } static int sg_proc_seq_show_debug(struct seq_file *s, void *v) { struct sg_proc_deviter * it = (struct sg_proc_deviter *) v; Sg_device *sdp; unsigned long iflags; if (it && (0 == it->index)) seq_printf(s, "max_active_device=%d def_reserved_size=%d\n", (int)it->max, sg_big_buff); read_lock_irqsave(&sg_index_lock, iflags); sdp = it ? sg_lookup_dev(it->index) : NULL; if (NULL == sdp) goto skip; read_lock(&sdp->sfd_lock); if (!list_empty(&sdp->sfds)) { seq_printf(s, " >>> device=%s ", sdp->name); if (atomic_read(&sdp->detaching)) seq_puts(s, "detaching pending close "); else if (sdp->device) { struct scsi_device *scsidp = sdp->device; seq_printf(s, "%d:%d:%d:%llu em=%d", scsidp->host->host_no, scsidp->channel, scsidp->id, scsidp->lun, scsidp->host->hostt->emulated); } seq_printf(s, " sg_tablesize=%d excl=%d open_cnt=%d\n", sdp->sg_tablesize, sdp->exclude, sdp->open_cnt); sg_proc_debug_helper(s, sdp); } read_unlock(&sdp->sfd_lock); skip: read_unlock_irqrestore(&sg_index_lock, iflags); return 0; } #endif /* CONFIG_SCSI_PROC_FS */ module_init(init_sg); module_exit(exit_sg); |
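/*
 * Illustrative userspace sketch (not part of the driver source above): it
 * shows how a few of the ioctls handled in sg_ioctl_common() are typically
 * driven from user space. The device path "/dev/sg0" is an assumption and
 * error handling is kept minimal; treat this as a usage sketch rather than
 * a reference client.
 */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <scsi/sg.h>

int main(void)
{
	int fd = open("/dev/sg0", O_RDWR | O_NONBLOCK);	/* assumed device node */
	int version, reserved = 128 * 1024;
	sg_scsi_id_t id;

	if (fd < 0)
		return 1;

	/* SG_GET_VERSION_NUM reports the sg driver version as an int. */
	if (ioctl(fd, SG_GET_VERSION_NUM, &version) == 0)
		printf("sg driver version: %d\n", version);

	/* SG_GET_SCSI_ID fills in host/channel/id/lun and the device type. */
	if (ioctl(fd, SG_GET_SCSI_ID, &id) == 0)
		printf("HCTL: %d:%d:%d:%d type=%d\n", id.host_no, id.channel,
		       id.scsi_id, id.lun, id.scsi_type);

	/* Resize the per-fd reserve buffer used for mmap()/indirect I/O. */
	if (ioctl(fd, SG_SET_RESERVED_SIZE, &reserved) == 0 &&
	    ioctl(fd, SG_GET_RESERVED_SIZE, &reserved) == 0)
		printf("reserve buffer: %d bytes\n", reserved);

	close(fd);
	return 0;
}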
882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 | /* * Copyright © 2012 Red Hat * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. * * Authors: * Dave Airlie <airlied@redhat.com> * Rob Clark <rob.clark@linaro.org> * */ #include <linux/export.h> #include <linux/dma-buf.h> #include <linux/rbtree.h> #include <linux/module.h> #include <drm/drm.h> #include <drm/drm_drv.h> #include <drm/drm_file.h> #include <drm/drm_framebuffer.h> #include <drm/drm_gem.h> #include <drm/drm_prime.h> #include "drm_internal.h" MODULE_IMPORT_NS("DMA_BUF"); /** * DOC: overview and lifetime rules * * Similar to GEM global names, PRIME file descriptors are also used to share * buffer objects across processes. They offer additional security: as file * descriptors must be explicitly sent over UNIX domain sockets to be shared * between applications, they can't be guessed like the globally unique GEM * names. * * Drivers that support the PRIME API implement the drm_gem_object_funcs.export * and &drm_driver.gem_prime_import hooks. &dma_buf_ops implementations for * drivers are all individually exported for drivers which need to overwrite * or reimplement some of them. * * Reference Counting for GEM Drivers * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * On the export the &dma_buf holds a reference to the exported buffer object, * usually a &drm_gem_object. It takes this reference in the PRIME_HANDLE_TO_FD * IOCTL, when it first calls &drm_gem_object_funcs.export * and stores the exporting GEM object in the &dma_buf.priv field. This * reference needs to be released when the final reference to the &dma_buf * itself is dropped and its &dma_buf_ops.release function is called. 
For * GEM-based drivers, the &dma_buf should be exported using * drm_gem_dmabuf_export() and then released by drm_gem_dmabuf_release(). * * Thus the chain of references always flows in one direction, avoiding loops: * importing GEM object -> dma-buf -> exported GEM bo. A further complication * are the lookup caches for import and export. These are required to guarantee * that any given object will always have only one unique userspace handle. This * is required to allow userspace to detect duplicated imports, since some GEM * drivers do fail command submissions if a given buffer object is listed more * than once. These import and export caches in &drm_prime_file_private only * retain a weak reference, which is cleaned up when the corresponding object is * released. * * Self-importing: If userspace is using PRIME as a replacement for flink then * it will get a fd->handle request for a GEM object that it created. Drivers * should detect this situation and return back the underlying object from the * dma-buf private. For GEM based drivers this is handled in * drm_gem_prime_import() already. */ struct drm_prime_member { struct dma_buf *dma_buf; uint32_t handle; struct rb_node dmabuf_rb; struct rb_node handle_rb; }; static int drm_prime_add_buf_handle(struct drm_prime_file_private *prime_fpriv, struct dma_buf *dma_buf, uint32_t handle) { struct drm_prime_member *member; struct rb_node **p, *rb; member = kmalloc(sizeof(*member), GFP_KERNEL); if (!member) return -ENOMEM; get_dma_buf(dma_buf); member->dma_buf = dma_buf; member->handle = handle; rb = NULL; p = &prime_fpriv->dmabufs.rb_node; while (*p) { struct drm_prime_member *pos; rb = *p; pos = rb_entry(rb, struct drm_prime_member, dmabuf_rb); if (dma_buf > pos->dma_buf) p = &rb->rb_right; else p = &rb->rb_left; } rb_link_node(&member->dmabuf_rb, rb, p); rb_insert_color(&member->dmabuf_rb, &prime_fpriv->dmabufs); rb = NULL; p = &prime_fpriv->handles.rb_node; while (*p) { struct drm_prime_member *pos; rb = *p; pos = rb_entry(rb, struct drm_prime_member, handle_rb); if (handle > pos->handle) p = &rb->rb_right; else p = &rb->rb_left; } rb_link_node(&member->handle_rb, rb, p); rb_insert_color(&member->handle_rb, &prime_fpriv->handles); return 0; } static struct dma_buf *drm_prime_lookup_buf_by_handle(struct drm_prime_file_private *prime_fpriv, uint32_t handle) { struct rb_node *rb; rb = prime_fpriv->handles.rb_node; while (rb) { struct drm_prime_member *member; member = rb_entry(rb, struct drm_prime_member, handle_rb); if (member->handle == handle) return member->dma_buf; else if (member->handle < handle) rb = rb->rb_right; else rb = rb->rb_left; } return NULL; } static int drm_prime_lookup_buf_handle(struct drm_prime_file_private *prime_fpriv, struct dma_buf *dma_buf, uint32_t *handle) { struct rb_node *rb; rb = prime_fpriv->dmabufs.rb_node; while (rb) { struct drm_prime_member *member; member = rb_entry(rb, struct drm_prime_member, dmabuf_rb); if (member->dma_buf == dma_buf) { *handle = member->handle; return 0; } else if (member->dma_buf < dma_buf) { rb = rb->rb_right; } else { rb = rb->rb_left; } } return -ENOENT; } void drm_prime_remove_buf_handle(struct drm_prime_file_private *prime_fpriv, uint32_t handle) { struct rb_node *rb; mutex_lock(&prime_fpriv->lock); rb = prime_fpriv->handles.rb_node; while (rb) { struct drm_prime_member *member; member = rb_entry(rb, struct drm_prime_member, handle_rb); if (member->handle == handle) { rb_erase(&member->handle_rb, &prime_fpriv->handles); rb_erase(&member->dmabuf_rb, &prime_fpriv->dmabufs); 
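/* Both lookup trees have dropped this entry; release the dma_buf reference taken in drm_prime_add_buf_handle() and free it. */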
dma_buf_put(member->dma_buf); kfree(member); break; } else if (member->handle < handle) { rb = rb->rb_right; } else { rb = rb->rb_left; } } mutex_unlock(&prime_fpriv->lock); } void drm_prime_init_file_private(struct drm_prime_file_private *prime_fpriv) { mutex_init(&prime_fpriv->lock); prime_fpriv->dmabufs = RB_ROOT; prime_fpriv->handles = RB_ROOT; } void drm_prime_destroy_file_private(struct drm_prime_file_private *prime_fpriv) { /* by now drm_gem_release should've made sure the list is empty */ WARN_ON(!RB_EMPTY_ROOT(&prime_fpriv->dmabufs)); } /** * drm_gem_dmabuf_export - &dma_buf export implementation for GEM * @dev: parent device for the exported dmabuf * @exp_info: the export information used by dma_buf_export() * * This wraps dma_buf_export() for use by generic GEM drivers that are using * drm_gem_dmabuf_release(). In addition to calling dma_buf_export(), we take * a reference to the &drm_device and the exported &drm_gem_object (stored in * &dma_buf_export_info.priv) which is released by drm_gem_dmabuf_release(). * * Returns the new dmabuf. */ struct dma_buf *drm_gem_dmabuf_export(struct drm_device *dev, struct dma_buf_export_info *exp_info) { struct drm_gem_object *obj = exp_info->priv; struct dma_buf *dma_buf; dma_buf = dma_buf_export(exp_info); if (IS_ERR(dma_buf)) return dma_buf; drm_dev_get(dev); drm_gem_object_get(obj); dma_buf->file->f_mapping = obj->dev->anon_inode->i_mapping; return dma_buf; } EXPORT_SYMBOL(drm_gem_dmabuf_export); /** * drm_gem_dmabuf_release - &dma_buf release implementation for GEM * @dma_buf: buffer to be released * * Generic release function for dma_bufs exported as PRIME buffers. GEM drivers * must use this in their &dma_buf_ops structure as the release callback. * drm_gem_dmabuf_release() should be used in conjunction with * drm_gem_dmabuf_export(). */ void drm_gem_dmabuf_release(struct dma_buf *dma_buf) { struct drm_gem_object *obj = dma_buf->priv; struct drm_device *dev = obj->dev; /* drop the reference on the export fd holds */ drm_gem_object_put(obj); drm_dev_put(dev); } EXPORT_SYMBOL(drm_gem_dmabuf_release); /** * drm_gem_prime_fd_to_handle - PRIME import function for GEM drivers * @dev: drm_device to import into * @file_priv: drm file-private structure * @prime_fd: fd id of the dma-buf which should be imported * @handle: pointer to storage for the handle of the imported buffer object * * This is the PRIME import function which must be used mandatorily by GEM * drivers to ensure correct lifetime management of the underlying GEM object. * The actual importing of GEM object from the dma-buf is done through the * &drm_driver.gem_prime_import driver callback. * * Returns 0 on success or a negative error code on failure. */ int drm_gem_prime_fd_to_handle(struct drm_device *dev, struct drm_file *file_priv, int prime_fd, uint32_t *handle) { struct dma_buf *dma_buf; struct drm_gem_object *obj; int ret; dma_buf = dma_buf_get(prime_fd); if (IS_ERR(dma_buf)) return PTR_ERR(dma_buf); mutex_lock(&file_priv->prime.lock); ret = drm_prime_lookup_buf_handle(&file_priv->prime, dma_buf, handle); if (ret == 0) goto out_put; /* never seen this one, need to import */ mutex_lock(&dev->object_name_lock); if (dev->driver->gem_prime_import) obj = dev->driver->gem_prime_import(dev, dma_buf); else obj = drm_gem_prime_import(dev, dma_buf); if (IS_ERR(obj)) { ret = PTR_ERR(obj); goto out_unlock; } if (obj->dma_buf) { WARN_ON(obj->dma_buf != dma_buf); } else { obj->dma_buf = dma_buf; get_dma_buf(dma_buf); } /* _handle_create_tail unconditionally unlocks dev->object_name_lock. 
*/ ret = drm_gem_handle_create_tail(file_priv, obj, handle); drm_gem_object_put(obj); if (ret) goto out_put; ret = drm_prime_add_buf_handle(&file_priv->prime, dma_buf, *handle); mutex_unlock(&file_priv->prime.lock); if (ret) goto fail; dma_buf_put(dma_buf); return 0; fail: /* hmm, if driver attached, we are relying on the free-object path * to detach.. which seems ok.. */ drm_gem_handle_delete(file_priv, *handle); dma_buf_put(dma_buf); return ret; out_unlock: mutex_unlock(&dev->object_name_lock); out_put: mutex_unlock(&file_priv->prime.lock); dma_buf_put(dma_buf); return ret; } EXPORT_SYMBOL(drm_gem_prime_fd_to_handle); int drm_prime_fd_to_handle_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_prime_handle *args = data; if (dev->driver->prime_fd_to_handle) { return dev->driver->prime_fd_to_handle(dev, file_priv, args->fd, &args->handle); } return drm_gem_prime_fd_to_handle(dev, file_priv, args->fd, &args->handle); } static struct dma_buf *export_and_register_object(struct drm_device *dev, struct drm_gem_object *obj, uint32_t flags) { struct dma_buf *dmabuf; /* prevent races with concurrent gem_close. */ if (obj->handle_count == 0) { dmabuf = ERR_PTR(-ENOENT); return dmabuf; } if (obj->funcs && obj->funcs->export) dmabuf = obj->funcs->export(obj, flags); else dmabuf = drm_gem_prime_export(obj, flags); if (IS_ERR(dmabuf)) { /* normally the created dma-buf takes ownership of the ref, * but if that fails then drop the ref */ return dmabuf; } /* * Note that callers do not need to clean up the export cache * since the check for obj->handle_count guarantees that someone * will clean it up. */ obj->dma_buf = dmabuf; get_dma_buf(obj->dma_buf); return dmabuf; } /** * drm_gem_prime_handle_to_dmabuf - PRIME export function for GEM drivers * @dev: dev to export the buffer from * @file_priv: drm file-private structure * @handle: buffer handle to export * @flags: flags like DRM_CLOEXEC * * This is the PRIME export function which must be used mandatorily by GEM * drivers to ensure correct lifetime management of the underlying GEM object. * The actual exporting from GEM object to a dma-buf is done through the * &drm_gem_object_funcs.export callback. * * Unlike drm_gem_prime_handle_to_fd(), it returns the struct dma_buf it * has created, without attaching it to any file descriptors. The difference * between those two is similar to that between anon_inode_getfile() and * anon_inode_getfd(); insertion into descriptor table is something you * can not revert if any cleanup is needed, so the descriptor-returning * variants should only be used when you are past the last failure exit * and the only thing left is passing the new file descriptor to userland. * When all you need is the object itself or when you need to do something * else that might fail, use that one instead. 
*/ struct dma_buf *drm_gem_prime_handle_to_dmabuf(struct drm_device *dev, struct drm_file *file_priv, uint32_t handle, uint32_t flags) { struct drm_gem_object *obj; int ret = 0; struct dma_buf *dmabuf; mutex_lock(&file_priv->prime.lock); obj = drm_gem_object_lookup(file_priv, handle); if (!obj) { dmabuf = ERR_PTR(-ENOENT); goto out_unlock; } dmabuf = drm_prime_lookup_buf_by_handle(&file_priv->prime, handle); if (dmabuf) { get_dma_buf(dmabuf); goto out; } mutex_lock(&dev->object_name_lock); /* re-export the original imported/exported object */ if (obj->dma_buf) { get_dma_buf(obj->dma_buf); dmabuf = obj->dma_buf; goto out_have_obj; } dmabuf = export_and_register_object(dev, obj, flags); if (IS_ERR(dmabuf)) { /* normally the created dma-buf takes ownership of the ref, * but if that fails then drop the ref */ mutex_unlock(&dev->object_name_lock); goto out; } out_have_obj: /* * If we've exported this buffer then cheat and add it to the import list * so we get the correct handle back. We must do this under the * protection of dev->object_name_lock to ensure that a racing gem close * ioctl doesn't miss to remove this buffer handle from the cache. */ ret = drm_prime_add_buf_handle(&file_priv->prime, dmabuf, handle); mutex_unlock(&dev->object_name_lock); if (ret) { dma_buf_put(dmabuf); dmabuf = ERR_PTR(ret); } out: drm_gem_object_put(obj); out_unlock: mutex_unlock(&file_priv->prime.lock); return dmabuf; } EXPORT_SYMBOL(drm_gem_prime_handle_to_dmabuf); /** * drm_gem_prime_handle_to_fd - PRIME export function for GEM drivers * @dev: dev to export the buffer from * @file_priv: drm file-private structure * @handle: buffer handle to export * @flags: flags like DRM_CLOEXEC * @prime_fd: pointer to storage for the fd id of the create dma-buf * * This is the PRIME export function which must be used mandatorily by GEM * drivers to ensure correct lifetime management of the underlying GEM object. * The actual exporting from GEM object to a dma-buf is done through the * &drm_gem_object_funcs.export callback. */ int drm_gem_prime_handle_to_fd(struct drm_device *dev, struct drm_file *file_priv, uint32_t handle, uint32_t flags, int *prime_fd) { struct dma_buf *dmabuf; int fd = get_unused_fd_flags(flags); if (fd < 0) return fd; dmabuf = drm_gem_prime_handle_to_dmabuf(dev, file_priv, handle, flags); if (IS_ERR(dmabuf)) { put_unused_fd(fd); return PTR_ERR(dmabuf); } fd_install(fd, dmabuf->file); *prime_fd = fd; return 0; } EXPORT_SYMBOL(drm_gem_prime_handle_to_fd); int drm_prime_handle_to_fd_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_prime_handle *args = data; /* check flags are valid */ if (args->flags & ~(DRM_CLOEXEC | DRM_RDWR)) return -EINVAL; if (dev->driver->prime_handle_to_fd) { return dev->driver->prime_handle_to_fd(dev, file_priv, args->handle, args->flags, &args->fd); } return drm_gem_prime_handle_to_fd(dev, file_priv, args->handle, args->flags, &args->fd); } /** * DOC: PRIME Helpers * * Drivers can implement &drm_gem_object_funcs.export and * &drm_driver.gem_prime_import in terms of simpler APIs by using the helper * functions drm_gem_prime_export() and drm_gem_prime_import(). These functions * implement dma-buf support in terms of some lower-level helpers, which are * again exported for drivers to use individually: * * Exporting buffers * ~~~~~~~~~~~~~~~~~ * * Optional pinning of buffers is handled at dma-buf attach and detach time in * drm_gem_map_attach() and drm_gem_map_detach(). 
Backing storage itself is * handled by drm_gem_map_dma_buf() and drm_gem_unmap_dma_buf(), which relies on * &drm_gem_object_funcs.get_sg_table. If &drm_gem_object_funcs.get_sg_table is * unimplemented, exports into another device are rejected. * * For kernel-internal access there's drm_gem_dmabuf_vmap() and * drm_gem_dmabuf_vunmap(). Userspace mmap support is provided by * drm_gem_dmabuf_mmap(). * * Note that these export helpers can only be used if the underlying backing * storage is fully coherent and either permanently pinned, or it is safe to pin * it indefinitely. * * FIXME: The underlying helper functions are named rather inconsistently. * * Importing buffers * ~~~~~~~~~~~~~~~~~ * * Importing dma-bufs using drm_gem_prime_import() relies on * &drm_driver.gem_prime_import_sg_table. * * Note that similarly to the export helpers this permanently pins the * underlying backing storage. Which is ok for scanout, but is not the best * option for sharing lots of buffers for rendering. */ /** * drm_gem_map_attach - dma_buf attach implementation for GEM * @dma_buf: buffer to attach device to * @attach: buffer attachment data * * Calls &drm_gem_object_funcs.pin for device specific handling. This can be * used as the &dma_buf_ops.attach callback. Must be used together with * drm_gem_map_detach(). * * Returns 0 on success, negative error code on failure. */ int drm_gem_map_attach(struct dma_buf *dma_buf, struct dma_buf_attachment *attach) { struct drm_gem_object *obj = dma_buf->priv; /* * drm_gem_map_dma_buf() requires obj->get_sg_table(), but drivers * that implement their own ->map_dma_buf() do not. */ if (dma_buf->ops->map_dma_buf == drm_gem_map_dma_buf && !obj->funcs->get_sg_table) return -ENOSYS; return drm_gem_pin(obj); } EXPORT_SYMBOL(drm_gem_map_attach); /** * drm_gem_map_detach - dma_buf detach implementation for GEM * @dma_buf: buffer to detach from * @attach: attachment to be detached * * Calls &drm_gem_object_funcs.pin for device specific handling. Cleans up * &dma_buf_attachment from drm_gem_map_attach(). This can be used as the * &dma_buf_ops.detach callback. */ void drm_gem_map_detach(struct dma_buf *dma_buf, struct dma_buf_attachment *attach) { struct drm_gem_object *obj = dma_buf->priv; drm_gem_unpin(obj); } EXPORT_SYMBOL(drm_gem_map_detach); /** * drm_gem_map_dma_buf - map_dma_buf implementation for GEM * @attach: attachment whose scatterlist is to be returned * @dir: direction of DMA transfer * * Calls &drm_gem_object_funcs.get_sg_table and then maps the scatterlist. This * can be used as the &dma_buf_ops.map_dma_buf callback. Should be used together * with drm_gem_unmap_dma_buf(). * * Returns:sg_table containing the scatterlist to be returned; returns ERR_PTR * on error. May return -EINTR if it is interrupted by a signal. 
*/ struct sg_table *drm_gem_map_dma_buf(struct dma_buf_attachment *attach, enum dma_data_direction dir) { struct drm_gem_object *obj = attach->dmabuf->priv; struct sg_table *sgt; int ret; if (WARN_ON(dir == DMA_NONE)) return ERR_PTR(-EINVAL); if (WARN_ON(!obj->funcs->get_sg_table)) return ERR_PTR(-ENOSYS); sgt = obj->funcs->get_sg_table(obj); if (IS_ERR(sgt)) return sgt; ret = dma_map_sgtable(attach->dev, sgt, dir, DMA_ATTR_SKIP_CPU_SYNC); if (ret) { sg_free_table(sgt); kfree(sgt); sgt = ERR_PTR(ret); } return sgt; } EXPORT_SYMBOL(drm_gem_map_dma_buf); /** * drm_gem_unmap_dma_buf - unmap_dma_buf implementation for GEM * @attach: attachment to unmap buffer from * @sgt: scatterlist info of the buffer to unmap * @dir: direction of DMA transfer * * This can be used as the &dma_buf_ops.unmap_dma_buf callback. */ void drm_gem_unmap_dma_buf(struct dma_buf_attachment *attach, struct sg_table *sgt, enum dma_data_direction dir) { if (!sgt) return; dma_unmap_sgtable(attach->dev, sgt, dir, DMA_ATTR_SKIP_CPU_SYNC); sg_free_table(sgt); kfree(sgt); } EXPORT_SYMBOL(drm_gem_unmap_dma_buf); /** * drm_gem_dmabuf_vmap - dma_buf vmap implementation for GEM * @dma_buf: buffer to be mapped * @map: the virtual address of the buffer * * Sets up a kernel virtual mapping. This can be used as the &dma_buf_ops.vmap * callback. Calls into &drm_gem_object_funcs.vmap for device specific handling. * The kernel virtual address is returned in map. * * Returns 0 on success or a negative errno code otherwise. */ int drm_gem_dmabuf_vmap(struct dma_buf *dma_buf, struct iosys_map *map) { struct drm_gem_object *obj = dma_buf->priv; return drm_gem_vmap(obj, map); } EXPORT_SYMBOL(drm_gem_dmabuf_vmap); /** * drm_gem_dmabuf_vunmap - dma_buf vunmap implementation for GEM * @dma_buf: buffer to be unmapped * @map: the virtual address of the buffer * * Releases a kernel virtual mapping. This can be used as the * &dma_buf_ops.vunmap callback. Calls into &drm_gem_object_funcs.vunmap for device specific handling. */ void drm_gem_dmabuf_vunmap(struct dma_buf *dma_buf, struct iosys_map *map) { struct drm_gem_object *obj = dma_buf->priv; drm_gem_vunmap(obj, map); } EXPORT_SYMBOL(drm_gem_dmabuf_vunmap); /** * drm_gem_prime_mmap - PRIME mmap function for GEM drivers * @obj: GEM object * @vma: Virtual address range * * This function sets up a userspace mapping for PRIME exported buffers using * the same codepath that is used for regular GEM buffer mapping on the DRM fd. * The fake GEM offset is added to vma->vm_pgoff and &drm_driver->fops->mmap is * called to set up the mapping. 
*/ int drm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) { struct drm_file *priv; struct file *fil; int ret; /* Add the fake offset */ vma->vm_pgoff += drm_vma_node_start(&obj->vma_node); if (obj->funcs && obj->funcs->mmap) { vma->vm_ops = obj->funcs->vm_ops; drm_gem_object_get(obj); ret = obj->funcs->mmap(obj, vma); if (ret) { drm_gem_object_put(obj); return ret; } vma->vm_private_data = obj; return 0; } priv = kzalloc(sizeof(*priv), GFP_KERNEL); fil = kzalloc(sizeof(*fil), GFP_KERNEL); if (!priv || !fil) { ret = -ENOMEM; goto out; } /* Used by drm_gem_mmap() to lookup the GEM object */ priv->minor = obj->dev->primary; fil->private_data = priv; ret = drm_vma_node_allow(&obj->vma_node, priv); if (ret) goto out; ret = obj->dev->driver->fops->mmap(fil, vma); drm_vma_node_revoke(&obj->vma_node, priv); out: kfree(priv); kfree(fil); return ret; } EXPORT_SYMBOL(drm_gem_prime_mmap); /** * drm_gem_dmabuf_mmap - dma_buf mmap implementation for GEM * @dma_buf: buffer to be mapped * @vma: virtual address range * * Provides memory mapping for the buffer. This can be used as the * &dma_buf_ops.mmap callback. It just forwards to drm_gem_prime_mmap(). * * Returns 0 on success or a negative error code on failure. */ int drm_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma) { struct drm_gem_object *obj = dma_buf->priv; return drm_gem_prime_mmap(obj, vma); } EXPORT_SYMBOL(drm_gem_dmabuf_mmap); static const struct dma_buf_ops drm_gem_prime_dmabuf_ops = { .cache_sgt_mapping = true, .attach = drm_gem_map_attach, .detach = drm_gem_map_detach, .map_dma_buf = drm_gem_map_dma_buf, .unmap_dma_buf = drm_gem_unmap_dma_buf, .release = drm_gem_dmabuf_release, .mmap = drm_gem_dmabuf_mmap, .vmap = drm_gem_dmabuf_vmap, .vunmap = drm_gem_dmabuf_vunmap, }; /** * drm_prime_pages_to_sg - converts a page array into an sg list * @dev: DRM device * @pages: pointer to the array of page pointers to convert * @nr_pages: length of the page vector * * This helper creates an sg table object from a set of pages * the driver is responsible for mapping the pages into the * importers address space for use with dma_buf itself. * * This is useful for implementing &drm_gem_object_funcs.get_sg_table. */ struct sg_table *drm_prime_pages_to_sg(struct drm_device *dev, struct page **pages, unsigned int nr_pages) { struct sg_table *sg; size_t max_segment = 0; int err; sg = kmalloc(sizeof(struct sg_table), GFP_KERNEL); if (!sg) return ERR_PTR(-ENOMEM); if (dev) max_segment = dma_max_mapping_size(dev->dev); if (max_segment == 0) max_segment = UINT_MAX; err = sg_alloc_table_from_pages_segment(sg, pages, nr_pages, 0, (unsigned long)nr_pages << PAGE_SHIFT, max_segment, GFP_KERNEL); if (err) { kfree(sg); sg = ERR_PTR(err); } return sg; } EXPORT_SYMBOL(drm_prime_pages_to_sg); /** * drm_prime_get_contiguous_size - returns the contiguous size of the buffer * @sgt: sg_table describing the buffer to check * * This helper calculates the contiguous size in the DMA address space * of the buffer described by the provided sg_table. * * This is useful for implementing * &drm_gem_object_funcs.gem_prime_import_sg_table. 
*/ unsigned long drm_prime_get_contiguous_size(struct sg_table *sgt) { dma_addr_t expected = sg_dma_address(sgt->sgl); struct scatterlist *sg; unsigned long size = 0; int i; for_each_sgtable_dma_sg(sgt, sg, i) { unsigned int len = sg_dma_len(sg); if (!len) break; if (sg_dma_address(sg) != expected) break; expected += len; size += len; } return size; } EXPORT_SYMBOL(drm_prime_get_contiguous_size); /** * drm_gem_prime_export - helper library implementation of the export callback * @obj: GEM object to export * @flags: flags like DRM_CLOEXEC and DRM_RDWR * * This is the implementation of the &drm_gem_object_funcs.export functions for GEM drivers * using the PRIME helpers. It is used as the default in * drm_gem_prime_handle_to_fd(). */ struct dma_buf *drm_gem_prime_export(struct drm_gem_object *obj, int flags) { struct drm_device *dev = obj->dev; struct dma_buf_export_info exp_info = { .exp_name = KBUILD_MODNAME, /* white lie for debug */ .owner = dev->driver->fops->owner, .ops = &drm_gem_prime_dmabuf_ops, .size = obj->size, .flags = flags, .priv = obj, .resv = obj->resv, }; return drm_gem_dmabuf_export(dev, &exp_info); } EXPORT_SYMBOL(drm_gem_prime_export); /** * drm_gem_prime_import_dev - core implementation of the import callback * @dev: drm_device to import into * @dma_buf: dma-buf object to import * @attach_dev: struct device to dma_buf attach * * This is the core of drm_gem_prime_import(). It's designed to be called by * drivers who want to use a different device structure than &drm_device.dev for * attaching via dma_buf. This function calls * &drm_driver.gem_prime_import_sg_table internally. * * Drivers must arrange to call drm_prime_gem_destroy() from their * &drm_gem_object_funcs.free hook when using this function. */ struct drm_gem_object *drm_gem_prime_import_dev(struct drm_device *dev, struct dma_buf *dma_buf, struct device *attach_dev) { struct dma_buf_attachment *attach; struct sg_table *sgt; struct drm_gem_object *obj; int ret; if (dma_buf->ops == &drm_gem_prime_dmabuf_ops) { obj = dma_buf->priv; if (obj->dev == dev) { /* * Importing dmabuf exported from our own gem increases * refcount on gem itself instead of f_count of dmabuf. */ drm_gem_object_get(obj); return obj; } } if (!dev->driver->gem_prime_import_sg_table) return ERR_PTR(-EINVAL); attach = dma_buf_attach(dma_buf, attach_dev); if (IS_ERR(attach)) return ERR_CAST(attach); get_dma_buf(dma_buf); sgt = dma_buf_map_attachment_unlocked(attach, DMA_BIDIRECTIONAL); if (IS_ERR(sgt)) { ret = PTR_ERR(sgt); goto fail_detach; } obj = dev->driver->gem_prime_import_sg_table(dev, attach, sgt); if (IS_ERR(obj)) { ret = PTR_ERR(obj); goto fail_unmap; } obj->import_attach = attach; obj->resv = dma_buf->resv; return obj; fail_unmap: dma_buf_unmap_attachment_unlocked(attach, sgt, DMA_BIDIRECTIONAL); fail_detach: dma_buf_detach(dma_buf, attach); dma_buf_put(dma_buf); return ERR_PTR(ret); } EXPORT_SYMBOL(drm_gem_prime_import_dev); /** * drm_gem_prime_import - helper library implementation of the import callback * @dev: drm_device to import into * @dma_buf: dma-buf object to import * * This is the implementation of the gem_prime_import functions for GEM drivers * using the PRIME helpers. Drivers can use this as their * &drm_driver.gem_prime_import implementation. It is used as the default * implementation in drm_gem_prime_fd_to_handle(). * * Drivers must arrange to call drm_prime_gem_destroy() from their * &drm_gem_object_funcs.free hook when using this function. 
*/ struct drm_gem_object *drm_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf) { return drm_gem_prime_import_dev(dev, dma_buf, dev->dev); } EXPORT_SYMBOL(drm_gem_prime_import); /** * drm_prime_sg_to_page_array - convert an sg table into a page array * @sgt: scatter-gather table to convert * @pages: array of page pointers to store the pages in * @max_entries: size of the passed-in array * * Exports an sg table into an array of pages. * * This function is deprecated and strongly discouraged to be used. * The page array is only useful for page faults and those can corrupt fields * in the struct page if they are not handled by the exporting driver. */ int __deprecated drm_prime_sg_to_page_array(struct sg_table *sgt, struct page **pages, int max_entries) { struct sg_page_iter page_iter; struct page **p = pages; for_each_sgtable_page(sgt, &page_iter, 0) { if (WARN_ON(p - pages >= max_entries)) return -1; *p++ = sg_page_iter_page(&page_iter); } return 0; } EXPORT_SYMBOL(drm_prime_sg_to_page_array); /** * drm_prime_sg_to_dma_addr_array - convert an sg table into a dma addr array * @sgt: scatter-gather table to convert * @addrs: array to store the dma bus address of each page * @max_entries: size of both the passed-in arrays * * Exports an sg table into an array of addresses. * * Drivers should use this in their &drm_driver.gem_prime_import_sg_table * implementation. */ int drm_prime_sg_to_dma_addr_array(struct sg_table *sgt, dma_addr_t *addrs, int max_entries) { struct sg_dma_page_iter dma_iter; dma_addr_t *a = addrs; for_each_sgtable_dma_page(sgt, &dma_iter, 0) { if (WARN_ON(a - addrs >= max_entries)) return -1; *a++ = sg_page_iter_dma_address(&dma_iter); } return 0; } EXPORT_SYMBOL(drm_prime_sg_to_dma_addr_array); /** * drm_prime_gem_destroy - helper to clean up a PRIME-imported GEM object * @obj: GEM object which was created from a dma-buf * @sg: the sg-table which was pinned at import time * * This is the cleanup functions which GEM drivers need to call when they use * drm_gem_prime_import() or drm_gem_prime_import_dev() to import dma-bufs. */ void drm_prime_gem_destroy(struct drm_gem_object *obj, struct sg_table *sg) { struct dma_buf_attachment *attach; struct dma_buf *dma_buf; attach = obj->import_attach; if (sg) dma_buf_unmap_attachment_unlocked(attach, sg, DMA_BIDIRECTIONAL); dma_buf = attach->dmabuf; dma_buf_detach(attach->dmabuf, attach); /* remove the reference */ dma_buf_put(dma_buf); } EXPORT_SYMBOL(drm_prime_gem_destroy); |
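/*
 * Illustrative sketch (not part of the original file): how a driver might
 * plug the helpers above into its GEM object functions and driver structure.
 * The "example_" names are hypothetical.
 *
 *	static const struct drm_gem_object_funcs example_gem_funcs = {
 *		.free		= example_gem_free,
 *		.export		= drm_gem_prime_export,
 *		.get_sg_table	= example_gem_get_sg_table,
 *		.vmap		= example_gem_vmap,
 *	};
 *
 *	static const struct drm_driver example_driver = {
 *		.driver_features	   = DRIVER_GEM,
 *		.gem_prime_import_sg_table = example_gem_prime_import_sg_table,
 *	};
 *
 * With this in place drm_gem_prime_export() and drm_gem_prime_import() are
 * used as the defaults for the PRIME export and import paths described above,
 * with no further driver code required.
 */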
// SPDX-License-Identifier: GPL-2.0
/*
 * DMA memory management for framework level HCD code (hc_driver)
 *
 * This implementation plugs in through generic "usb_bus" level methods,
 * and should work with all USB controllers, regardless of bus type.
 *
 * Released under the GPLv2 only.
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/device.h>
#include <linux/mm.h>
#include <linux/io.h>
#include <linux/dma-mapping.h>
#include <linux/dmapool.h>
#include <linux/genalloc.h>
#include <linux/usb.h>
#include <linux/usb/hcd.h>

/*
 * DMA-Coherent Buffers
 */

/* FIXME tune these based on pool statistics ... */
static size_t pool_max[HCD_BUFFER_POOLS] = {
        32, 128, 512, 2048,
};

void __init usb_init_pool_max(void)
{
        /*
         * The pool_max values must never be smaller than
         * ARCH_DMA_MINALIGN.
         */
        if (ARCH_DMA_MINALIGN <= 32)
                ;                       /* Original value is okay */
        else if (ARCH_DMA_MINALIGN <= 64)
                pool_max[0] = 64;
        else if (ARCH_DMA_MINALIGN <= 128)
                pool_max[0] = 0;        /* Don't use this pool */
        else
                BUILD_BUG();            /* We don't allow this */
}

/* SETUP primitives */

/**
 * hcd_buffer_create - initialize buffer pools
 * @hcd: the bus whose buffer pools are to be initialized
 *
 * Context: task context, might sleep
 *
 * Call this as part of initializing a host controller that uses the dma
 * memory allocators. It initializes some pools of dma-coherent memory that
 * will be shared by all drivers using that controller.
 *
 * Call hcd_buffer_destroy() to clean up after using those pools.
 *
 * Return: 0 if successful. A negative errno value otherwise.
 */
int hcd_buffer_create(struct usb_hcd *hcd)
{
        char name[16];
        int i, size;

        if (hcd->localmem_pool || !hcd_uses_dma(hcd))
                return 0;

        for (i = 0; i < HCD_BUFFER_POOLS; i++) {
                size = pool_max[i];
                if (!size)
                        continue;
                snprintf(name, sizeof(name), "buffer-%d", size);
                hcd->pool[i] = dma_pool_create(name, hcd->self.sysdev,
                                               size, size, 0);
                if (!hcd->pool[i]) {
                        hcd_buffer_destroy(hcd);
                        return -ENOMEM;
                }
        }
        return 0;
}

/**
 * hcd_buffer_destroy - deallocate buffer pools
 * @hcd: the bus whose buffer pools are to be destroyed
 *
 * Context: task context, might sleep
 *
 * This frees the buffer pools created by hcd_buffer_create().
 */
void hcd_buffer_destroy(struct usb_hcd *hcd)
{
        int i;

        if (!IS_ENABLED(CONFIG_HAS_DMA))
                return;

        for (i = 0; i < HCD_BUFFER_POOLS; i++) {
                dma_pool_destroy(hcd->pool[i]);
                hcd->pool[i] = NULL;
        }
}

/* sometimes alloc/free could use kmalloc with GFP_DMA, for
 * better sharing and to leverage mm/slab.c intelligence.
*/ void *hcd_buffer_alloc( struct usb_bus *bus, size_t size, gfp_t mem_flags, dma_addr_t *dma ) { struct usb_hcd *hcd = bus_to_hcd(bus); int i; if (size == 0) return NULL; if (hcd->localmem_pool) return gen_pool_dma_alloc(hcd->localmem_pool, size, dma); /* some USB hosts just use PIO */ if (!hcd_uses_dma(hcd)) { *dma = ~(dma_addr_t) 0; return kmalloc(size, mem_flags); } for (i = 0; i < HCD_BUFFER_POOLS; i++) { if (size <= pool_max[i]) return dma_pool_alloc(hcd->pool[i], mem_flags, dma); } return dma_alloc_coherent(hcd->self.sysdev, size, dma, mem_flags); } void hcd_buffer_free( struct usb_bus *bus, size_t size, void *addr, dma_addr_t dma ) { struct usb_hcd *hcd = bus_to_hcd(bus); int i; if (!addr) return; if (hcd->localmem_pool) { gen_pool_free(hcd->localmem_pool, (unsigned long)addr, size); return; } if (!hcd_uses_dma(hcd)) { kfree(addr); return; } for (i = 0; i < HCD_BUFFER_POOLS; i++) { if (size <= pool_max[i]) { dma_pool_free(hcd->pool[i], addr, dma); return; } } dma_free_coherent(hcd->self.sysdev, size, addr, dma); } void *hcd_buffer_alloc_pages(struct usb_hcd *hcd, size_t size, gfp_t mem_flags, dma_addr_t *dma) { if (size == 0) return NULL; if (hcd->localmem_pool) return gen_pool_dma_alloc_align(hcd->localmem_pool, size, dma, PAGE_SIZE); /* some USB hosts just use PIO */ if (!hcd_uses_dma(hcd)) { *dma = DMA_MAPPING_ERROR; return (void *)__get_free_pages(mem_flags, get_order(size)); } return dma_alloc_coherent(hcd->self.sysdev, size, dma, mem_flags); } void hcd_buffer_free_pages(struct usb_hcd *hcd, size_t size, void *addr, dma_addr_t dma) { if (!addr) return; if (hcd->localmem_pool) { gen_pool_free(hcd->localmem_pool, (unsigned long)addr, size); return; } if (!hcd_uses_dma(hcd)) { free_pages((unsigned long)addr, get_order(size)); return; } dma_free_coherent(hcd->self.sysdev, size, addr, dma); } |
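/*
 * Illustrative usage sketch (not part of the original file): an HCD that
 * needs a small DMA-consistent scratch buffer, where "hcd" is a hypothetical
 * struct usb_hcd pointer. Requests up to 2048 bytes are served from one of
 * the pools above; anything larger falls back to dma_alloc_coherent().
 *
 *	dma_addr_t dma;
 *	void *buf = hcd_buffer_alloc(&hcd->self, 64, GFP_KERNEL, &dma);
 *
 *	if (!buf)
 *		return -ENOMEM;
 *	...
 *	hcd_buffer_free(&hcd->self, 64, buf, dma);
 *
 * The same size must be passed to hcd_buffer_free() so the buffer is
 * returned to the pool it was allocated from.
 */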
// SPDX-License-Identifier: GPL-2.0-only
#include <net/tcp.h>

/* The bandwidth estimator estimates the rate at which the network
 * can currently deliver outbound data packets for this flow. At a high
 * level, it operates by taking a delivery rate sample for each ACK.
 *
 * A rate sample records the rate at which the network delivered packets
 * for this flow, calculated over the time interval between the transmission
 * of a data packet and the acknowledgment of that packet.
 *
 * Specifically, over the interval between each transmit and corresponding ACK,
 * the estimator generates a delivery rate sample. Typically it uses the rate
 * at which packets were acknowledged. However, the approach of using only the
 * acknowledgment rate faces a challenge under the prevalent ACK decimation or
 * compression: packets can temporarily appear to be delivered much quicker
 * than the bottleneck rate. Since it is physically impossible to do that in a
 * sustained fashion, when the estimator notices that the ACK rate is faster
 * than the transmit rate, it uses the latter:
 *
 *	send_rate = #pkts_delivered/(last_snd_time - first_snd_time)
 *	ack_rate  = #pkts_delivered/(last_ack_time - first_ack_time)
 *	bw = min(send_rate, ack_rate)
 *
 * Notice the estimator essentially estimates the goodput, not always the
 * network bottleneck link rate when the sending or receiving is limited by
 * other factors like applications or receiver window limits. The estimator
 * deliberately avoids using the inter-packet spacing approach because that
 * approach requires a large number of samples and sophisticated filtering.
 *
 * TCP flows can often be application-limited in request/response workloads.
 * The estimator marks a bandwidth sample as application-limited if there
 * was some moment during the sampled window of packets when there was no data
 * ready to send in the write queue.
 */

/* Snapshot the current delivery information in the skb, to generate
 * a rate sample later when the skb is (s)acked in tcp_rate_skb_delivered().
 */
void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb)
{
        struct tcp_sock *tp = tcp_sk(sk);

        /* In general we need to start delivery rate samples from the
         * time we received the most recent ACK, to ensure we include
         * the full time the network needs to deliver all in-flight
         * packets. If there are no packets in flight yet, then we
         * know that any ACKs after now indicate that the network was
         * able to deliver those packets completely in the sampling
         * interval between now and the next ACK.
         *
         * Note that we use packets_out instead of tcp_packets_in_flight(tp)
         * because the latter is a guess based on RTO and loss-marking
         * heuristics.
We don't want spurious RTOs or loss markings to cause * a spuriously small time interval, causing a spuriously high * bandwidth estimate. */ if (!tp->packets_out) { u64 tstamp_us = tcp_skb_timestamp_us(skb); tp->first_tx_mstamp = tstamp_us; tp->delivered_mstamp = tstamp_us; } TCP_SKB_CB(skb)->tx.first_tx_mstamp = tp->first_tx_mstamp; TCP_SKB_CB(skb)->tx.delivered_mstamp = tp->delivered_mstamp; TCP_SKB_CB(skb)->tx.delivered = tp->delivered; TCP_SKB_CB(skb)->tx.delivered_ce = tp->delivered_ce; TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 1 : 0; } /* When an skb is sacked or acked, we fill in the rate sample with the (prior) * delivery information when the skb was last transmitted. * * If an ACK (s)acks multiple skbs (e.g., stretched-acks), this function is * called multiple times. We favor the information from the most recently * sent skb, i.e., the skb with the most recently sent time and the highest * sequence. */ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, struct rate_sample *rs) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_skb_cb *scb = TCP_SKB_CB(skb); u64 tx_tstamp; if (!scb->tx.delivered_mstamp) return; tx_tstamp = tcp_skb_timestamp_us(skb); if (!rs->prior_delivered || tcp_skb_sent_after(tx_tstamp, tp->first_tx_mstamp, scb->end_seq, rs->last_end_seq)) { rs->prior_delivered_ce = scb->tx.delivered_ce; rs->prior_delivered = scb->tx.delivered; rs->prior_mstamp = scb->tx.delivered_mstamp; rs->is_app_limited = scb->tx.is_app_limited; rs->is_retrans = scb->sacked & TCPCB_RETRANS; rs->last_end_seq = scb->end_seq; /* Record send time of most recently ACKed packet: */ tp->first_tx_mstamp = tx_tstamp; /* Find the duration of the "send phase" of this window: */ rs->interval_us = tcp_stamp_us_delta(tp->first_tx_mstamp, scb->tx.first_tx_mstamp); } /* Mark off the skb delivered once it's sacked to avoid being * used again when it's cumulatively acked. For acked packets * we don't need to reset since it'll be freed soon. */ if (scb->sacked & TCPCB_SACKED_ACKED) scb->tx.delivered_mstamp = 0; } /* Update the connection delivery information and generate a rate sample. */ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, bool is_sack_reneg, struct rate_sample *rs) { struct tcp_sock *tp = tcp_sk(sk); u32 snd_us, ack_us; /* Clear app limited if bubble is acked and gone. */ if (tp->app_limited && after(tp->delivered, tp->app_limited)) tp->app_limited = 0; /* TODO: there are multiple places throughout tcp_ack() to get * current time. Refactor the code using a new "tcp_acktag_state" * to carry current time, flags, stats like "tcp_sacktag_state". */ if (delivered) tp->delivered_mstamp = tp->tcp_mstamp; rs->acked_sacked = delivered; /* freshly ACKed or SACKed */ rs->losses = lost; /* freshly marked lost */ /* Return an invalid sample if no timing information is available or * in recovery from loss with SACK reneging. Rate samples taken during * a SACK reneging event may overestimate bw by including packets that * were SACKed before the reneg. */ if (!rs->prior_mstamp || is_sack_reneg) { rs->delivered = -1; rs->interval_us = -1; return; } rs->delivered = tp->delivered - rs->prior_delivered; rs->delivered_ce = tp->delivered_ce - rs->prior_delivered_ce; /* delivered_ce occupies less than 32 bits in the skb control block */ rs->delivered_ce &= TCPCB_DELIVERED_CE_MASK; /* Model sending data and receiving ACKs as separate pipeline phases * for a window. Usually the ACK phase is longer, but with ACK * compression the send phase can be longer. 
To be safe we use the
         * longer phase.
         */
        snd_us = rs->interval_us;                               /* send phase */
        ack_us = tcp_stamp_us_delta(tp->tcp_mstamp,
                                    rs->prior_mstamp);          /* ack phase */
        rs->interval_us = max(snd_us, ack_us);

        /* Record both segment send and ack receive intervals */
        rs->snd_interval_us = snd_us;
        rs->rcv_interval_us = ack_us;

        /* Normally we expect interval_us >= min-rtt.
         * Note that rate may still be over-estimated when a spuriously
         * retransmitted skb was first (s)acked because "interval_us"
         * is under-estimated (up to an RTT). However, continuously
         * measuring the delivery rate during loss recovery is crucial
         * for connections that suffer heavy or prolonged losses.
         */
        if (unlikely(rs->interval_us < tcp_min_rtt(tp))) {
                if (!rs->is_retrans)
                        pr_debug("tcp rate: %ld %d %u %u %u\n",
                                 rs->interval_us, rs->delivered,
                                 inet_csk(sk)->icsk_ca_state,
                                 tp->rx_opt.sack_ok, tcp_min_rtt(tp));
                rs->interval_us = -1;
                return;
        }

        /* Record the last non-app-limited or the highest app-limited bw */
        if (!rs->is_app_limited ||
            ((u64)rs->delivered * tp->rate_interval_us >=
             (u64)tp->rate_delivered * rs->interval_us)) {
                tp->rate_delivered = rs->delivered;
                tp->rate_interval_us = rs->interval_us;
                tp->rate_app_limited = rs->is_app_limited;
        }
}

/* If a gap is detected between sends, mark the socket application-limited. */
void tcp_rate_check_app_limited(struct sock *sk)
{
        struct tcp_sock *tp = tcp_sk(sk);

        if (/* We have less than one packet to send. */
            tp->write_seq - tp->snd_nxt < tp->mss_cache &&
            /* Nothing in sending host's qdisc queues or NIC tx queue. */
            sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1) &&
            /* We are not limited by CWND. */
            tcp_packets_in_flight(tp) < tcp_snd_cwnd(tp) &&
            /* All lost packets have been retransmitted. */
            tp->lost_out <= tp->retrans_out)
                tp->app_limited =
                        (tp->delivered + tcp_packets_in_flight(tp)) ? : 1;
}
EXPORT_SYMBOL_GPL(tcp_rate_check_app_limited);
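/*
 * Illustrative sketch (not part of this file): how a congestion control
 * module might turn a rate sample into a bandwidth estimate, following the
 * bw = min(send_rate, ack_rate) model described above. The helper name is
 * hypothetical; it returns an estimate in bytes per second.
 *
 *	static u64 example_sample_bw(const struct sock *sk,
 *				     const struct rate_sample *rs)
 *	{
 *		const struct tcp_sock *tp = tcp_sk(sk);
 *
 *		if (rs->delivered < 0 || rs->interval_us <= 0)
 *			return 0;
 *
 *		return div64_u64((u64)rs->delivered * tp->mss_cache *
 *				 USEC_PER_SEC, rs->interval_us);
 *	}
 */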
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Power Management Quality of Service (PM QoS) support base.
 *
 * Copyright (C) 2020 Intel Corporation
 *
 * Authors:
 *	Mark Gross <mgross@linux.intel.com>
 *	Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 *
 * Provided here is an interface for specifying PM QoS dependencies. It allows
 * entities depending on QoS constraints to register their requests which are
 * aggregated as appropriate to produce effective constraints (target values)
 * that can be monitored by entities needing to respect them, either by polling
 * or through a built-in notification mechanism.
 *
 * In addition to the basic functionality, more specific interfaces for managing
 * global CPU latency QoS requests and frequency QoS requests are provided.
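 *
 * For example, with a PM_QOS_MIN style constraint list such as the CPU
 * latency QoS below, the effective (target) value is the smallest requested
 * value: requests of 20 usec and 100 usec aggregate to a 20 usec limit, and
 * removing the 20 usec request relaxes the target to 100 usec.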
*/ /*#define DEBUG*/ #include <linux/pm_qos.h> #include <linux/sched.h> #include <linux/spinlock.h> #include <linux/slab.h> #include <linux/time.h> #include <linux/fs.h> #include <linux/device.h> #include <linux/miscdevice.h> #include <linux/string.h> #include <linux/platform_device.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/debugfs.h> #include <linux/seq_file.h> #include <linux/uaccess.h> #include <linux/export.h> #include <trace/events/power.h> /* * locking rule: all changes to constraints or notifiers lists * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock * held, taken with _irqsave. One lock to rule them all */ static DEFINE_SPINLOCK(pm_qos_lock); /** * pm_qos_read_value - Return the current effective constraint value. * @c: List of PM QoS constraint requests. */ s32 pm_qos_read_value(struct pm_qos_constraints *c) { return READ_ONCE(c->target_value); } static int pm_qos_get_value(struct pm_qos_constraints *c) { if (plist_head_empty(&c->list)) return c->no_constraint_value; switch (c->type) { case PM_QOS_MIN: return plist_first(&c->list)->prio; case PM_QOS_MAX: return plist_last(&c->list)->prio; default: WARN(1, "Unknown PM QoS type in %s\n", __func__); return PM_QOS_DEFAULT_VALUE; } } static void pm_qos_set_value(struct pm_qos_constraints *c, s32 value) { WRITE_ONCE(c->target_value, value); } /** * pm_qos_update_target - Update a list of PM QoS constraint requests. * @c: List of PM QoS requests. * @node: Target list entry. * @action: Action to carry out (add, update or remove). * @value: New request value for the target list entry. * * Update the given list of PM QoS constraint requests, @c, by carrying an * @action involving the @node list entry and @value on it. * * The recognized values of @action are PM_QOS_ADD_REQ (store @value in @node * and add it to the list), PM_QOS_UPDATE_REQ (remove @node from the list, store * @value in it and add it to the list again), and PM_QOS_REMOVE_REQ (remove * @node from the list, ignore @value). * * Return: 1 if the aggregate constraint value has changed, 0 otherwise. */ int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node, enum pm_qos_req_action action, int value) { int prev_value, curr_value, new_value; unsigned long flags; spin_lock_irqsave(&pm_qos_lock, flags); prev_value = pm_qos_get_value(c); if (value == PM_QOS_DEFAULT_VALUE) new_value = c->default_value; else new_value = value; switch (action) { case PM_QOS_REMOVE_REQ: plist_del(node, &c->list); break; case PM_QOS_UPDATE_REQ: /* * To change the list, atomically remove, reinit with new value * and add, then see if the aggregate has changed. */ plist_del(node, &c->list); fallthrough; case PM_QOS_ADD_REQ: plist_node_init(node, new_value); plist_add(node, &c->list); break; default: /* no action */ ; } curr_value = pm_qos_get_value(c); pm_qos_set_value(c, curr_value); spin_unlock_irqrestore(&pm_qos_lock, flags); trace_pm_qos_update_target(action, prev_value, curr_value); if (prev_value == curr_value) return 0; if (c->notifiers) blocking_notifier_call_chain(c->notifiers, curr_value, NULL); return 1; } /** * pm_qos_flags_remove_req - Remove device PM QoS flags request. * @pqf: Device PM QoS flags set to remove the request from. * @req: Request to remove from the set. 
*/ static void pm_qos_flags_remove_req(struct pm_qos_flags *pqf, struct pm_qos_flags_request *req) { s32 val = 0; list_del(&req->node); list_for_each_entry(req, &pqf->list, node) val |= req->flags; pqf->effective_flags = val; } /** * pm_qos_update_flags - Update a set of PM QoS flags. * @pqf: Set of PM QoS flags to update. * @req: Request to add to the set, to modify, or to remove from the set. * @action: Action to take on the set. * @val: Value of the request to add or modify. * * Return: 1 if the aggregate constraint value has changed, 0 otherwise. */ bool pm_qos_update_flags(struct pm_qos_flags *pqf, struct pm_qos_flags_request *req, enum pm_qos_req_action action, s32 val) { unsigned long irqflags; s32 prev_value, curr_value; spin_lock_irqsave(&pm_qos_lock, irqflags); prev_value = list_empty(&pqf->list) ? 0 : pqf->effective_flags; switch (action) { case PM_QOS_REMOVE_REQ: pm_qos_flags_remove_req(pqf, req); break; case PM_QOS_UPDATE_REQ: pm_qos_flags_remove_req(pqf, req); fallthrough; case PM_QOS_ADD_REQ: req->flags = val; INIT_LIST_HEAD(&req->node); list_add_tail(&req->node, &pqf->list); pqf->effective_flags |= val; break; default: /* no action */ ; } curr_value = list_empty(&pqf->list) ? 0 : pqf->effective_flags; spin_unlock_irqrestore(&pm_qos_lock, irqflags); trace_pm_qos_update_flags(action, prev_value, curr_value); return prev_value != curr_value; } #ifdef CONFIG_CPU_IDLE /* Definitions related to the CPU latency QoS. */ static struct pm_qos_constraints cpu_latency_constraints = { .list = PLIST_HEAD_INIT(cpu_latency_constraints.list), .target_value = PM_QOS_CPU_LATENCY_DEFAULT_VALUE, .default_value = PM_QOS_CPU_LATENCY_DEFAULT_VALUE, .no_constraint_value = PM_QOS_CPU_LATENCY_DEFAULT_VALUE, .type = PM_QOS_MIN, }; static inline bool cpu_latency_qos_value_invalid(s32 value) { return value < 0 && value != PM_QOS_DEFAULT_VALUE; } /** * cpu_latency_qos_limit - Return current system-wide CPU latency QoS limit. */ s32 cpu_latency_qos_limit(void) { return pm_qos_read_value(&cpu_latency_constraints); } /** * cpu_latency_qos_request_active - Check the given PM QoS request. * @req: PM QoS request to check. * * Return: 'true' if @req has been added to the CPU latency QoS list, 'false' * otherwise. */ bool cpu_latency_qos_request_active(struct pm_qos_request *req) { return req->qos == &cpu_latency_constraints; } EXPORT_SYMBOL_GPL(cpu_latency_qos_request_active); static void cpu_latency_qos_apply(struct pm_qos_request *req, enum pm_qos_req_action action, s32 value) { int ret = pm_qos_update_target(req->qos, &req->node, action, value); if (ret > 0) wake_up_all_idle_cpus(); } /** * cpu_latency_qos_add_request - Add new CPU latency QoS request. * @req: Pointer to a preallocated handle. * @value: Requested constraint value. * * Use @value to initialize the request handle pointed to by @req, insert it as * a new entry to the CPU latency QoS list and recompute the effective QoS * constraint for that list. * * Callers need to save the handle for later use in updates and removal of the * QoS request represented by it. 
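 *
 * A minimal usage sketch (illustrative only; "drv_qos_req" is a hypothetical
 * driver-owned object):
 *
 *	static struct pm_qos_request drv_qos_req;
 *
 *	cpu_latency_qos_add_request(&drv_qos_req, 20);
 *	... latency-critical activity, CPU wakeup latency limited to 20 usec ...
 *	cpu_latency_qos_update_request(&drv_qos_req, PM_QOS_DEFAULT_VALUE);
 *	cpu_latency_qos_remove_request(&drv_qos_req);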
*/ void cpu_latency_qos_add_request(struct pm_qos_request *req, s32 value) { if (!req || cpu_latency_qos_value_invalid(value)) return; if (cpu_latency_qos_request_active(req)) { WARN(1, KERN_ERR "%s called for already added request\n", __func__); return; } trace_pm_qos_add_request(value); req->qos = &cpu_latency_constraints; cpu_latency_qos_apply(req, PM_QOS_ADD_REQ, value); } EXPORT_SYMBOL_GPL(cpu_latency_qos_add_request); /** * cpu_latency_qos_update_request - Modify existing CPU latency QoS request. * @req : QoS request to update. * @new_value: New requested constraint value. * * Use @new_value to update the QoS request represented by @req in the CPU * latency QoS list along with updating the effective constraint value for that * list. */ void cpu_latency_qos_update_request(struct pm_qos_request *req, s32 new_value) { if (!req || cpu_latency_qos_value_invalid(new_value)) return; if (!cpu_latency_qos_request_active(req)) { WARN(1, KERN_ERR "%s called for unknown object\n", __func__); return; } trace_pm_qos_update_request(new_value); if (new_value == req->node.prio) return; cpu_latency_qos_apply(req, PM_QOS_UPDATE_REQ, new_value); } EXPORT_SYMBOL_GPL(cpu_latency_qos_update_request); /** * cpu_latency_qos_remove_request - Remove existing CPU latency QoS request. * @req: QoS request to remove. * * Remove the CPU latency QoS request represented by @req from the CPU latency * QoS list along with updating the effective constraint value for that list. */ void cpu_latency_qos_remove_request(struct pm_qos_request *req) { if (!req) return; if (!cpu_latency_qos_request_active(req)) { WARN(1, KERN_ERR "%s called for unknown object\n", __func__); return; } trace_pm_qos_remove_request(PM_QOS_DEFAULT_VALUE); cpu_latency_qos_apply(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE); memset(req, 0, sizeof(*req)); } EXPORT_SYMBOL_GPL(cpu_latency_qos_remove_request); /* User space interface to the CPU latency QoS via misc device. 
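 *
 * Opening /dev/cpu_dma_latency adds a request at the default value; writing
 * either a binary s32 (exactly 4 bytes) or a hexadecimal string updates it,
 * and the request is dropped when the file is closed. A rough user-space
 * sketch of that usage (illustrative, error handling omitted):
 *
 *	int fd = open("/dev/cpu_dma_latency", O_RDWR);
 *	int32_t lat_us = 20;
 *
 *	write(fd, &lat_us, sizeof(lat_us));
 *	... keep fd open for as long as the constraint must hold ...
 *	close(fd);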
*/ static int cpu_latency_qos_open(struct inode *inode, struct file *filp) { struct pm_qos_request *req; req = kzalloc(sizeof(*req), GFP_KERNEL); if (!req) return -ENOMEM; cpu_latency_qos_add_request(req, PM_QOS_DEFAULT_VALUE); filp->private_data = req; return 0; } static int cpu_latency_qos_release(struct inode *inode, struct file *filp) { struct pm_qos_request *req = filp->private_data; filp->private_data = NULL; cpu_latency_qos_remove_request(req); kfree(req); return 0; } static ssize_t cpu_latency_qos_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) { struct pm_qos_request *req = filp->private_data; unsigned long flags; s32 value; if (!req || !cpu_latency_qos_request_active(req)) return -EINVAL; spin_lock_irqsave(&pm_qos_lock, flags); value = pm_qos_get_value(&cpu_latency_constraints); spin_unlock_irqrestore(&pm_qos_lock, flags); return simple_read_from_buffer(buf, count, f_pos, &value, sizeof(s32)); } static ssize_t cpu_latency_qos_write(struct file *filp, const char __user *buf, size_t count, loff_t *f_pos) { s32 value; if (count == sizeof(s32)) { if (copy_from_user(&value, buf, sizeof(s32))) return -EFAULT; } else { int ret; ret = kstrtos32_from_user(buf, count, 16, &value); if (ret) return ret; } cpu_latency_qos_update_request(filp->private_data, value); return count; } static const struct file_operations cpu_latency_qos_fops = { .write = cpu_latency_qos_write, .read = cpu_latency_qos_read, .open = cpu_latency_qos_open, .release = cpu_latency_qos_release, .llseek = noop_llseek, }; static struct miscdevice cpu_latency_qos_miscdev = { .minor = MISC_DYNAMIC_MINOR, .name = "cpu_dma_latency", .fops = &cpu_latency_qos_fops, }; static int __init cpu_latency_qos_init(void) { int ret; ret = misc_register(&cpu_latency_qos_miscdev); if (ret < 0) pr_err("%s: %s setup failed\n", __func__, cpu_latency_qos_miscdev.name); return ret; } late_initcall(cpu_latency_qos_init); #endif /* CONFIG_CPU_IDLE */ /* Definitions related to the frequency QoS below. */ static inline bool freq_qos_value_invalid(s32 value) { return value < 0 && value != PM_QOS_DEFAULT_VALUE; } /** * freq_constraints_init - Initialize frequency QoS constraints. * @qos: Frequency QoS constraints to initialize. */ void freq_constraints_init(struct freq_constraints *qos) { struct pm_qos_constraints *c; c = &qos->min_freq; plist_head_init(&c->list); c->target_value = FREQ_QOS_MIN_DEFAULT_VALUE; c->default_value = FREQ_QOS_MIN_DEFAULT_VALUE; c->no_constraint_value = FREQ_QOS_MIN_DEFAULT_VALUE; c->type = PM_QOS_MAX; c->notifiers = &qos->min_freq_notifiers; BLOCKING_INIT_NOTIFIER_HEAD(c->notifiers); c = &qos->max_freq; plist_head_init(&c->list); c->target_value = FREQ_QOS_MAX_DEFAULT_VALUE; c->default_value = FREQ_QOS_MAX_DEFAULT_VALUE; c->no_constraint_value = FREQ_QOS_MAX_DEFAULT_VALUE; c->type = PM_QOS_MIN; c->notifiers = &qos->max_freq_notifiers; BLOCKING_INIT_NOTIFIER_HEAD(c->notifiers); } /** * freq_qos_read_value - Get frequency QoS constraint for a given list. * @qos: Constraints to evaluate. * @type: QoS request type. */ s32 freq_qos_read_value(struct freq_constraints *qos, enum freq_qos_req_type type) { s32 ret; switch (type) { case FREQ_QOS_MIN: ret = IS_ERR_OR_NULL(qos) ? FREQ_QOS_MIN_DEFAULT_VALUE : pm_qos_read_value(&qos->min_freq); break; case FREQ_QOS_MAX: ret = IS_ERR_OR_NULL(qos) ? FREQ_QOS_MAX_DEFAULT_VALUE : pm_qos_read_value(&qos->max_freq); break; default: WARN_ON(1); ret = 0; } return ret; } /** * freq_qos_apply - Add/modify/remove frequency QoS request. * @req: Constraint request to apply. 
* @action: Action to perform (add/update/remove). * @value: Value to assign to the QoS request. * * This is only meant to be called from inside pm_qos, not drivers. */ int freq_qos_apply(struct freq_qos_request *req, enum pm_qos_req_action action, s32 value) { int ret; switch(req->type) { case FREQ_QOS_MIN: ret = pm_qos_update_target(&req->qos->min_freq, &req->pnode, action, value); break; case FREQ_QOS_MAX: ret = pm_qos_update_target(&req->qos->max_freq, &req->pnode, action, value); break; default: ret = -EINVAL; } return ret; } /** * freq_qos_add_request - Insert new frequency QoS request into a given list. * @qos: Constraints to update. * @req: Preallocated request object. * @type: Request type. * @value: Request value. * * Insert a new entry into the @qos list of requests, recompute the effective * QoS constraint value for that list and initialize the @req object. The * caller needs to save that object for later use in updates and removal. * * Return 1 if the effective constraint value has changed, 0 if the effective * constraint value has not changed, or a negative error code on failures. */ int freq_qos_add_request(struct freq_constraints *qos, struct freq_qos_request *req, enum freq_qos_req_type type, s32 value) { int ret; if (IS_ERR_OR_NULL(qos) || !req || freq_qos_value_invalid(value)) return -EINVAL; if (WARN(freq_qos_request_active(req), "%s() called for active request\n", __func__)) return -EINVAL; req->qos = qos; req->type = type; ret = freq_qos_apply(req, PM_QOS_ADD_REQ, value); if (ret < 0) { req->qos = NULL; req->type = 0; } return ret; } EXPORT_SYMBOL_GPL(freq_qos_add_request); /** * freq_qos_update_request - Modify existing frequency QoS request. * @req: Request to modify. * @new_value: New request value. * * Update an existing frequency QoS request along with the effective constraint * value for the list of requests it belongs to. * * Return 1 if the effective constraint value has changed, 0 if the effective * constraint value has not changed, or a negative error code on failures. */ int freq_qos_update_request(struct freq_qos_request *req, s32 new_value) { if (!req || freq_qos_value_invalid(new_value)) return -EINVAL; if (WARN(!freq_qos_request_active(req), "%s() called for unknown object\n", __func__)) return -EINVAL; if (req->pnode.prio == new_value) return 0; return freq_qos_apply(req, PM_QOS_UPDATE_REQ, new_value); } EXPORT_SYMBOL_GPL(freq_qos_update_request); /** * freq_qos_remove_request - Remove frequency QoS request from its list. * @req: Request to remove. * * Remove the given frequency QoS request from the list of constraints it * belongs to and recompute the effective constraint value for that list. * * Return 1 if the effective constraint value has changed, 0 if the effective * constraint value has not changed, or a negative error code on failures. */ int freq_qos_remove_request(struct freq_qos_request *req) { int ret; if (!req) return -EINVAL; if (WARN(!freq_qos_request_active(req), "%s() called for unknown object\n", __func__)) return -EINVAL; ret = freq_qos_apply(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE); req->qos = NULL; req->type = 0; return ret; } EXPORT_SYMBOL_GPL(freq_qos_remove_request); /** * freq_qos_add_notifier - Add frequency QoS change notifier. * @qos: List of requests to add the notifier to. * @type: Request type. * @notifier: Notifier block to add. 
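 *
 * The notifier is invoked through a blocking notifier chain with the new
 * effective constraint value passed as the 'action' argument. A minimal
 * sketch (the "example_" names are hypothetical):
 *
 *	static int example_min_freq_cb(struct notifier_block *nb,
 *				       unsigned long freq, void *data)
 *	{
 *		... react to the new minimum frequency 'freq' ...
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block example_nb = {
 *		.notifier_call = example_min_freq_cb,
 *	};
 *
 *	freq_qos_add_notifier(qos, FREQ_QOS_MIN, &example_nb);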
*/ int freq_qos_add_notifier(struct freq_constraints *qos, enum freq_qos_req_type type, struct notifier_block *notifier) { int ret; if (IS_ERR_OR_NULL(qos) || !notifier) return -EINVAL; switch (type) { case FREQ_QOS_MIN: ret = blocking_notifier_chain_register(qos->min_freq.notifiers, notifier); break; case FREQ_QOS_MAX: ret = blocking_notifier_chain_register(qos->max_freq.notifiers, notifier); break; default: WARN_ON(1); ret = -EINVAL; } return ret; } EXPORT_SYMBOL_GPL(freq_qos_add_notifier); /** * freq_qos_remove_notifier - Remove frequency QoS change notifier. * @qos: List of requests to remove the notifier from. * @type: Request type. * @notifier: Notifier block to remove. */ int freq_qos_remove_notifier(struct freq_constraints *qos, enum freq_qos_req_type type, struct notifier_block *notifier) { int ret; if (IS_ERR_OR_NULL(qos) || !notifier) return -EINVAL; switch (type) { case FREQ_QOS_MIN: ret = blocking_notifier_chain_unregister(qos->min_freq.notifiers, notifier); break; case FREQ_QOS_MAX: ret = blocking_notifier_chain_unregister(qos->max_freq.notifiers, notifier); break; default: WARN_ON(1); ret = -EINVAL; } return ret; } EXPORT_SYMBOL_GPL(freq_qos_remove_notifier); |
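/*
 * Illustrative sketch (not part of the original file): a consumer placing a
 * minimum-frequency constraint on a set of freq_constraints, for instance
 * the per-policy constraints maintained by cpufreq. The "example_" names and
 * the kHz values are hypothetical.
 *
 *	static struct freq_qos_request example_req;
 *
 *	ret = freq_qos_add_request(qos, &example_req, FREQ_QOS_MIN, 800000);
 *	if (ret < 0)
 *		return ret;
 *	...
 *	freq_qos_update_request(&example_req, 1200000);
 *	freq_qos_remove_request(&example_req);
 */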
// SPDX-License-Identifier: GPL-2.0-or-later
/* Public-key operation keyctls
 *
 * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/slab.h>
#include <linux/err.h>
#include <linux/key.h>
#include <linux/keyctl.h>
#include <linux/parser.h>
#include <linux/uaccess.h>
#include <keys/user-type.h>
#include "internal.h"

static void keyctl_pkey_params_free(struct kernel_pkey_params *params)
{
        kfree(params->info);
        key_put(params->key);
}

enum {
        Opt_err,
        Opt_enc,		/* "enc=<encoding>" eg. "enc=oaep" */
        Opt_hash,		/* "hash=<digest-name>" eg. "hash=sha1" */
};

static const match_table_t param_keys = {
        { Opt_enc, "enc=%s" },
        { Opt_hash, "hash=%s" },
        { Opt_err, NULL }
};

/*
 * Parse the information string which consists of key=val pairs.
 */
static int keyctl_pkey_params_parse(struct kernel_pkey_params *params)
{
        unsigned long token_mask = 0;
        substring_t args[MAX_OPT_ARGS];
        char *c = params->info, *p, *q;
        int token;

        while ((p = strsep(&c, " \t"))) {
                if (*p == '\0' || *p == ' ' || *p == '\t')
                        continue;
                token = match_token(p, param_keys, args);
                if (token == Opt_err)
                        return -EINVAL;
                if (__test_and_set_bit(token, &token_mask))
                        return -EINVAL;
                q = args[0].from;
                if (!q[0])
                        return -EINVAL;

                switch (token) {
                case Opt_enc:
                        params->encoding = q;
                        break;

                case Opt_hash:
                        params->hash_algo = q;
                        break;

                default:
                        return -EINVAL;
                }
        }

        return 0;
}

/*
 * Interpret parameters. Callers must always call the free function
 * on params, even if an error is returned.
 */
static int keyctl_pkey_params_get(key_serial_t id,
                                  const char __user *_info,
                                  struct kernel_pkey_params *params)
{
        key_ref_t key_ref;
        void *p;
        int ret;

        memset(params, 0, sizeof(*params));
        params->encoding = "raw";

        p = strndup_user(_info, PAGE_SIZE);
        if (IS_ERR(p))
                return PTR_ERR(p);
        params->info = p;

        ret = keyctl_pkey_params_parse(params);
        if (ret < 0)
                return ret;

        key_ref = lookup_user_key(id, 0, KEY_NEED_SEARCH);
        if (IS_ERR(key_ref))
                return PTR_ERR(key_ref);
        params->key = key_ref_to_ptr(key_ref);

        if (!params->key->type->asym_query)
                return -EOPNOTSUPP;

        return 0;
}

/*
 * Get parameters from userspace. Callers must always call the free function
 * on params, even if an error is returned.
*/ static int keyctl_pkey_params_get_2(const struct keyctl_pkey_params __user *_params, const char __user *_info, int op, struct kernel_pkey_params *params) { struct keyctl_pkey_params uparams; struct kernel_pkey_query info; int ret; memset(params, 0, sizeof(*params)); params->encoding = "raw"; if (copy_from_user(&uparams, _params, sizeof(uparams)) != 0) return -EFAULT; ret = keyctl_pkey_params_get(uparams.key_id, _info, params); if (ret < 0) return ret; ret = params->key->type->asym_query(params, &info); if (ret < 0) return ret; switch (op) { case KEYCTL_PKEY_ENCRYPT: if (uparams.in_len > info.max_dec_size || uparams.out_len > info.max_enc_size) return -EINVAL; break; case KEYCTL_PKEY_DECRYPT: if (uparams.in_len > info.max_enc_size || uparams.out_len > info.max_dec_size) return -EINVAL; break; case KEYCTL_PKEY_SIGN: if (uparams.in_len > info.max_data_size || uparams.out_len > info.max_sig_size) return -EINVAL; break; case KEYCTL_PKEY_VERIFY: if (uparams.in_len > info.max_data_size || uparams.in2_len > info.max_sig_size) return -EINVAL; break; default: BUG(); } params->in_len = uparams.in_len; params->out_len = uparams.out_len; /* Note: same as in2_len */ return 0; } /* * Query information about an asymmetric key. */ long keyctl_pkey_query(key_serial_t id, const char __user *_info, struct keyctl_pkey_query __user *_res) { struct kernel_pkey_params params; struct kernel_pkey_query res; long ret; ret = keyctl_pkey_params_get(id, _info, ¶ms); if (ret < 0) goto error; ret = params.key->type->asym_query(¶ms, &res); if (ret < 0) goto error; ret = -EFAULT; if (copy_to_user(_res, &res, sizeof(res)) == 0 && clear_user(_res->__spare, sizeof(_res->__spare)) == 0) ret = 0; error: keyctl_pkey_params_free(¶ms); return ret; } /* * Encrypt/decrypt/sign * * Encrypt data, decrypt data or sign data using a public key. * * _info is a string of supplementary information in key=val format. For * instance, it might contain: * * "enc=pkcs1 hash=sha256" * * where enc= specifies the encoding and hash= selects the OID to go in that * particular encoding if required. If enc= isn't supplied, it's assumed that * the caller is supplying raw values. * * If successful, the amount of data written into the output buffer is * returned. */ long keyctl_pkey_e_d_s(int op, const struct keyctl_pkey_params __user *_params, const char __user *_info, const void __user *_in, void __user *_out) { struct kernel_pkey_params params; void *in, *out; long ret; ret = keyctl_pkey_params_get_2(_params, _info, op, ¶ms); if (ret < 0) goto error_params; ret = -EOPNOTSUPP; if (!params.key->type->asym_eds_op) goto error_params; switch (op) { case KEYCTL_PKEY_ENCRYPT: params.op = kernel_pkey_encrypt; break; case KEYCTL_PKEY_DECRYPT: params.op = kernel_pkey_decrypt; break; case KEYCTL_PKEY_SIGN: params.op = kernel_pkey_sign; break; default: BUG(); } in = memdup_user(_in, params.in_len); if (IS_ERR(in)) { ret = PTR_ERR(in); goto error_params; } ret = -ENOMEM; out = kmalloc(params.out_len, GFP_KERNEL); if (!out) goto error_in; ret = params.key->type->asym_eds_op(¶ms, in, out); if (ret < 0) goto error_out; if (copy_to_user(_out, out, ret) != 0) ret = -EFAULT; error_out: kfree(out); error_in: kfree(in); error_params: keyctl_pkey_params_free(¶ms); return ret; } /* * Verify a signature. * * Verify a public key signature using the given key, or if not given, search * for a matching key. * * _info is a string of supplementary information in key=val format. 
For
 * instance, it might contain:
 *
 *	"enc=pkcs1 hash=sha256"
 *
 * where enc= specifies the signature blob encoding and hash= selects the OID
 * to go in that particular encoding. If enc= isn't supplied, it's assumed
 * that the caller is supplying raw values.
 *
 * If successful, 0 is returned.
 */
long keyctl_pkey_verify(const struct keyctl_pkey_params __user *_params,
                        const char __user *_info,
                        const void __user *_in, const void __user *_in2)
{
        struct kernel_pkey_params params;
        void *in, *in2;
        long ret;

        ret = keyctl_pkey_params_get_2(_params, _info, KEYCTL_PKEY_VERIFY,
                                       &params);
        if (ret < 0)
                goto error_params;

        ret = -EOPNOTSUPP;
        if (!params.key->type->asym_verify_signature)
                goto error_params;

        in = memdup_user(_in, params.in_len);
        if (IS_ERR(in)) {
                ret = PTR_ERR(in);
                goto error_params;
        }

        in2 = memdup_user(_in2, params.in2_len);
        if (IS_ERR(in2)) {
                ret = PTR_ERR(in2);
                goto error_in;
        }

        params.op = kernel_pkey_verify;
        ret = params.key->type->asym_verify_signature(&params, in, in2);

        kfree(in2);
error_in:
        kfree(in);
error_params:
        keyctl_pkey_params_free(&params);
        return ret;
}
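/*
 * Illustrative sketch (not part of the original file): what the user-space
 * side of a KEYCTL_PKEY_SIGN operation might look like when issued through
 * the raw keyctl() syscall, matching the argument order handled by
 * keyctl_pkey_e_d_s() above. The buffer names and sizes are hypothetical.
 *
 *	struct keyctl_pkey_params params = {
 *		.key_id  = key,
 *		.in_len  = sizeof(digest),
 *		.out_len = sizeof(sig),
 *	};
 *
 *	ret = syscall(__NR_keyctl, KEYCTL_PKEY_SIGN, &params,
 *		      "enc=pkcs1 hash=sha256", digest, sig);
 *
 * The info string is parsed by keyctl_pkey_params_parse(), so only "enc="
 * and "hash=" keys are accepted and each may be given at most once.
 */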
/*
 * Created: Sun Dec 21 13:08:50 2008 by bgamari@gmail.com
 *
 * Copyright 2008 Ben Gamari <bgamari@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy,
modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #include <linux/debugfs.h> #include <linux/export.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <drm/drm_atomic.h> #include <drm/drm_auth.h> #include <drm/drm_bridge.h> #include <drm/drm_debugfs.h> #include <drm/drm_device.h> #include <drm/drm_drv.h> #include <drm/drm_edid.h> #include <drm/drm_file.h> #include <drm/drm_gem.h> #include <drm/drm_managed.h> #include <drm/drm_gpuvm.h> #include "drm_crtc_internal.h" #include "drm_internal.h" /*************************************************** * Initialization, etc. **************************************************/ static int drm_name_info(struct seq_file *m, void *data) { struct drm_debugfs_entry *entry = m->private; struct drm_device *dev = entry->dev; struct drm_master *master; mutex_lock(&dev->master_mutex); master = dev->master; seq_printf(m, "%s", dev->driver->name); if (dev->dev) seq_printf(m, " dev=%s", dev_name(dev->dev)); if (master && master->unique) seq_printf(m, " master=%s", master->unique); if (dev->unique) seq_printf(m, " unique=%s", dev->unique); seq_printf(m, "\n"); mutex_unlock(&dev->master_mutex); return 0; } static int drm_clients_info(struct seq_file *m, void *data) { struct drm_debugfs_entry *entry = m->private; struct drm_device *dev = entry->dev; struct drm_file *priv; kuid_t uid; seq_printf(m, "%20s %5s %3s master a %5s %10s %*s\n", "command", "tgid", "dev", "uid", "magic", DRM_CLIENT_NAME_MAX_LEN, "name"); /* dev->filelist is sorted youngest first, but we want to present * oldest first (i.e. kernel, servers, clients), so walk backwards. */ mutex_lock(&dev->filelist_mutex); list_for_each_entry_reverse(priv, &dev->filelist, lhead) { bool is_current_master = drm_is_current_master(priv); struct task_struct *task; struct pid *pid; mutex_lock(&priv->client_name_lock); rcu_read_lock(); /* Locks priv->pid and pid_task()->comm! */ pid = rcu_dereference(priv->pid); task = pid_task(pid, PIDTYPE_TGID); uid = task ? __task_cred(task)->euid : GLOBAL_ROOT_UID; seq_printf(m, "%20s %5d %3d %c %c %5d %10u %*s\n", task ? task->comm : "<unknown>", pid_vnr(pid), priv->minor->index, is_current_master ? 'y' : 'n', priv->authenticated ? 'y' : 'n', from_kuid_munged(seq_user_ns(m), uid), priv->magic, DRM_CLIENT_NAME_MAX_LEN, priv->client_name ?
priv->client_name : "<unset>"); rcu_read_unlock(); mutex_unlock(&priv->client_name_lock); } mutex_unlock(&dev->filelist_mutex); return 0; } static int drm_gem_one_name_info(int id, void *ptr, void *data) { struct drm_gem_object *obj = ptr; struct seq_file *m = data; seq_printf(m, "%6d %8zd %7d %8d\n", obj->name, obj->size, obj->handle_count, kref_read(&obj->refcount)); return 0; } static int drm_gem_name_info(struct seq_file *m, void *data) { struct drm_debugfs_entry *entry = m->private; struct drm_device *dev = entry->dev; seq_printf(m, " name size handles refcount\n"); mutex_lock(&dev->object_name_lock); idr_for_each(&dev->object_name_idr, drm_gem_one_name_info, m); mutex_unlock(&dev->object_name_lock); return 0; } static const struct drm_debugfs_info drm_debugfs_list[] = { {"name", drm_name_info, 0}, {"clients", drm_clients_info, 0}, {"gem_names", drm_gem_name_info, DRIVER_GEM}, }; #define DRM_DEBUGFS_ENTRIES ARRAY_SIZE(drm_debugfs_list) static int drm_debugfs_open(struct inode *inode, struct file *file) { struct drm_info_node *node = inode->i_private; if (!device_is_registered(node->minor->kdev)) return -ENODEV; return single_open(file, node->info_ent->show, node); } static int drm_debugfs_entry_open(struct inode *inode, struct file *file) { struct drm_debugfs_entry *entry = inode->i_private; struct drm_debugfs_info *node = &entry->file; struct drm_minor *minor = entry->dev->primary ?: entry->dev->accel; if (!device_is_registered(minor->kdev)) return -ENODEV; return single_open(file, node->show, entry); } static const struct file_operations drm_debugfs_entry_fops = { .owner = THIS_MODULE, .open = drm_debugfs_entry_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, }; static const struct file_operations drm_debugfs_fops = { .owner = THIS_MODULE, .open = drm_debugfs_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, }; /** * drm_debugfs_gpuva_info - dump the given DRM GPU VA space * @m: pointer to the &seq_file to write * @gpuvm: the &drm_gpuvm representing the GPU VA space * * Dumps the GPU VA mappings of a given DRM GPU VA manager. * * For each DRM GPU VA space drivers should call this function from their * &drm_info_list's show callback. * * Returns: 0 on success, -ENODEV if the &gpuvm is not initialized */ int drm_debugfs_gpuva_info(struct seq_file *m, struct drm_gpuvm *gpuvm) { struct drm_gpuva *va, *kva = &gpuvm->kernel_alloc_node; if (!gpuvm->name) return -ENODEV; seq_printf(m, "DRM GPU VA space (%s) [0x%016llx;0x%016llx]\n", gpuvm->name, gpuvm->mm_start, gpuvm->mm_start + gpuvm->mm_range); seq_printf(m, "Kernel reserved node [0x%016llx;0x%016llx]\n", kva->va.addr, kva->va.addr + kva->va.range); seq_puts(m, "\n"); seq_puts(m, " VAs | start | range | end | object | object offset\n"); seq_puts(m, "-------------------------------------------------------------------------------------------------------------\n"); drm_gpuvm_for_each_va(va, gpuvm) { if (unlikely(va == kva)) continue; seq_printf(m, " | 0x%016llx | 0x%016llx | 0x%016llx | 0x%016llx | 0x%016llx\n", va->va.addr, va->va.range, va->va.addr + va->va.range, (u64)(uintptr_t)va->gem.obj, va->gem.offset); } return 0; } EXPORT_SYMBOL(drm_debugfs_gpuva_info); /** * drm_debugfs_create_files - Initialize a given set of debugfs files for DRM * minor * @files: The array of files to create * @count: The number of files given * @root: DRI debugfs dir entry. 
* @minor: device minor number * * Create a given set of debugfs files represented by an array of * &struct drm_info_list in the given root directory. These files will be removed * automatically on drm_debugfs_dev_fini(). */ void drm_debugfs_create_files(const struct drm_info_list *files, int count, struct dentry *root, struct drm_minor *minor) { struct drm_device *dev = minor->dev; struct drm_info_node *tmp; int i; for (i = 0; i < count; i++) { u32 features = files[i].driver_features; if (features && !drm_core_check_all_features(dev, features)) continue; tmp = drmm_kzalloc(dev, sizeof(*tmp), GFP_KERNEL); if (tmp == NULL) continue; tmp->minor = minor; tmp->dent = debugfs_create_file(files[i].name, 0444, root, tmp, &drm_debugfs_fops); tmp->info_ent = &files[i]; } } EXPORT_SYMBOL(drm_debugfs_create_files); int drm_debugfs_remove_files(const struct drm_info_list *files, int count, struct dentry *root, struct drm_minor *minor) { int i; for (i = 0; i < count; i++) { struct dentry *dent = debugfs_lookup(files[i].name, root); if (!dent) continue; drmm_kfree(minor->dev, d_inode(dent)->i_private); debugfs_remove(dent); } return 0; } EXPORT_SYMBOL(drm_debugfs_remove_files); /** * drm_debugfs_dev_init - create debugfs directory for the device * @dev: the device which we want to create the directory for * @root: the parent directory depending on the device type * * Creates the debugfs directory for the device under the given root directory. */ void drm_debugfs_dev_init(struct drm_device *dev, struct dentry *root) { dev->debugfs_root = debugfs_create_dir(dev->unique, root); } /** * drm_debugfs_dev_fini - cleanup debugfs directory * @dev: the device to cleanup the debugfs stuff * * Remove the debugfs directory, might be called multiple times. */ void drm_debugfs_dev_fini(struct drm_device *dev) { debugfs_remove_recursive(dev->debugfs_root); dev->debugfs_root = NULL; } void drm_debugfs_dev_register(struct drm_device *dev) { drm_debugfs_add_files(dev, drm_debugfs_list, DRM_DEBUGFS_ENTRIES); if (drm_core_check_feature(dev, DRIVER_MODESET)) { drm_framebuffer_debugfs_init(dev); drm_client_debugfs_init(dev); } if (drm_drv_uses_atomic_modeset(dev)) drm_atomic_debugfs_init(dev); } int drm_debugfs_register(struct drm_minor *minor, int minor_id, struct dentry *root) { struct drm_device *dev = minor->dev; char name[64]; sprintf(name, "%d", minor_id); minor->debugfs_symlink = debugfs_create_symlink(name, root, dev->unique); /* TODO: Only for compatibility with drivers */ minor->debugfs_root = dev->debugfs_root; if (dev->driver->debugfs_init && dev->render != minor) dev->driver->debugfs_init(minor); return 0; } void drm_debugfs_unregister(struct drm_minor *minor) { debugfs_remove(minor->debugfs_symlink); minor->debugfs_symlink = NULL; } /** * drm_debugfs_add_file - Add a given file to the DRM device debugfs file list * @dev: drm device for the ioctl * @name: debugfs file name * @show: show callback * @data: driver-private data, should not be device-specific * * Add a given file entry to the DRM device debugfs file list to be created on * drm_debugfs_init. 
*/ void drm_debugfs_add_file(struct drm_device *dev, const char *name, int (*show)(struct seq_file*, void*), void *data) { struct drm_debugfs_entry *entry = drmm_kzalloc(dev, sizeof(*entry), GFP_KERNEL); if (!entry) return; entry->file.name = name; entry->file.show = show; entry->file.data = data; entry->dev = dev; debugfs_create_file(name, 0444, dev->debugfs_root, entry, &drm_debugfs_entry_fops); } EXPORT_SYMBOL(drm_debugfs_add_file); /** * drm_debugfs_add_files - Add an array of files to the DRM device debugfs file list * @dev: drm device for the ioctl * @files: The array of files to create * @count: The number of files given * * Add a given set of debugfs files represented by an array of * &struct drm_debugfs_info in the DRM device debugfs file list. */ void drm_debugfs_add_files(struct drm_device *dev, const struct drm_debugfs_info *files, int count) { int i; for (i = 0; i < count; i++) drm_debugfs_add_file(dev, files[i].name, files[i].show, files[i].data); } EXPORT_SYMBOL(drm_debugfs_add_files); static int connector_show(struct seq_file *m, void *data) { struct drm_connector *connector = m->private; seq_printf(m, "%s\n", drm_get_connector_force_name(connector->force)); return 0; } static int connector_open(struct inode *inode, struct file *file) { struct drm_connector *dev = inode->i_private; return single_open(file, connector_show, dev); } static ssize_t connector_write(struct file *file, const char __user *ubuf, size_t len, loff_t *offp) { struct seq_file *m = file->private_data; struct drm_connector *connector = m->private; char buf[12]; if (len > sizeof(buf) - 1) return -EINVAL; if (copy_from_user(buf, ubuf, len)) return -EFAULT; buf[len] = '\0'; if (sysfs_streq(buf, "on")) connector->force = DRM_FORCE_ON; else if (sysfs_streq(buf, "digital")) connector->force = DRM_FORCE_ON_DIGITAL; else if (sysfs_streq(buf, "off")) connector->force = DRM_FORCE_OFF; else if (sysfs_streq(buf, "unspecified")) connector->force = DRM_FORCE_UNSPECIFIED; else return -EINVAL; return len; } static int edid_show(struct seq_file *m, void *data) { return drm_edid_override_show(m->private, m); } static int edid_open(struct inode *inode, struct file *file) { struct drm_connector *dev = inode->i_private; return single_open(file, edid_show, dev); } static ssize_t edid_write(struct file *file, const char __user *ubuf, size_t len, loff_t *offp) { struct seq_file *m = file->private_data; struct drm_connector *connector = m->private; char *buf; int ret; buf = memdup_user(ubuf, len); if (IS_ERR(buf)) return PTR_ERR(buf); if (len == 5 && !strncmp(buf, "reset", 5)) ret = drm_edid_override_reset(connector); else ret = drm_edid_override_set(connector, buf, len); kfree(buf); return ret ? ret : len; } /* * Returns the min and max vrr vfreq through the connector's debugfs file. * Example usage: cat /sys/kernel/debug/dri/0/DP-1/vrr_range */ static int vrr_range_show(struct seq_file *m, void *data) { struct drm_connector *connector = m->private; if (connector->status != connector_status_connected) return -ENODEV; seq_printf(m, "Min: %u\n", connector->display_info.monitor_range.min_vfreq); seq_printf(m, "Max: %u\n", connector->display_info.monitor_range.max_vfreq); return 0; } DEFINE_SHOW_ATTRIBUTE(vrr_range); /* * Returns Connector's max supported bpc through debugfs file. 
* Example usage: cat /sys/kernel/debug/dri/0/DP-1/output_bpc */ static int output_bpc_show(struct seq_file *m, void *data) { struct drm_connector *connector = m->private; if (connector->status != connector_status_connected) return -ENODEV; seq_printf(m, "Maximum: %u\n", connector->display_info.bpc); return 0; } DEFINE_SHOW_ATTRIBUTE(output_bpc); static const struct file_operations drm_edid_fops = { .owner = THIS_MODULE, .open = edid_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, .write = edid_write }; static const struct file_operations drm_connector_fops = { .owner = THIS_MODULE, .open = connector_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, .write = connector_write }; static ssize_t audio_infoframe_read(struct file *filp, char __user *ubuf, size_t count, loff_t *ppos) { struct drm_connector_hdmi_infoframe *infoframe; struct drm_connector *connector; union hdmi_infoframe *frame; u8 buf[HDMI_INFOFRAME_SIZE(AUDIO)]; ssize_t len = 0; connector = filp->private_data; mutex_lock(&connector->hdmi.infoframes.lock); infoframe = &connector->hdmi.infoframes.audio; if (!infoframe->set) goto out; frame = &infoframe->data; len = hdmi_infoframe_pack(frame, buf, sizeof(buf)); if (len < 0) goto out; len = simple_read_from_buffer(ubuf, count, ppos, buf, len); out: mutex_unlock(&connector->hdmi.infoframes.lock); return len; } static const struct file_operations audio_infoframe_fops = { .owner = THIS_MODULE, .open = simple_open, .read = audio_infoframe_read, }; static int create_hdmi_audio_infoframe_file(struct drm_connector *connector, struct dentry *parent) { struct dentry *file; file = debugfs_create_file("audio", 0400, parent, connector, &audio_infoframe_fops); if (IS_ERR(file)) return PTR_ERR(file); return 0; } #define DEFINE_INFOFRAME_FILE(_f) \ static ssize_t _f##_read_infoframe(struct file *filp, \ char __user *ubuf, \ size_t count, \ loff_t *ppos) \ { \ struct drm_connector_hdmi_infoframe *infoframe; \ struct drm_connector_state *conn_state; \ struct drm_connector *connector; \ union hdmi_infoframe *frame; \ struct drm_device *dev; \ u8 buf[HDMI_INFOFRAME_SIZE(MAX)]; \ ssize_t len = 0; \ \ connector = filp->private_data; \ dev = connector->dev; \ \ drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); \ \ conn_state = connector->state; \ infoframe = &conn_state->hdmi.infoframes._f; \ if (!infoframe->set) \ goto out; \ \ frame = &infoframe->data; \ len = hdmi_infoframe_pack(frame, buf, sizeof(buf)); \ if (len < 0) \ goto out; \ \ len = simple_read_from_buffer(ubuf, count, ppos, buf, len); \ \ out: \ drm_modeset_unlock(&dev->mode_config.connection_mutex); \ return len; \ } \ \ static const struct file_operations _f##_infoframe_fops = { \ .owner = THIS_MODULE, \ .open = simple_open, \ .read = _f##_read_infoframe, \ }; \ \ static int create_hdmi_## _f ## _infoframe_file(struct drm_connector *connector, \ struct dentry *parent) \ { \ struct dentry *file; \ \ file = debugfs_create_file(#_f, 0400, parent, connector, &_f ## _infoframe_fops); \ if (IS_ERR(file)) \ return PTR_ERR(file); \ \ return 0; \ } DEFINE_INFOFRAME_FILE(avi); DEFINE_INFOFRAME_FILE(hdmi); DEFINE_INFOFRAME_FILE(hdr_drm); DEFINE_INFOFRAME_FILE(spd); static int create_hdmi_infoframe_files(struct drm_connector *connector, struct dentry *parent) { int ret; ret = create_hdmi_audio_infoframe_file(connector, parent); if (ret) return ret; ret = create_hdmi_avi_infoframe_file(connector, parent); if (ret) return ret; ret = create_hdmi_hdmi_infoframe_file(connector, parent); if (ret) return 
ret; ret = create_hdmi_hdr_drm_infoframe_file(connector, parent); if (ret) return ret; ret = create_hdmi_spd_infoframe_file(connector, parent); if (ret) return ret; return 0; } static void hdmi_debugfs_add(struct drm_connector *connector) { struct dentry *dir; if (!(connector->connector_type == DRM_MODE_CONNECTOR_HDMIA || connector->connector_type == DRM_MODE_CONNECTOR_HDMIB)) return; dir = debugfs_create_dir("infoframes", connector->debugfs_entry); if (IS_ERR(dir)) return; create_hdmi_infoframe_files(connector, dir); } void drm_debugfs_connector_add(struct drm_connector *connector) { struct drm_device *dev = connector->dev; struct dentry *root; if (!dev->debugfs_root) return; root = debugfs_create_dir(connector->name, dev->debugfs_root); connector->debugfs_entry = root; /* force */ debugfs_create_file("force", 0644, root, connector, &drm_connector_fops); /* edid */ debugfs_create_file("edid_override", 0644, root, connector, &drm_edid_fops); /* vrr range */ debugfs_create_file("vrr_range", 0444, root, connector, &vrr_range_fops); /* max bpc */ debugfs_create_file("output_bpc", 0444, root, connector, &output_bpc_fops); hdmi_debugfs_add(connector); if (connector->funcs->debugfs_init) connector->funcs->debugfs_init(connector, root); } void drm_debugfs_connector_remove(struct drm_connector *connector) { if (!connector->debugfs_entry) return; debugfs_remove_recursive(connector->debugfs_entry); connector->debugfs_entry = NULL; } void drm_debugfs_crtc_add(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct dentry *root; char *name; name = kasprintf(GFP_KERNEL, "crtc-%d", crtc->index); if (!name) return; root = debugfs_create_dir(name, dev->debugfs_root); kfree(name); crtc->debugfs_entry = root; drm_debugfs_crtc_crc_add(crtc); } void drm_debugfs_crtc_remove(struct drm_crtc *crtc) { debugfs_remove_recursive(crtc->debugfs_entry); crtc->debugfs_entry = NULL; } static int bridges_show(struct seq_file *m, void *data) { struct drm_encoder *encoder = m->private; struct drm_printer p = drm_seq_file_printer(m); struct drm_bridge *bridge; unsigned int idx = 0; drm_for_each_bridge_in_chain(encoder, bridge) { drm_printf(&p, "bridge[%u]: %ps\n", idx++, bridge->funcs); drm_printf(&p, "\ttype: [%d] %s\n", bridge->type, drm_get_connector_type_name(bridge->type)); if (bridge->of_node) drm_printf(&p, "\tOF: %pOFfc\n", bridge->of_node); drm_printf(&p, "\tops: [0x%x]", bridge->ops); if (bridge->ops & DRM_BRIDGE_OP_DETECT) drm_puts(&p, " detect"); if (bridge->ops & DRM_BRIDGE_OP_EDID) drm_puts(&p, " edid"); if (bridge->ops & DRM_BRIDGE_OP_HPD) drm_puts(&p, " hpd"); if (bridge->ops & DRM_BRIDGE_OP_MODES) drm_puts(&p, " modes"); if (bridge->ops & DRM_BRIDGE_OP_HDMI) drm_puts(&p, " hdmi"); drm_puts(&p, "\n"); } return 0; } DEFINE_SHOW_ATTRIBUTE(bridges); void drm_debugfs_encoder_add(struct drm_encoder *encoder) { struct drm_minor *minor = encoder->dev->primary; struct dentry *root; char *name; name = kasprintf(GFP_KERNEL, "encoder-%d", encoder->index); if (!name) return; root = debugfs_create_dir(name, minor->debugfs_root); kfree(name); encoder->debugfs_entry = root; /* bridges list */ debugfs_create_file("bridges", 0444, root, encoder, &bridges_fops); if (encoder->funcs && encoder->funcs->debugfs_init) encoder->funcs->debugfs_init(encoder, root); } void drm_debugfs_encoder_remove(struct drm_encoder *encoder) { debugfs_remove_recursive(encoder->debugfs_entry); encoder->debugfs_entry = NULL; } |
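/*
 * Hedged driver-side sketch (not part of the file above): how a DRM driver
 * might hook into drm_debugfs_add_files() and the &struct drm_debugfs_info
 * show-callback convention used throughout this file.  The "foo" names are
 * hypothetical; the pattern mirrors drm_debugfs_list and
 * drm_debugfs_dev_register() earlier in this file.
 */
static int foo_status_show(struct seq_file *m, void *data)
{
	/* m->private is the &struct drm_debugfs_entry set up by the helpers */
	struct drm_debugfs_entry *entry = m->private;
	struct drm_device *dev = entry->dev;

	seq_printf(m, "%s: ok\n", dev->unique);
	return 0;
}

static const struct drm_debugfs_info foo_debugfs_files[] = {
	{ "foo_status", foo_status_show, 0 },
};

static void foo_debugfs_init(struct drm_device *dev)
{
	/* Entries are created under the per-device debugfs directory */
	drm_debugfs_add_files(dev, foo_debugfs_files,
			      ARRAY_SIZE(foo_debugfs_files));
}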
// SPDX-License-Identifier: GPL-2.0 /* sysfs entries for device PM */ #include <linux/device.h> #include <linux/kobject.h> #include <linux/string.h> #include <linux/export.h> #include <linux/pm_qos.h> #include <linux/pm_runtime.h>
#include <linux/atomic.h> #include <linux/jiffies.h> #include "power.h" /* * control - Report/change current runtime PM setting of the device * * Runtime power management of a device can be blocked with the help of * this attribute. All devices have one of the following two values for * the power/control file: * * + "auto\n" to allow the device to be power managed at run time; * + "on\n" to prevent the device from being power managed at run time; * * The default for all devices is "auto", which means that devices may be * subject to automatic power management, depending on their drivers. * Changing this attribute to "on" prevents the driver from power managing * the device at run time. Doing that while the device is suspended causes * it to be woken up. * * wakeup - Report/change current wakeup option for device * * Some devices support "wakeup" events, which are hardware signals * used to activate devices from suspended or low power states. Such * devices have one of three values for the sysfs power/wakeup file: * * + "enabled\n" to issue the events; * + "disabled\n" not to do so; or * + "\n" for temporary or permanent inability to issue wakeup. * * (For example, unconfigured USB devices can't issue wakeups.) * * Familiar examples of devices that can issue wakeup events include * keyboards and mice (both PS2 and USB styles), power buttons, modems, * "Wake-On-LAN" Ethernet links, GPIO lines, and more. Some events * will wake the entire system from a suspend state; others may just * wake up the device (if the system as a whole is already active). * Some wakeup events use normal IRQ lines; others use special out * of band signaling. * * It is the responsibility of device drivers to enable (or disable) * wakeup signaling as part of changing device power states, respecting * the policy choices provided through the driver model. * * Devices may not be able to generate wakeup events from all power * states. Also, the events may be ignored in some configurations; * for example, they might need help from other devices that aren't * active, or which may have wakeup disabled. Some drivers rely on * wakeup events internally (unless they are disabled), keeping * their hardware in low power modes whenever they're unused. This * saves runtime power, without requiring system-wide sleep states. * * async - Report/change current async suspend setting for the device * * Asynchronous suspend and resume of the device during system-wide power * state transitions can be enabled by writing "enabled" to this file. * Analogously, if "disabled" is written to this file, the device will be * suspended and resumed synchronously. * * All devices have one of the following two values for power/async: * * + "enabled\n" to permit the asynchronous suspend/resume of the device; * + "disabled\n" to forbid it; * * NOTE: It generally is unsafe to permit the asynchronous suspend/resume * of a device unless it is certain that all of the PM dependencies of the * device are known to the PM core. However, for some devices this * attribute is set to "enabled" by bus type code or device drivers and in * those cases it should be safe to leave the default value. * * autosuspend_delay_ms - Report/change a device's autosuspend_delay value * * Some drivers don't want to carry out a runtime suspend as soon as a * device becomes idle; they want it always to remain idle for some period * of time before suspending it. This period is the autosuspend_delay * value (expressed in milliseconds) and it can be controlled by the user.
* If the value is negative then the device will never be runtime * suspended. * * NOTE: The autosuspend_delay_ms attribute and the autosuspend_delay * value are used only if the driver calls pm_runtime_use_autosuspend(). * * wakeup_count - Report the number of wakeup events related to the device */ const char power_group_name[] = "power"; EXPORT_SYMBOL_GPL(power_group_name); static const char ctrl_auto[] = "auto"; static const char ctrl_on[] = "on"; static ssize_t control_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%s\n", dev->power.runtime_auto ? ctrl_auto : ctrl_on); } static ssize_t control_store(struct device * dev, struct device_attribute *attr, const char * buf, size_t n) { device_lock(dev); if (sysfs_streq(buf, ctrl_auto)) pm_runtime_allow(dev); else if (sysfs_streq(buf, ctrl_on)) pm_runtime_forbid(dev); else n = -EINVAL; device_unlock(dev); return n; } static DEVICE_ATTR_RW(control); static ssize_t runtime_active_time_show(struct device *dev, struct device_attribute *attr, char *buf) { u64 tmp = pm_runtime_active_time(dev); do_div(tmp, NSEC_PER_MSEC); return sysfs_emit(buf, "%llu\n", tmp); } static DEVICE_ATTR_RO(runtime_active_time); static ssize_t runtime_suspended_time_show(struct device *dev, struct device_attribute *attr, char *buf) { u64 tmp = pm_runtime_suspended_time(dev); do_div(tmp, NSEC_PER_MSEC); return sysfs_emit(buf, "%llu\n", tmp); } static DEVICE_ATTR_RO(runtime_suspended_time); static ssize_t runtime_status_show(struct device *dev, struct device_attribute *attr, char *buf) { const char *output; if (dev->power.runtime_error) { output = "error"; } else if (dev->power.disable_depth) { output = "unsupported"; } else { switch (dev->power.runtime_status) { case RPM_SUSPENDED: output = "suspended"; break; case RPM_SUSPENDING: output = "suspending"; break; case RPM_RESUMING: output = "resuming"; break; case RPM_ACTIVE: output = "active"; break; default: return -EIO; } } return sysfs_emit(buf, "%s\n", output); } static DEVICE_ATTR_RO(runtime_status); static ssize_t autosuspend_delay_ms_show(struct device *dev, struct device_attribute *attr, char *buf) { if (!dev->power.use_autosuspend) return -EIO; return sysfs_emit(buf, "%d\n", dev->power.autosuspend_delay); } static ssize_t autosuspend_delay_ms_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t n) { long delay; if (!dev->power.use_autosuspend) return -EIO; if (kstrtol(buf, 10, &delay) != 0 || delay != (int) delay) return -EINVAL; device_lock(dev); pm_runtime_set_autosuspend_delay(dev, delay); device_unlock(dev); return n; } static DEVICE_ATTR_RW(autosuspend_delay_ms); static ssize_t pm_qos_resume_latency_us_show(struct device *dev, struct device_attribute *attr, char *buf) { s32 value = dev_pm_qos_requested_resume_latency(dev); if (value == 0) return sysfs_emit(buf, "n/a\n"); if (value == PM_QOS_RESUME_LATENCY_NO_CONSTRAINT) value = 0; return sysfs_emit(buf, "%d\n", value); } static ssize_t pm_qos_resume_latency_us_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t n) { s32 value; int ret; if (!kstrtos32(buf, 0, &value)) { /* * Prevent users from writing negative or "no constraint" values * directly. */ if (value < 0 || value == PM_QOS_RESUME_LATENCY_NO_CONSTRAINT) return -EINVAL; if (value == 0) value = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; } else if (sysfs_streq(buf, "n/a")) { value = 0; } else { return -EINVAL; } ret = dev_pm_qos_update_request(dev->power.qos->resume_latency_req, value); return ret < 0 ? 
ret : n; } static DEVICE_ATTR_RW(pm_qos_resume_latency_us); static ssize_t pm_qos_latency_tolerance_us_show(struct device *dev, struct device_attribute *attr, char *buf) { s32 value = dev_pm_qos_get_user_latency_tolerance(dev); if (value < 0) return sysfs_emit(buf, "%s\n", "auto"); if (value == PM_QOS_LATENCY_ANY) return sysfs_emit(buf, "%s\n", "any"); return sysfs_emit(buf, "%d\n", value); } static ssize_t pm_qos_latency_tolerance_us_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t n) { s32 value; int ret; if (kstrtos32(buf, 0, &value) == 0) { /* Users can't write negative values directly */ if (value < 0) return -EINVAL; } else { if (sysfs_streq(buf, "auto")) value = PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT; else if (sysfs_streq(buf, "any")) value = PM_QOS_LATENCY_ANY; else return -EINVAL; } ret = dev_pm_qos_update_user_latency_tolerance(dev, value); return ret < 0 ? ret : n; } static DEVICE_ATTR_RW(pm_qos_latency_tolerance_us); static ssize_t pm_qos_no_power_off_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", !!(dev_pm_qos_requested_flags(dev) & PM_QOS_FLAG_NO_POWER_OFF)); } static ssize_t pm_qos_no_power_off_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t n) { int ret; if (kstrtoint(buf, 0, &ret)) return -EINVAL; if (ret != 0 && ret != 1) return -EINVAL; ret = dev_pm_qos_update_flags(dev, PM_QOS_FLAG_NO_POWER_OFF, ret); return ret < 0 ? ret : n; } static DEVICE_ATTR_RW(pm_qos_no_power_off); #ifdef CONFIG_PM_SLEEP static const char _enabled[] = "enabled"; static const char _disabled[] = "disabled"; static ssize_t wakeup_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%s\n", device_can_wakeup(dev) ? (device_may_wakeup(dev) ? 
_enabled : _disabled) : ""); } static ssize_t wakeup_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t n) { if (!device_can_wakeup(dev)) return -EINVAL; if (sysfs_streq(buf, _enabled)) device_set_wakeup_enable(dev, 1); else if (sysfs_streq(buf, _disabled)) device_set_wakeup_enable(dev, 0); else return -EINVAL; return n; } static DEVICE_ATTR_RW(wakeup); static ssize_t wakeup_count_show(struct device *dev, struct device_attribute *attr, char *buf) { unsigned long count; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { count = dev->power.wakeup->event_count; enabled = true; } spin_unlock_irq(&dev->power.lock); if (!enabled) return sysfs_emit(buf, "\n"); return sysfs_emit(buf, "%lu\n", count); } static DEVICE_ATTR_RO(wakeup_count); static ssize_t wakeup_active_count_show(struct device *dev, struct device_attribute *attr, char *buf) { unsigned long count; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { count = dev->power.wakeup->active_count; enabled = true; } spin_unlock_irq(&dev->power.lock); if (!enabled) return sysfs_emit(buf, "\n"); return sysfs_emit(buf, "%lu\n", count); } static DEVICE_ATTR_RO(wakeup_active_count); static ssize_t wakeup_abort_count_show(struct device *dev, struct device_attribute *attr, char *buf) { unsigned long count; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { count = dev->power.wakeup->wakeup_count; enabled = true; } spin_unlock_irq(&dev->power.lock); if (!enabled) return sysfs_emit(buf, "\n"); return sysfs_emit(buf, "%lu\n", count); } static DEVICE_ATTR_RO(wakeup_abort_count); static ssize_t wakeup_expire_count_show(struct device *dev, struct device_attribute *attr, char *buf) { unsigned long count; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { count = dev->power.wakeup->expire_count; enabled = true; } spin_unlock_irq(&dev->power.lock); if (!enabled) return sysfs_emit(buf, "\n"); return sysfs_emit(buf, "%lu\n", count); } static DEVICE_ATTR_RO(wakeup_expire_count); static ssize_t wakeup_active_show(struct device *dev, struct device_attribute *attr, char *buf) { unsigned int active; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { active = dev->power.wakeup->active; enabled = true; } spin_unlock_irq(&dev->power.lock); if (!enabled) return sysfs_emit(buf, "\n"); return sysfs_emit(buf, "%u\n", active); } static DEVICE_ATTR_RO(wakeup_active); static ssize_t wakeup_total_time_ms_show(struct device *dev, struct device_attribute *attr, char *buf) { s64 msec; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { msec = ktime_to_ms(dev->power.wakeup->total_time); enabled = true; } spin_unlock_irq(&dev->power.lock); if (!enabled) return sysfs_emit(buf, "\n"); return sysfs_emit(buf, "%lld\n", msec); } static DEVICE_ATTR_RO(wakeup_total_time_ms); static ssize_t wakeup_max_time_ms_show(struct device *dev, struct device_attribute *attr, char *buf) { s64 msec; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { msec = ktime_to_ms(dev->power.wakeup->max_time); enabled = true; } spin_unlock_irq(&dev->power.lock); if (!enabled) return sysfs_emit(buf, "\n"); return sysfs_emit(buf, "%lld\n", msec); } static DEVICE_ATTR_RO(wakeup_max_time_ms); static ssize_t wakeup_last_time_ms_show(struct device *dev, struct device_attribute *attr, char *buf) { s64 msec; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { msec =
ktime_to_ms(dev->power.wakeup->last_time); enabled = true; } spin_unlock_irq(&dev->power.lock); if (!enabled) return sysfs_emit(buf, "\n"); return sysfs_emit(buf, "%lld\n", msec); } static DEVICE_ATTR_RO(wakeup_last_time_ms); #ifdef CONFIG_PM_AUTOSLEEP static ssize_t wakeup_prevent_sleep_time_ms_show(struct device *dev, struct device_attribute *attr, char *buf) { s64 msec; bool enabled = false; spin_lock_irq(&dev->power.lock); if (dev->power.wakeup) { msec = ktime_to_ms(dev->power.wakeup->prevent_sleep_time); enabled = true; } spin_unlock_irq(&dev->power.lock); if (!enabled) return sysfs_emit(buf, "\n"); return sysfs_emit(buf, "%lld\n", msec); } static DEVICE_ATTR_RO(wakeup_prevent_sleep_time_ms); #endif /* CONFIG_PM_AUTOSLEEP */ static inline int dpm_sysfs_wakeup_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid) { if (dev->power.wakeup && dev->power.wakeup->dev) return device_change_owner(dev->power.wakeup->dev, kuid, kgid); return 0; } #else /* CONFIG_PM_SLEEP */ static inline int dpm_sysfs_wakeup_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid) { return 0; } #endif #ifdef CONFIG_PM_ADVANCED_DEBUG static ssize_t runtime_usage_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", atomic_read(&dev->power.usage_count)); } static DEVICE_ATTR_RO(runtime_usage); static ssize_t runtime_active_kids_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", dev->power.ignore_children ? 0 : atomic_read(&dev->power.child_count)); } static DEVICE_ATTR_RO(runtime_active_kids); static ssize_t runtime_enabled_show(struct device *dev, struct device_attribute *attr, char *buf) { const char *output; if (dev->power.disable_depth && !dev->power.runtime_auto) output = "disabled & forbidden"; else if (dev->power.disable_depth) output = "disabled"; else if (!dev->power.runtime_auto) output = "forbidden"; else output = "enabled"; return sysfs_emit(buf, "%s\n", output); } static DEVICE_ATTR_RO(runtime_enabled); #ifdef CONFIG_PM_SLEEP static ssize_t async_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%s\n", device_async_suspend_enabled(dev) ? 
_enabled : _disabled); } static ssize_t async_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t n) { if (sysfs_streq(buf, _enabled)) device_enable_async_suspend(dev); else if (sysfs_streq(buf, _disabled)) device_disable_async_suspend(dev); else return -EINVAL; return n; } static DEVICE_ATTR_RW(async); #endif /* CONFIG_PM_SLEEP */ #endif /* CONFIG_PM_ADVANCED_DEBUG */ static struct attribute *power_attrs[] = { #ifdef CONFIG_PM_ADVANCED_DEBUG #ifdef CONFIG_PM_SLEEP &dev_attr_async.attr, #endif &dev_attr_runtime_status.attr, &dev_attr_runtime_usage.attr, &dev_attr_runtime_active_kids.attr, &dev_attr_runtime_enabled.attr, #endif /* CONFIG_PM_ADVANCED_DEBUG */ NULL, }; static const struct attribute_group pm_attr_group = { .name = power_group_name, .attrs = power_attrs, }; static struct attribute *wakeup_attrs[] = { #ifdef CONFIG_PM_SLEEP &dev_attr_wakeup.attr, &dev_attr_wakeup_count.attr, &dev_attr_wakeup_active_count.attr, &dev_attr_wakeup_abort_count.attr, &dev_attr_wakeup_expire_count.attr, &dev_attr_wakeup_active.attr, &dev_attr_wakeup_total_time_ms.attr, &dev_attr_wakeup_max_time_ms.attr, &dev_attr_wakeup_last_time_ms.attr, #ifdef CONFIG_PM_AUTOSLEEP &dev_attr_wakeup_prevent_sleep_time_ms.attr, #endif #endif NULL, }; static const struct attribute_group pm_wakeup_attr_group = { .name = power_group_name, .attrs = wakeup_attrs, }; static struct attribute *runtime_attrs[] = { #ifndef CONFIG_PM_ADVANCED_DEBUG &dev_attr_runtime_status.attr, #endif &dev_attr_control.attr, &dev_attr_runtime_suspended_time.attr, &dev_attr_runtime_active_time.attr, &dev_attr_autosuspend_delay_ms.attr, NULL, }; static const struct attribute_group pm_runtime_attr_group = { .name = power_group_name, .attrs = runtime_attrs, }; static struct attribute *pm_qos_resume_latency_attrs[] = { &dev_attr_pm_qos_resume_latency_us.attr, NULL, }; static const struct attribute_group pm_qos_resume_latency_attr_group = { .name = power_group_name, .attrs = pm_qos_resume_latency_attrs, }; static struct attribute *pm_qos_latency_tolerance_attrs[] = { &dev_attr_pm_qos_latency_tolerance_us.attr, NULL, }; static const struct attribute_group pm_qos_latency_tolerance_attr_group = { .name = power_group_name, .attrs = pm_qos_latency_tolerance_attrs, }; static struct attribute *pm_qos_flags_attrs[] = { &dev_attr_pm_qos_no_power_off.attr, NULL, }; static const struct attribute_group pm_qos_flags_attr_group = { .name = power_group_name, .attrs = pm_qos_flags_attrs, }; int dpm_sysfs_add(struct device *dev) { int rc; /* No need to create PM sysfs if explicitly disabled. 
*/ if (device_pm_not_required(dev)) return 0; rc = sysfs_create_group(&dev->kobj, &pm_attr_group); if (rc) return rc; if (!pm_runtime_has_no_callbacks(dev)) { rc = sysfs_merge_group(&dev->kobj, &pm_runtime_attr_group); if (rc) goto err_out; } if (device_can_wakeup(dev)) { rc = sysfs_merge_group(&dev->kobj, &pm_wakeup_attr_group); if (rc) goto err_runtime; } if (dev->power.set_latency_tolerance) { rc = sysfs_merge_group(&dev->kobj, &pm_qos_latency_tolerance_attr_group); if (rc) goto err_wakeup; } rc = pm_wakeup_source_sysfs_add(dev); if (rc) goto err_latency; return 0; err_latency: sysfs_unmerge_group(&dev->kobj, &pm_qos_latency_tolerance_attr_group); err_wakeup: sysfs_unmerge_group(&dev->kobj, &pm_wakeup_attr_group); err_runtime: sysfs_unmerge_group(&dev->kobj, &pm_runtime_attr_group); err_out: sysfs_remove_group(&dev->kobj, &pm_attr_group); return rc; } int dpm_sysfs_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid) { int rc; if (device_pm_not_required(dev)) return 0; rc = sysfs_group_change_owner(&dev->kobj, &pm_attr_group, kuid, kgid); if (rc) return rc; if (!pm_runtime_has_no_callbacks(dev)) { rc = sysfs_group_change_owner( &dev->kobj, &pm_runtime_attr_group, kuid, kgid); if (rc) return rc; } if (device_can_wakeup(dev)) { rc = sysfs_group_change_owner(&dev->kobj, &pm_wakeup_attr_group, kuid, kgid); if (rc) return rc; rc = dpm_sysfs_wakeup_change_owner(dev, kuid, kgid); if (rc) return rc; } if (dev->power.set_latency_tolerance) { rc = sysfs_group_change_owner( &dev->kobj, &pm_qos_latency_tolerance_attr_group, kuid, kgid); if (rc) return rc; } return 0; } int wakeup_sysfs_add(struct device *dev) { int ret = sysfs_merge_group(&dev->kobj, &pm_wakeup_attr_group); if (!ret) kobject_uevent(&dev->kobj, KOBJ_CHANGE); return ret; } void wakeup_sysfs_remove(struct device *dev) { sysfs_unmerge_group(&dev->kobj, &pm_wakeup_attr_group); kobject_uevent(&dev->kobj, KOBJ_CHANGE); } int pm_qos_sysfs_add_resume_latency(struct device *dev) { return sysfs_merge_group(&dev->kobj, &pm_qos_resume_latency_attr_group); } void pm_qos_sysfs_remove_resume_latency(struct device *dev) { sysfs_unmerge_group(&dev->kobj, &pm_qos_resume_latency_attr_group); } int pm_qos_sysfs_add_flags(struct device *dev) { return sysfs_merge_group(&dev->kobj, &pm_qos_flags_attr_group); } void pm_qos_sysfs_remove_flags(struct device *dev) { sysfs_unmerge_group(&dev->kobj, &pm_qos_flags_attr_group); } int pm_qos_sysfs_add_latency_tolerance(struct device *dev) { return sysfs_merge_group(&dev->kobj, &pm_qos_latency_tolerance_attr_group); } void pm_qos_sysfs_remove_latency_tolerance(struct device *dev) { sysfs_unmerge_group(&dev->kobj, &pm_qos_latency_tolerance_attr_group); } void rpm_sysfs_remove(struct device *dev) { sysfs_unmerge_group(&dev->kobj, &pm_runtime_attr_group); } void dpm_sysfs_remove(struct device *dev) { if (device_pm_not_required(dev)) return; sysfs_unmerge_group(&dev->kobj, &pm_qos_latency_tolerance_attr_group); dev_pm_qos_constraints_destroy(dev); rpm_sysfs_remove(dev); sysfs_unmerge_group(&dev->kobj, &pm_wakeup_attr_group); sysfs_remove_group(&dev->kobj, &pm_attr_group); } |
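/*
 * Hedged driver-side sketch (not part of the file above): the runtime PM
 * calls a driver typically makes so that the power/control and
 * power/autosuspend_delay_ms attributes documented at the top of this file
 * take effect.  foo_probe() and the 2000 ms delay are illustrative only;
 * userspace still has to write "auto" to power/control to allow runtime PM.
 */
static int foo_probe(struct device *dev)
{
	/* autosuspend_delay_ms is honoured only after this call */
	pm_runtime_use_autosuspend(dev);
	pm_runtime_set_autosuspend_delay(dev, 2000);

	/* tell the PM core the device is powered up and may be managed */
	pm_runtime_set_active(dev);
	pm_runtime_enable(dev);

	return 0;
}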
// SPDX-License-Identifier: GPL-2.0-or-later /* * ASIX AX8817X based USB 2.0 Ethernet Devices * Copyright (C) 2003-2006 David Hollis <dhollis@davehollis.com> * Copyright (C) 2005 Phil Chang <pchang23@sbcglobal.net> * Copyright (C) 2006 James Painter <jamie.painter@iname.com> * Copyright (c) 2002-2003 TiVo Inc.
*/ #include "asix.h" #define AX_HOST_EN_RETRIES 30 int __must_check asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, u16 size, void *data, int in_pm) { int ret; int (*fn)(struct usbnet *, u8, u8, u16, u16, void *, u16); BUG_ON(!dev); if (!in_pm) fn = usbnet_read_cmd; else fn = usbnet_read_cmd_nopm; ret = fn(dev, cmd, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, data, size); if (unlikely(ret < size)) { ret = ret < 0 ? ret : -ENODATA; netdev_warn(dev->net, "Failed to read reg index 0x%04x: %d\n", index, ret); } return ret; } int asix_write_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, u16 size, void *data, int in_pm) { int ret; int (*fn)(struct usbnet *, u8, u8, u16, u16, const void *, u16); BUG_ON(!dev); if (!in_pm) fn = usbnet_write_cmd; else fn = usbnet_write_cmd_nopm; ret = fn(dev, cmd, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, data, size); if (unlikely(ret < 0)) netdev_warn(dev->net, "Failed to write reg index 0x%04x: %d\n", index, ret); return ret; } void asix_write_cmd_async(struct usbnet *dev, u8 cmd, u16 value, u16 index, u16 size, void *data) { usbnet_write_cmd_async(dev, cmd, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, data, size); } static int asix_set_sw_mii(struct usbnet *dev, int in_pm) { int ret; ret = asix_write_cmd(dev, AX_CMD_SET_SW_MII, 0x0000, 0, 0, NULL, in_pm); if (ret < 0) netdev_err(dev->net, "Failed to enable software MII access\n"); return ret; } static int asix_set_hw_mii(struct usbnet *dev, int in_pm) { int ret; ret = asix_write_cmd(dev, AX_CMD_SET_HW_MII, 0x0000, 0, 0, NULL, in_pm); if (ret < 0) netdev_err(dev->net, "Failed to enable hardware MII access\n"); return ret; } static int asix_check_host_enable(struct usbnet *dev, int in_pm) { int i, ret; u8 smsr; for (i = 0; i < AX_HOST_EN_RETRIES; ++i) { ret = asix_set_sw_mii(dev, in_pm); if (ret == -ENODEV || ret == -ETIMEDOUT) break; usleep_range(1000, 1100); ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0, 0, 1, &smsr, in_pm); if (ret == -ENODEV) break; else if (ret < 0) continue; else if (smsr & AX_HOST_EN) break; } return i >= AX_HOST_EN_RETRIES ? -ETIMEDOUT : ret; } static void reset_asix_rx_fixup_info(struct asix_rx_fixup_info *rx) { /* Reset the variables that have a lifetime outside of * asix_rx_fixup_internal() so that future processing starts from a * known set of initial conditions. */ if (rx->ax_skb) { /* Discard any incomplete Ethernet frame in the netdev buffer */ kfree_skb(rx->ax_skb); rx->ax_skb = NULL; } /* Assume the Data header 32-bit word is at the start of the current * or next URB socket buffer so reset all the state variables. */ rx->remaining = 0; rx->split_head = false; rx->header = 0; } int asix_rx_fixup_internal(struct usbnet *dev, struct sk_buff *skb, struct asix_rx_fixup_info *rx) { int offset = 0; u16 size; /* When an Ethernet frame spans multiple URB socket buffers, * do a sanity test for the Data header synchronisation. * Attempt to detect the situation of the previous socket buffer having * been truncated or a socket buffer was missing. These situations * cause a discontinuity in the data stream and therefore need to avoid * appending bad data to the end of the current netdev socket buffer. * Also avoid unnecessarily discarding a good current netdev socket * buffer. 
*/ if (rx->remaining && (rx->remaining + sizeof(u32) <= skb->len)) { offset = ((rx->remaining + 1) & 0xfffe); rx->header = get_unaligned_le32(skb->data + offset); offset = 0; size = (u16)(rx->header & 0x7ff); if (size != ((~rx->header >> 16) & 0x7ff)) { netdev_err(dev->net, "asix_rx_fixup() Data Header synchronisation was lost, remaining %d\n", rx->remaining); reset_asix_rx_fixup_info(rx); } } while (offset + sizeof(u16) <= skb->len) { u16 copy_length; if (!rx->remaining) { if (skb->len - offset == sizeof(u16)) { rx->header = get_unaligned_le16( skb->data + offset); rx->split_head = true; offset += sizeof(u16); break; } if (rx->split_head == true) { rx->header |= (get_unaligned_le16( skb->data + offset) << 16); rx->split_head = false; offset += sizeof(u16); } else { rx->header = get_unaligned_le32(skb->data + offset); offset += sizeof(u32); } /* take frame length from Data header 32-bit word */ size = (u16)(rx->header & 0x7ff); if (size != ((~rx->header >> 16) & 0x7ff)) { netdev_err(dev->net, "asix_rx_fixup() Bad Header Length 0x%x, offset %d\n", rx->header, offset); reset_asix_rx_fixup_info(rx); return 0; } if (size > dev->net->mtu + ETH_HLEN + VLAN_HLEN) { netdev_dbg(dev->net, "asix_rx_fixup() Bad RX Length %d\n", size); reset_asix_rx_fixup_info(rx); return 0; } /* Sometimes may fail to get a netdev socket buffer but * continue to process the URB socket buffer so that * synchronisation of the Ethernet frame Data header * word is maintained. */ rx->ax_skb = netdev_alloc_skb_ip_align(dev->net, size); rx->remaining = size; } if (rx->remaining > skb->len - offset) { copy_length = skb->len - offset; rx->remaining -= copy_length; } else { copy_length = rx->remaining; rx->remaining = 0; } if (rx->ax_skb) { skb_put_data(rx->ax_skb, skb->data + offset, copy_length); if (!rx->remaining) { usbnet_skb_return(dev, rx->ax_skb); rx->ax_skb = NULL; } } offset += (copy_length + 1) & 0xfffe; } if (skb->len != offset) { netdev_err(dev->net, "asix_rx_fixup() Bad SKB Length %d, %d\n", skb->len, offset); reset_asix_rx_fixup_info(rx); return 0; } return 1; } int asix_rx_fixup_common(struct usbnet *dev, struct sk_buff *skb) { struct asix_common_private *dp = dev->driver_priv; struct asix_rx_fixup_info *rx = &dp->rx_fixup_info; return asix_rx_fixup_internal(dev, skb, rx); } void asix_rx_fixup_common_free(struct asix_common_private *dp) { struct asix_rx_fixup_info *rx; if (!dp) return; rx = &dp->rx_fixup_info; if (rx->ax_skb) { kfree_skb(rx->ax_skb); rx->ax_skb = NULL; } } struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags) { int padlen; int headroom = skb_headroom(skb); int tailroom = skb_tailroom(skb); u32 packet_len; u32 padbytes = 0xffff0000; void *ptr; padlen = ((skb->len + 4) & (dev->maxpacket - 1)) ? 0 : 4; /* We need to push 4 bytes in front of frame (packet_len) * and maybe add 4 bytes after the end (if padlen is 4) * * Avoid skb_copy_expand() expensive call, using following rules : * - We are allowed to push 4 bytes in headroom if skb_header_cloned() * is false (and if we have 4 bytes of headroom) * - We are allowed to put 4 bytes at tail if skb_cloned() * is false (and if we have 4 bytes of tailroom) * * TCP packets for example are cloned, but __skb_header_release() * was called in tcp stack, allowing us to use headroom for our needs. 
*/ if (!skb_header_cloned(skb) && !(padlen && skb_cloned(skb)) && headroom + tailroom >= 4 + padlen) { /* following should not happen, but better be safe */ if (headroom < 4 || tailroom < padlen) { skb->data = memmove(skb->head + 4, skb->data, skb->len); skb_set_tail_pointer(skb, skb->len); } } else { struct sk_buff *skb2; skb2 = skb_copy_expand(skb, 4, padlen, flags); dev_kfree_skb_any(skb); skb = skb2; if (!skb) return NULL; } packet_len = ((skb->len ^ 0x0000ffff) << 16) + skb->len; ptr = skb_push(skb, 4); put_unaligned_le32(packet_len, ptr); if (padlen) { put_unaligned_le32(padbytes, skb_tail_pointer(skb)); skb_put(skb, sizeof(padbytes)); } usbnet_set_skb_tx_stats(skb, 1, 0); return skb; } int asix_read_phy_addr(struct usbnet *dev, bool internal) { int ret, offset; u8 buf[2]; ret = asix_read_cmd(dev, AX_CMD_READ_PHY_ID, 0, 0, 2, buf, 0); if (ret < 0) goto error; if (ret < 2) { ret = -EIO; goto error; } offset = (internal ? 1 : 0); ret = buf[offset]; netdev_dbg(dev->net, "%s PHY address 0x%x\n", internal ? "internal" : "external", ret); return ret; error: netdev_err(dev->net, "Error reading PHY_ID register: %02x\n", ret); return ret; } int asix_sw_reset(struct usbnet *dev, u8 flags, int in_pm) { int ret; ret = asix_write_cmd(dev, AX_CMD_SW_RESET, flags, 0, 0, NULL, in_pm); if (ret < 0) netdev_err(dev->net, "Failed to send software reset: %02x\n", ret); return ret; } u16 asix_read_rx_ctl(struct usbnet *dev, int in_pm) { __le16 v; int ret = asix_read_cmd(dev, AX_CMD_READ_RX_CTL, 0, 0, 2, &v, in_pm); if (ret < 0) { netdev_err(dev->net, "Error reading RX_CTL register: %02x\n", ret); goto out; } ret = le16_to_cpu(v); out: return ret; } int asix_write_rx_ctl(struct usbnet *dev, u16 mode, int in_pm) { int ret; netdev_dbg(dev->net, "asix_write_rx_ctl() - mode = 0x%04x\n", mode); ret = asix_write_cmd(dev, AX_CMD_WRITE_RX_CTL, mode, 0, 0, NULL, in_pm); if (ret < 0) netdev_err(dev->net, "Failed to write RX_CTL mode to 0x%04x: %02x\n", mode, ret); return ret; } u16 asix_read_medium_status(struct usbnet *dev, int in_pm) { __le16 v; int ret = asix_read_cmd(dev, AX_CMD_READ_MEDIUM_STATUS, 0, 0, 2, &v, in_pm); if (ret < 0) { netdev_err(dev->net, "Error reading Medium Status register: %02x\n", ret); return ret; /* TODO: callers not checking for error ret */ } return le16_to_cpu(v); } int asix_write_medium_mode(struct usbnet *dev, u16 mode, int in_pm) { int ret; netdev_dbg(dev->net, "asix_write_medium_mode() - mode = 0x%04x\n", mode); ret = asix_write_cmd(dev, AX_CMD_WRITE_MEDIUM_MODE, mode, 0, 0, NULL, in_pm); if (ret < 0) netdev_err(dev->net, "Failed to write Medium Mode mode to 0x%04x: %02x\n", mode, ret); return ret; } /* set MAC link settings according to information from phylib */ void asix_adjust_link(struct net_device *netdev) { struct phy_device *phydev = netdev->phydev; struct usbnet *dev = netdev_priv(netdev); u16 mode = 0; if (phydev->link) { mode = AX88772_MEDIUM_DEFAULT; if (phydev->duplex == DUPLEX_HALF) mode &= ~AX_MEDIUM_FD; if (phydev->speed != SPEED_100) mode &= ~AX_MEDIUM_PS; } asix_write_medium_mode(dev, mode, 0); phy_print_status(phydev); usbnet_link_change(dev, phydev->link, 0); } int asix_write_gpio(struct usbnet *dev, u16 value, int sleep, int in_pm) { int ret; netdev_dbg(dev->net, "asix_write_gpio() - value = 0x%04x\n", value); ret = asix_write_cmd(dev, AX_CMD_WRITE_GPIOS, value, 0, 0, NULL, in_pm); if (ret < 0) netdev_err(dev->net, "Failed to write GPIO value 0x%04x: %02x\n", value, ret); if (sleep) msleep(sleep); return ret; } /* * AX88772 & AX88178 have a 16-bit RX_CTL value 
*/ void asix_set_multicast(struct net_device *net) { struct usbnet *dev = netdev_priv(net); struct asix_data *data = (struct asix_data *)&dev->data; u16 rx_ctl = AX_DEFAULT_RX_CTL; if (net->flags & IFF_PROMISC) { rx_ctl |= AX_RX_CTL_PRO; } else if (net->flags & IFF_ALLMULTI || netdev_mc_count(net) > AX_MAX_MCAST) { rx_ctl |= AX_RX_CTL_AMALL; } else if (netdev_mc_empty(net)) { /* just broadcast and directed */ } else { /* We use the 20 byte dev->data * for our 8 byte filter buffer * to avoid allocating memory that * is tricky to free later */ struct netdev_hw_addr *ha; u32 crc_bits; memset(data->multi_filter, 0, AX_MCAST_FILTER_SIZE); /* Build the multicast hash filter. */ netdev_for_each_mc_addr(ha, net) { crc_bits = ether_crc(ETH_ALEN, ha->addr) >> 26; data->multi_filter[crc_bits >> 3] |= 1 << (crc_bits & 7); } asix_write_cmd_async(dev, AX_CMD_WRITE_MULTI_FILTER, 0, 0, AX_MCAST_FILTER_SIZE, data->multi_filter); rx_ctl |= AX_RX_CTL_AM; } asix_write_cmd_async(dev, AX_CMD_WRITE_RX_CTL, rx_ctl, 0, 0, NULL); } static int __asix_mdio_read(struct net_device *netdev, int phy_id, int loc, bool in_pm) { struct usbnet *dev = netdev_priv(netdev); __le16 res; int ret; mutex_lock(&dev->phy_mutex); ret = asix_check_host_enable(dev, in_pm); if (ret == -ENODEV || ret == -ETIMEDOUT) { mutex_unlock(&dev->phy_mutex); return ret; } ret = asix_read_cmd(dev, AX_CMD_READ_MII_REG, phy_id, (__u16)loc, 2, &res, in_pm); if (ret < 0) goto out; ret = asix_set_hw_mii(dev, in_pm); out: mutex_unlock(&dev->phy_mutex); netdev_dbg(dev->net, "asix_mdio_read() phy_id=0x%02x, loc=0x%02x, returns=0x%04x\n", phy_id, loc, le16_to_cpu(res)); return ret < 0 ? ret : le16_to_cpu(res); } int asix_mdio_read(struct net_device *netdev, int phy_id, int loc) { return __asix_mdio_read(netdev, phy_id, loc, false); } static int __asix_mdio_write(struct net_device *netdev, int phy_id, int loc, int val, bool in_pm) { struct usbnet *dev = netdev_priv(netdev); __le16 res = cpu_to_le16(val); int ret; netdev_dbg(dev->net, "asix_mdio_write() phy_id=0x%02x, loc=0x%02x, val=0x%04x\n", phy_id, loc, val); mutex_lock(&dev->phy_mutex); ret = asix_check_host_enable(dev, in_pm); if (ret == -ENODEV) goto out; ret = asix_write_cmd(dev, AX_CMD_WRITE_MII_REG, phy_id, (__u16)loc, 2, &res, in_pm); if (ret < 0) goto out; ret = asix_set_hw_mii(dev, in_pm); out: mutex_unlock(&dev->phy_mutex); return ret < 0 ? 
ret : 0; } void asix_mdio_write(struct net_device *netdev, int phy_id, int loc, int val) { __asix_mdio_write(netdev, phy_id, loc, val, false); } /* MDIO read and write wrappers for phylib */ int asix_mdio_bus_read(struct mii_bus *bus, int phy_id, int regnum) { struct usbnet *priv = bus->priv; return __asix_mdio_read(priv->net, phy_id, regnum, false); } int asix_mdio_bus_write(struct mii_bus *bus, int phy_id, int regnum, u16 val) { struct usbnet *priv = bus->priv; return __asix_mdio_write(priv->net, phy_id, regnum, val, false); } int asix_mdio_read_nopm(struct net_device *netdev, int phy_id, int loc) { return __asix_mdio_read(netdev, phy_id, loc, true); } void asix_mdio_write_nopm(struct net_device *netdev, int phy_id, int loc, int val) { __asix_mdio_write(netdev, phy_id, loc, val, true); } void asix_get_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) { struct usbnet *dev = netdev_priv(net); u8 opt; if (asix_read_cmd(dev, AX_CMD_READ_MONITOR_MODE, 0, 0, 1, &opt, 0) < 0) { wolinfo->supported = 0; wolinfo->wolopts = 0; return; } wolinfo->supported = WAKE_PHY | WAKE_MAGIC; wolinfo->wolopts = 0; if (opt & AX_MONITOR_LINK) wolinfo->wolopts |= WAKE_PHY; if (opt & AX_MONITOR_MAGIC) wolinfo->wolopts |= WAKE_MAGIC; } int asix_set_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) { struct usbnet *dev = netdev_priv(net); u8 opt = 0; if (wolinfo->wolopts & ~(WAKE_PHY | WAKE_MAGIC)) return -EINVAL; if (wolinfo->wolopts & WAKE_PHY) opt |= AX_MONITOR_LINK; if (wolinfo->wolopts & WAKE_MAGIC) opt |= AX_MONITOR_MAGIC; if (asix_write_cmd(dev, AX_CMD_WRITE_MONITOR_MODE, opt, 0, 0, NULL, 0) < 0) return -EINVAL; return 0; } int asix_get_eeprom_len(struct net_device *net) { return AX_EEPROM_LEN; } int asix_get_eeprom(struct net_device *net, struct ethtool_eeprom *eeprom, u8 *data) { struct usbnet *dev = netdev_priv(net); u16 *eeprom_buff; int first_word, last_word; int i; if (eeprom->len == 0) return -EINVAL; eeprom->magic = AX_EEPROM_MAGIC; first_word = eeprom->offset >> 1; last_word = (eeprom->offset + eeprom->len - 1) >> 1; eeprom_buff = kmalloc_array(last_word - first_word + 1, sizeof(u16), GFP_KERNEL); if (!eeprom_buff) return -ENOMEM; /* ax8817x returns 2 bytes from eeprom on read */ for (i = first_word; i <= last_word; i++) { if (asix_read_cmd(dev, AX_CMD_READ_EEPROM, i, 0, 2, &eeprom_buff[i - first_word], 0) < 0) { kfree(eeprom_buff); return -EIO; } } memcpy(data, (u8 *)eeprom_buff + (eeprom->offset & 1), eeprom->len); kfree(eeprom_buff); return 0; } int asix_set_eeprom(struct net_device *net, struct ethtool_eeprom *eeprom, u8 *data) { struct usbnet *dev = netdev_priv(net); u16 *eeprom_buff; int first_word, last_word; int i; int ret; netdev_dbg(net, "write EEPROM len %d, offset %d, magic 0x%x\n", eeprom->len, eeprom->offset, eeprom->magic); if (eeprom->len == 0) return -EINVAL; if (eeprom->magic != AX_EEPROM_MAGIC) return -EINVAL; first_word = eeprom->offset >> 1; last_word = (eeprom->offset + eeprom->len - 1) >> 1; eeprom_buff = kmalloc_array(last_word - first_word + 1, sizeof(u16), GFP_KERNEL); if (!eeprom_buff) return -ENOMEM; /* align data to 16 bit boundaries, read the missing data from the EEPROM */ if (eeprom->offset & 1) { ret = asix_read_cmd(dev, AX_CMD_READ_EEPROM, first_word, 0, 2, &eeprom_buff[0], 0); if (ret < 0) { netdev_err(net, "Failed to read EEPROM at offset 0x%02x.\n", first_word); goto free; } } if ((eeprom->offset + eeprom->len) & 1) { ret = asix_read_cmd(dev, AX_CMD_READ_EEPROM, last_word, 0, 2, &eeprom_buff[last_word - first_word], 0); if (ret < 0) { 
netdev_err(net, "Failed to read EEPROM at offset 0x%02x.\n", last_word); goto free; } } memcpy((u8 *)eeprom_buff + (eeprom->offset & 1), data, eeprom->len); /* write data to EEPROM */ ret = asix_write_cmd(dev, AX_CMD_WRITE_ENABLE, 0x0000, 0, 0, NULL, 0); if (ret < 0) { netdev_err(net, "Failed to enable EEPROM write\n"); goto free; } msleep(20); for (i = first_word; i <= last_word; i++) { netdev_dbg(net, "write to EEPROM at offset 0x%02x, data 0x%04x\n", i, eeprom_buff[i - first_word]); ret = asix_write_cmd(dev, AX_CMD_WRITE_EEPROM, i, eeprom_buff[i - first_word], 0, NULL, 0); if (ret < 0) { netdev_err(net, "Failed to write EEPROM at offset 0x%02x.\n", i); goto free; } msleep(20); } ret = asix_write_cmd(dev, AX_CMD_WRITE_DISABLE, 0x0000, 0, 0, NULL, 0); if (ret < 0) { netdev_err(net, "Failed to disable EEPROM write\n"); goto free; } ret = 0; free: kfree(eeprom_buff); return ret; } void asix_get_drvinfo(struct net_device *net, struct ethtool_drvinfo *info) { /* Inherit standard device info */ usbnet_get_drvinfo(net, info); strscpy(info->driver, DRIVER_NAME, sizeof(info->driver)); strscpy(info->version, DRIVER_VERSION, sizeof(info->version)); } int asix_set_mac_address(struct net_device *net, void *p) { struct usbnet *dev = netdev_priv(net); struct asix_data *data = (struct asix_data *)&dev->data; struct sockaddr *addr = p; if (netif_running(net)) return -EBUSY; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; eth_hw_addr_set(net, addr->sa_data); /* We use the 20 byte dev->data * for our 6 byte mac buffer * to avoid allocating memory that * is tricky to free later */ memcpy(data->mac_addr, addr->sa_data, ETH_ALEN); asix_write_cmd_async(dev, AX_CMD_WRITE_NODE_ID, 0, 0, ETH_ALEN, data->mac_addr); return 0; } |
/* * cdc_ncm.c * * Copyright (C) ST-Ericsson 2010-2012 * Contact: Alexey Orishko <alexey.orishko@stericsson.com> * Original author: Hans Petter Selasky <hans.petter.selasky@stericsson.com> * * USB Host Driver for Network Control Model (NCM) * http://www.usb.org/developers/docs/devclass_docs/NCM10_012011.zip * * The NCM encoding, decoding and initialization logic * derives from FreeBSD 8.x. if_cdce.c and if_cdcereg.h * * This software is available to you under a choice of one of two * licenses. You may choose this file to be licensed under the terms * of the GNU General Public License (GPL) Version 2 or the 2-clause * BSD license listed below: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1.
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include <linux/module.h> #include <linux/netdevice.h> #include <linux/ctype.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> #include <linux/kstrtox.h> #include <linux/workqueue.h> #include <linux/mii.h> #include <linux/crc32.h> #include <linux/usb.h> #include <linux/hrtimer.h> #include <linux/atomic.h> #include <linux/usb/usbnet.h> #include <linux/usb/cdc.h> #include <linux/usb/cdc_ncm.h> #if IS_ENABLED(CONFIG_USB_NET_CDC_MBIM) static bool prefer_mbim = true; #else static bool prefer_mbim; #endif module_param(prefer_mbim, bool, 0644); MODULE_PARM_DESC(prefer_mbim, "Prefer MBIM setting on dual NCM/MBIM functions"); static void cdc_ncm_txpath_bh(struct tasklet_struct *t); static void cdc_ncm_tx_timeout_start(struct cdc_ncm_ctx *ctx); static enum hrtimer_restart cdc_ncm_tx_timer_cb(struct hrtimer *hr_timer); static struct usb_driver cdc_ncm_driver; struct cdc_ncm_stats { char stat_string[ETH_GSTRING_LEN]; int sizeof_stat; int stat_offset; }; #define CDC_NCM_STAT(str, m) { \ .stat_string = str, \ .sizeof_stat = sizeof(((struct cdc_ncm_ctx *)0)->m), \ .stat_offset = offsetof(struct cdc_ncm_ctx, m) } #define CDC_NCM_SIMPLE_STAT(m) CDC_NCM_STAT(__stringify(m), m) static const struct cdc_ncm_stats cdc_ncm_gstrings_stats[] = { CDC_NCM_SIMPLE_STAT(tx_reason_ntb_full), CDC_NCM_SIMPLE_STAT(tx_reason_ndp_full), CDC_NCM_SIMPLE_STAT(tx_reason_timeout), CDC_NCM_SIMPLE_STAT(tx_reason_max_datagram), CDC_NCM_SIMPLE_STAT(tx_overhead), CDC_NCM_SIMPLE_STAT(tx_ntbs), CDC_NCM_SIMPLE_STAT(rx_overhead), CDC_NCM_SIMPLE_STAT(rx_ntbs), }; #define CDC_NCM_LOW_MEM_MAX_CNT 10 static int cdc_ncm_get_sset_count(struct net_device __always_unused *netdev, int sset) { switch (sset) { case ETH_SS_STATS: return ARRAY_SIZE(cdc_ncm_gstrings_stats); default: return -EOPNOTSUPP; } } static void cdc_ncm_get_ethtool_stats(struct net_device *netdev, struct ethtool_stats __always_unused *stats, u64 *data) { struct usbnet *dev = netdev_priv(netdev); struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; int i; char *p = NULL; for (i = 0; i < ARRAY_SIZE(cdc_ncm_gstrings_stats); i++) { p = (char *)ctx + cdc_ncm_gstrings_stats[i].stat_offset; data[i] = (cdc_ncm_gstrings_stats[i].sizeof_stat == sizeof(u64)) ? 
*(u64 *)p : *(u32 *)p; } } static void cdc_ncm_get_strings(struct net_device __always_unused *netdev, u32 stringset, u8 *data) { u8 *p = data; int i; switch (stringset) { case ETH_SS_STATS: for (i = 0; i < ARRAY_SIZE(cdc_ncm_gstrings_stats); i++) { memcpy(p, cdc_ncm_gstrings_stats[i].stat_string, ETH_GSTRING_LEN); p += ETH_GSTRING_LEN; } } } static void cdc_ncm_update_rxtx_max(struct usbnet *dev, u32 new_rx, u32 new_tx); static const struct ethtool_ops cdc_ncm_ethtool_ops = { .get_link = usbnet_get_link, .nway_reset = usbnet_nway_reset, .get_drvinfo = usbnet_get_drvinfo, .get_msglevel = usbnet_get_msglevel, .set_msglevel = usbnet_set_msglevel, .get_ts_info = ethtool_op_get_ts_info, .get_sset_count = cdc_ncm_get_sset_count, .get_strings = cdc_ncm_get_strings, .get_ethtool_stats = cdc_ncm_get_ethtool_stats, .get_link_ksettings = usbnet_get_link_ksettings_internal, .set_link_ksettings = NULL, }; static u32 cdc_ncm_check_rx_max(struct usbnet *dev, u32 new_rx) { struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; u32 val, max, min; /* clamp new_rx to sane values */ min = USB_CDC_NCM_NTB_MIN_IN_SIZE; max = min_t(u32, CDC_NCM_NTB_MAX_SIZE_RX, le32_to_cpu(ctx->ncm_parm.dwNtbInMaxSize)); /* dwNtbInMaxSize spec violation? Use MIN size for both limits */ if (max < min) { dev_warn(&dev->intf->dev, "dwNtbInMaxSize=%u is too small. Using %u\n", le32_to_cpu(ctx->ncm_parm.dwNtbInMaxSize), min); max = min; } val = clamp_t(u32, new_rx, min, max); if (val != new_rx) dev_dbg(&dev->intf->dev, "rx_max must be in the [%u, %u] range\n", min, max); return val; } static u32 cdc_ncm_check_tx_max(struct usbnet *dev, u32 new_tx) { struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; u32 val, max, min; /* clamp new_tx to sane values */ if (ctx->is_ndp16) min = ctx->max_datagram_size + ctx->max_ndp_size + sizeof(struct usb_cdc_ncm_nth16); else min = ctx->max_datagram_size + ctx->max_ndp_size + sizeof(struct usb_cdc_ncm_nth32); if (le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize) == 0) max = CDC_NCM_NTB_MAX_SIZE_TX; /* dwNtbOutMaxSize not set */ else max = clamp_t(u32, le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize), USB_CDC_NCM_NTB_MIN_OUT_SIZE, CDC_NCM_NTB_MAX_SIZE_TX); /* some devices set dwNtbOutMaxSize too low for the above default */ min = min(min, max); val = clamp_t(u32, new_tx, min, max); if (val != new_tx) dev_dbg(&dev->intf->dev, "tx_max must be in the [%u, %u] range\n", min, max); return val; } static ssize_t min_tx_pkt_show(struct device *d, struct device_attribute *attr, char *buf) { struct usbnet *dev = netdev_priv(to_net_dev(d)); struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; return sprintf(buf, "%u\n", ctx->min_tx_pkt); } static ssize_t rx_max_show(struct device *d, struct device_attribute *attr, char *buf) { struct usbnet *dev = netdev_priv(to_net_dev(d)); struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; return sprintf(buf, "%u\n", ctx->rx_max); } static ssize_t tx_max_show(struct device *d, struct device_attribute *attr, char *buf) { struct usbnet *dev = netdev_priv(to_net_dev(d)); struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; return sprintf(buf, "%u\n", ctx->tx_max); } static ssize_t tx_timer_usecs_show(struct device *d, struct device_attribute *attr, char *buf) { struct usbnet *dev = netdev_priv(to_net_dev(d)); struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; return sprintf(buf, "%u\n", ctx->timer_interval / (u32)NSEC_PER_USEC); } static ssize_t min_tx_pkt_store(struct device *d, struct device_attribute *attr, const char *buf, size_t 
len) { struct usbnet *dev = netdev_priv(to_net_dev(d)); struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; unsigned long val; /* no need to restrict values - anything from 0 to infinity is OK */ if (kstrtoul(buf, 0, &val)) return -EINVAL; ctx->min_tx_pkt = val; return len; } static ssize_t rx_max_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { struct usbnet *dev = netdev_priv(to_net_dev(d)); struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; unsigned long val; if (kstrtoul(buf, 0, &val) || cdc_ncm_check_rx_max(dev, val) != val) return -EINVAL; cdc_ncm_update_rxtx_max(dev, val, ctx->tx_max); return len; } static ssize_t tx_max_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { struct usbnet *dev = netdev_priv(to_net_dev(d)); struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; unsigned long val; if (kstrtoul(buf, 0, &val) || cdc_ncm_check_tx_max(dev, val) != val) return -EINVAL; cdc_ncm_update_rxtx_max(dev, ctx->rx_max, val); return len; } static ssize_t tx_timer_usecs_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { struct usbnet *dev = netdev_priv(to_net_dev(d)); struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; ssize_t ret; unsigned long val; ret = kstrtoul(buf, 0, &val); if (ret) return ret; if (val && (val < CDC_NCM_TIMER_INTERVAL_MIN || val > CDC_NCM_TIMER_INTERVAL_MAX)) return -EINVAL; spin_lock_bh(&ctx->mtx); ctx->timer_interval = val * NSEC_PER_USEC; if (!ctx->timer_interval) ctx->tx_timer_pending = 0; spin_unlock_bh(&ctx->mtx); return len; } static DEVICE_ATTR_RW(min_tx_pkt); static DEVICE_ATTR_RW(rx_max); static DEVICE_ATTR_RW(tx_max); static DEVICE_ATTR_RW(tx_timer_usecs); static ssize_t ndp_to_end_show(struct device *d, struct device_attribute *attr, char *buf) { struct usbnet *dev = netdev_priv(to_net_dev(d)); struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; return sprintf(buf, "%c\n", ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END ? 'Y' : 'N'); } static ssize_t ndp_to_end_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { struct usbnet *dev = netdev_priv(to_net_dev(d)); struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; bool enable; if (kstrtobool(buf, &enable)) return -EINVAL; /* no change? 
*/ if (enable == (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END)) return len; if (enable) { if (ctx->is_ndp16 && !ctx->delayed_ndp16) { ctx->delayed_ndp16 = kzalloc(ctx->max_ndp_size, GFP_KERNEL); if (!ctx->delayed_ndp16) return -ENOMEM; } if (!ctx->is_ndp16 && !ctx->delayed_ndp32) { ctx->delayed_ndp32 = kzalloc(ctx->max_ndp_size, GFP_KERNEL); if (!ctx->delayed_ndp32) return -ENOMEM; } } /* flush pending data before changing flag */ netif_tx_lock_bh(dev->net); usbnet_start_xmit(NULL, dev->net); spin_lock_bh(&ctx->mtx); if (enable) ctx->drvflags |= CDC_NCM_FLAG_NDP_TO_END; else ctx->drvflags &= ~CDC_NCM_FLAG_NDP_TO_END; spin_unlock_bh(&ctx->mtx); netif_tx_unlock_bh(dev->net); return len; } static DEVICE_ATTR_RW(ndp_to_end); #define NCM_PARM_ATTR(name, format, tocpu) \ static ssize_t cdc_ncm_show_##name(struct device *d, struct device_attribute *attr, char *buf) \ { \ struct usbnet *dev = netdev_priv(to_net_dev(d)); \ struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; \ return sprintf(buf, format "\n", tocpu(ctx->ncm_parm.name)); \ } \ static DEVICE_ATTR(name, 0444, cdc_ncm_show_##name, NULL) NCM_PARM_ATTR(bmNtbFormatsSupported, "0x%04x", le16_to_cpu); NCM_PARM_ATTR(dwNtbInMaxSize, "%u", le32_to_cpu); NCM_PARM_ATTR(wNdpInDivisor, "%u", le16_to_cpu); NCM_PARM_ATTR(wNdpInPayloadRemainder, "%u", le16_to_cpu); NCM_PARM_ATTR(wNdpInAlignment, "%u", le16_to_cpu); NCM_PARM_ATTR(dwNtbOutMaxSize, "%u", le32_to_cpu); NCM_PARM_ATTR(wNdpOutDivisor, "%u", le16_to_cpu); NCM_PARM_ATTR(wNdpOutPayloadRemainder, "%u", le16_to_cpu); NCM_PARM_ATTR(wNdpOutAlignment, "%u", le16_to_cpu); NCM_PARM_ATTR(wNtbOutMaxDatagrams, "%u", le16_to_cpu); static struct attribute *cdc_ncm_sysfs_attrs[] = { &dev_attr_min_tx_pkt.attr, &dev_attr_ndp_to_end.attr, &dev_attr_rx_max.attr, &dev_attr_tx_max.attr, &dev_attr_tx_timer_usecs.attr, &dev_attr_bmNtbFormatsSupported.attr, &dev_attr_dwNtbInMaxSize.attr, &dev_attr_wNdpInDivisor.attr, &dev_attr_wNdpInPayloadRemainder.attr, &dev_attr_wNdpInAlignment.attr, &dev_attr_dwNtbOutMaxSize.attr, &dev_attr_wNdpOutDivisor.attr, &dev_attr_wNdpOutPayloadRemainder.attr, &dev_attr_wNdpOutAlignment.attr, &dev_attr_wNtbOutMaxDatagrams.attr, NULL, }; static const struct attribute_group cdc_ncm_sysfs_attr_group = { .name = "cdc_ncm", .attrs = cdc_ncm_sysfs_attrs, }; /* handle rx_max and tx_max changes */ static void cdc_ncm_update_rxtx_max(struct usbnet *dev, u32 new_rx, u32 new_tx) { struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; u8 iface_no = ctx->control->cur_altsetting->desc.bInterfaceNumber; u32 val; val = cdc_ncm_check_rx_max(dev, new_rx); /* inform device about NTB input size changes */ if (val != ctx->rx_max) { __le32 dwNtbInMaxSize = cpu_to_le32(val); dev_info(&dev->intf->dev, "setting rx_max = %u\n", val); /* tell device to use new size */ if (usbnet_write_cmd(dev, USB_CDC_SET_NTB_INPUT_SIZE, USB_TYPE_CLASS | USB_DIR_OUT | USB_RECIP_INTERFACE, 0, iface_no, &dwNtbInMaxSize, 4) < 0) dev_dbg(&dev->intf->dev, "Setting NTB Input Size failed\n"); else ctx->rx_max = val; } /* usbnet use these values for sizing rx queues */ if (dev->rx_urb_size != ctx->rx_max) { dev->rx_urb_size = ctx->rx_max; if (netif_running(dev->net)) usbnet_unlink_rx_urbs(dev); } val = cdc_ncm_check_tx_max(dev, new_tx); if (val != ctx->tx_max) dev_info(&dev->intf->dev, "setting tx_max = %u\n", val); /* Adding a pad byte here if necessary simplifies the handling * in cdc_ncm_fill_tx_frame, making tx_max always represent * the real skb max size. 
* * We cannot use dev->maxpacket here because this is called from * .bind which is called before usbnet sets up dev->maxpacket */ if (val != le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize) && val % usb_maxpacket(dev->udev, dev->out) == 0) val++; /* we might need to flush any pending tx buffers if running */ if (netif_running(dev->net) && val > ctx->tx_max) { netif_tx_lock_bh(dev->net); usbnet_start_xmit(NULL, dev->net); /* make sure tx_curr_skb is reallocated if it was empty */ if (ctx->tx_curr_skb) { dev_kfree_skb_any(ctx->tx_curr_skb); ctx->tx_curr_skb = NULL; } ctx->tx_max = val; netif_tx_unlock_bh(dev->net); } else { ctx->tx_max = val; } dev->hard_mtu = ctx->tx_max; /* max qlen depend on hard_mtu and rx_urb_size */ usbnet_update_max_qlen(dev); /* never pad more than 3 full USB packets per transfer */ ctx->min_tx_pkt = clamp_t(u16, ctx->tx_max - 3 * usb_maxpacket(dev->udev, dev->out), CDC_NCM_MIN_TX_PKT, ctx->tx_max); } /* helpers for NCM and MBIM differences */ static u8 cdc_ncm_flags(struct usbnet *dev) { struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; if (cdc_ncm_comm_intf_is_mbim(dev->intf->cur_altsetting) && ctx->mbim_desc) return ctx->mbim_desc->bmNetworkCapabilities; if (ctx->func_desc) return ctx->func_desc->bmNetworkCapabilities; return 0; } static int cdc_ncm_eth_hlen(struct usbnet *dev) { if (cdc_ncm_comm_intf_is_mbim(dev->intf->cur_altsetting)) return 0; return ETH_HLEN; } static u32 cdc_ncm_min_dgram_size(struct usbnet *dev) { if (cdc_ncm_comm_intf_is_mbim(dev->intf->cur_altsetting)) return CDC_MBIM_MIN_DATAGRAM_SIZE; return CDC_NCM_MIN_DATAGRAM_SIZE; } static u32 cdc_ncm_max_dgram_size(struct usbnet *dev) { struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; if (cdc_ncm_comm_intf_is_mbim(dev->intf->cur_altsetting) && ctx->mbim_desc) return le16_to_cpu(ctx->mbim_desc->wMaxSegmentSize); if (ctx->ether_desc) return le16_to_cpu(ctx->ether_desc->wMaxSegmentSize); return CDC_NCM_MAX_DATAGRAM_SIZE; } /* initial one-time device setup. MUST be called with the data interface * in altsetting 0 */ static int cdc_ncm_init(struct usbnet *dev) { struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; u8 iface_no = ctx->control->cur_altsetting->desc.bInterfaceNumber; int err; err = usbnet_read_cmd(dev, USB_CDC_GET_NTB_PARAMETERS, USB_TYPE_CLASS | USB_DIR_IN |USB_RECIP_INTERFACE, 0, iface_no, &ctx->ncm_parm, sizeof(ctx->ncm_parm)); if (err < 0) { dev_err(&dev->intf->dev, "failed GET_NTB_PARAMETERS\n"); return err; /* GET_NTB_PARAMETERS is required */ } /* set CRC Mode */ if (cdc_ncm_flags(dev) & USB_CDC_NCM_NCAP_CRC_MODE) { dev_dbg(&dev->intf->dev, "Setting CRC mode off\n"); err = usbnet_write_cmd(dev, USB_CDC_SET_CRC_MODE, USB_TYPE_CLASS | USB_DIR_OUT | USB_RECIP_INTERFACE, USB_CDC_NCM_CRC_NOT_APPENDED, iface_no, NULL, 0); if (err < 0) dev_err(&dev->intf->dev, "SET_CRC_MODE failed\n"); } /* use ndp16 by default */ ctx->is_ndp16 = 1; /* set NTB format, if both formats are supported. * * "The host shall only send this command while the NCM Data * Interface is in alternate setting 0." 
*/ if (le16_to_cpu(ctx->ncm_parm.bmNtbFormatsSupported) & USB_CDC_NCM_NTB32_SUPPORTED) { if (ctx->drvflags & CDC_NCM_FLAG_PREFER_NTB32) { ctx->is_ndp16 = 0; dev_dbg(&dev->intf->dev, "Setting NTB format to 32-bit\n"); err = usbnet_write_cmd(dev, USB_CDC_SET_NTB_FORMAT, USB_TYPE_CLASS | USB_DIR_OUT | USB_RECIP_INTERFACE, USB_CDC_NCM_NTB32_FORMAT, iface_no, NULL, 0); } else { ctx->is_ndp16 = 1; dev_dbg(&dev->intf->dev, "Setting NTB format to 16-bit\n"); err = usbnet_write_cmd(dev, USB_CDC_SET_NTB_FORMAT, USB_TYPE_CLASS | USB_DIR_OUT | USB_RECIP_INTERFACE, USB_CDC_NCM_NTB16_FORMAT, iface_no, NULL, 0); } if (err < 0) { ctx->is_ndp16 = 1; dev_err(&dev->intf->dev, "SET_NTB_FORMAT failed\n"); } } /* set initial device values */ ctx->rx_max = le32_to_cpu(ctx->ncm_parm.dwNtbInMaxSize); ctx->tx_max = le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize); ctx->tx_remainder = le16_to_cpu(ctx->ncm_parm.wNdpOutPayloadRemainder); ctx->tx_modulus = le16_to_cpu(ctx->ncm_parm.wNdpOutDivisor); ctx->tx_ndp_modulus = le16_to_cpu(ctx->ncm_parm.wNdpOutAlignment); /* devices prior to NCM Errata shall set this field to zero */ ctx->tx_max_datagrams = le16_to_cpu(ctx->ncm_parm.wNtbOutMaxDatagrams); dev_dbg(&dev->intf->dev, "dwNtbInMaxSize=%u dwNtbOutMaxSize=%u wNdpOutPayloadRemainder=%u wNdpOutDivisor=%u wNdpOutAlignment=%u wNtbOutMaxDatagrams=%u flags=0x%x\n", ctx->rx_max, ctx->tx_max, ctx->tx_remainder, ctx->tx_modulus, ctx->tx_ndp_modulus, ctx->tx_max_datagrams, cdc_ncm_flags(dev)); /* max count of tx datagrams */ if ((ctx->tx_max_datagrams == 0) || (ctx->tx_max_datagrams > CDC_NCM_DPT_DATAGRAMS_MAX)) ctx->tx_max_datagrams = CDC_NCM_DPT_DATAGRAMS_MAX; /* set up maximum NDP size */ if (ctx->is_ndp16) ctx->max_ndp_size = sizeof(struct usb_cdc_ncm_ndp16) + (ctx->tx_max_datagrams + 1) * sizeof(struct usb_cdc_ncm_dpe16); else ctx->max_ndp_size = sizeof(struct usb_cdc_ncm_ndp32) + (ctx->tx_max_datagrams + 1) * sizeof(struct usb_cdc_ncm_dpe32); /* initial coalescing timer interval */ ctx->timer_interval = CDC_NCM_TIMER_INTERVAL_USEC * NSEC_PER_USEC; return 0; } /* set a new max datagram size */ static void cdc_ncm_set_dgram_size(struct usbnet *dev, int new_size) { struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; u8 iface_no = ctx->control->cur_altsetting->desc.bInterfaceNumber; __le16 max_datagram_size; u16 mbim_mtu; int err; /* set default based on descriptors */ ctx->max_datagram_size = clamp_t(u32, new_size, cdc_ncm_min_dgram_size(dev), CDC_NCM_MAX_DATAGRAM_SIZE); /* inform the device about the selected Max Datagram Size? 
*/ if (!(cdc_ncm_flags(dev) & USB_CDC_NCM_NCAP_MAX_DATAGRAM_SIZE)) goto out; /* read current mtu value from device */ err = usbnet_read_cmd(dev, USB_CDC_GET_MAX_DATAGRAM_SIZE, USB_TYPE_CLASS | USB_DIR_IN | USB_RECIP_INTERFACE, 0, iface_no, &max_datagram_size, sizeof(max_datagram_size)); if (err != sizeof(max_datagram_size)) { dev_dbg(&dev->intf->dev, "GET_MAX_DATAGRAM_SIZE failed\n"); goto out; } if (le16_to_cpu(max_datagram_size) == ctx->max_datagram_size) goto out; max_datagram_size = cpu_to_le16(ctx->max_datagram_size); err = usbnet_write_cmd(dev, USB_CDC_SET_MAX_DATAGRAM_SIZE, USB_TYPE_CLASS | USB_DIR_OUT | USB_RECIP_INTERFACE, 0, iface_no, &max_datagram_size, sizeof(max_datagram_size)); if (err < 0) dev_dbg(&dev->intf->dev, "SET_MAX_DATAGRAM_SIZE failed\n"); out: /* set MTU to max supported by the device if necessary */ dev->net->mtu = min_t(int, dev->net->mtu, ctx->max_datagram_size - cdc_ncm_eth_hlen(dev)); /* do not exceed operator preferred MTU */ if (ctx->mbim_extended_desc) { mbim_mtu = le16_to_cpu(ctx->mbim_extended_desc->wMTU); if (mbim_mtu != 0 && mbim_mtu < dev->net->mtu) dev->net->mtu = mbim_mtu; } } static void cdc_ncm_fix_modulus(struct usbnet *dev) { struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; u32 val; /* * verify that the structure alignment is: * - power of two * - not greater than the maximum transmit length * - not less than four bytes */ val = ctx->tx_ndp_modulus; if ((val < USB_CDC_NCM_NDP_ALIGN_MIN_SIZE) || (val != ((-val) & val)) || (val >= ctx->tx_max)) { dev_dbg(&dev->intf->dev, "Using default alignment: 4 bytes\n"); ctx->tx_ndp_modulus = USB_CDC_NCM_NDP_ALIGN_MIN_SIZE; } /* * verify that the payload alignment is: * - power of two * - not greater than the maximum transmit length * - not less than four bytes */ val = ctx->tx_modulus; if ((val < USB_CDC_NCM_NDP_ALIGN_MIN_SIZE) || (val != ((-val) & val)) || (val >= ctx->tx_max)) { dev_dbg(&dev->intf->dev, "Using default transmit modulus: 4 bytes\n"); ctx->tx_modulus = USB_CDC_NCM_NDP_ALIGN_MIN_SIZE; } /* verify the payload remainder */ if (ctx->tx_remainder >= ctx->tx_modulus) { dev_dbg(&dev->intf->dev, "Using default transmit remainder: 0 bytes\n"); ctx->tx_remainder = 0; } /* adjust TX-remainder according to NCM specification. 
*/ ctx->tx_remainder = ((ctx->tx_remainder - cdc_ncm_eth_hlen(dev)) & (ctx->tx_modulus - 1)); } static int cdc_ncm_setup(struct usbnet *dev) { struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; u32 def_rx, def_tx; /* be conservative when selecting initial buffer size to * increase the number of hosts this will work for */ def_rx = min_t(u32, CDC_NCM_NTB_DEF_SIZE_RX, le32_to_cpu(ctx->ncm_parm.dwNtbInMaxSize)); def_tx = min_t(u32, CDC_NCM_NTB_DEF_SIZE_TX, le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize)); /* clamp rx_max and tx_max and inform device */ cdc_ncm_update_rxtx_max(dev, def_rx, def_tx); /* sanitize the modulus and remainder values */ cdc_ncm_fix_modulus(dev); /* set max datagram size */ cdc_ncm_set_dgram_size(dev, cdc_ncm_max_dgram_size(dev)); return 0; } static void cdc_ncm_find_endpoints(struct usbnet *dev, struct usb_interface *intf) { struct usb_host_endpoint *e, *in = NULL, *out = NULL; u8 ep; for (ep = 0; ep < intf->cur_altsetting->desc.bNumEndpoints; ep++) { e = intf->cur_altsetting->endpoint + ep; /* ignore endpoints which cannot transfer data */ if (!usb_endpoint_maxp(&e->desc)) continue; switch (e->desc.bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) { case USB_ENDPOINT_XFER_INT: if (usb_endpoint_dir_in(&e->desc)) { if (!dev->status) dev->status = e; } break; case USB_ENDPOINT_XFER_BULK: if (usb_endpoint_dir_in(&e->desc)) { if (!in) in = e; } else { if (!out) out = e; } break; default: break; } } if (in && !dev->in) dev->in = usb_rcvbulkpipe(dev->udev, in->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK); if (out && !dev->out) dev->out = usb_sndbulkpipe(dev->udev, out->desc.bEndpointAddress & USB_ENDPOINT_NUMBER_MASK); } static void cdc_ncm_free(struct cdc_ncm_ctx *ctx) { if (ctx == NULL) return; if (ctx->tx_rem_skb != NULL) { dev_kfree_skb_any(ctx->tx_rem_skb); ctx->tx_rem_skb = NULL; } if (ctx->tx_curr_skb != NULL) { dev_kfree_skb_any(ctx->tx_curr_skb); ctx->tx_curr_skb = NULL; } if (ctx->is_ndp16) kfree(ctx->delayed_ndp16); else kfree(ctx->delayed_ndp32); kfree(ctx); } /* we need to override the usbnet change_mtu ndo for two reasons: * - respect the negotiated maximum datagram size * - avoid unwanted changes to rx and tx buffers */ int cdc_ncm_change_mtu(struct net_device *net, int new_mtu) { struct usbnet *dev = netdev_priv(net); WRITE_ONCE(net->mtu, new_mtu); cdc_ncm_set_dgram_size(dev, new_mtu + cdc_ncm_eth_hlen(dev)); return 0; } EXPORT_SYMBOL_GPL(cdc_ncm_change_mtu); static const struct net_device_ops cdc_ncm_netdev_ops = { .ndo_open = usbnet_open, .ndo_stop = usbnet_stop, .ndo_start_xmit = usbnet_start_xmit, .ndo_tx_timeout = usbnet_tx_timeout, .ndo_set_rx_mode = usbnet_set_rx_mode, .ndo_get_stats64 = dev_get_tstats64, .ndo_change_mtu = cdc_ncm_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_altsetting, int drvflags) { struct cdc_ncm_ctx *ctx; struct usb_driver *driver; u8 *buf; int len; int temp; u8 iface_no; struct usb_cdc_parsed_header hdr; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; ctx->dev = dev; hrtimer_setup(&ctx->tx_timer, &cdc_ncm_tx_timer_cb, CLOCK_MONOTONIC, HRTIMER_MODE_REL); tasklet_setup(&ctx->bh, cdc_ncm_txpath_bh); atomic_set(&ctx->stop, 0); spin_lock_init(&ctx->mtx); /* store ctx pointer in device data field */ dev->data[0] = (unsigned long)ctx; /* only the control interface can be successfully probed */ ctx->control = intf; /* get some pointers */ driver = driver_of(intf); buf = 
intf->cur_altsetting->extra; len = intf->cur_altsetting->extralen; /* parse through descriptors associated with control interface */ cdc_parse_cdc_header(&hdr, intf, buf, len); if (hdr.usb_cdc_union_desc) ctx->data = usb_ifnum_to_if(dev->udev, hdr.usb_cdc_union_desc->bSlaveInterface0); ctx->ether_desc = hdr.usb_cdc_ether_desc; ctx->func_desc = hdr.usb_cdc_ncm_desc; ctx->mbim_desc = hdr.usb_cdc_mbim_desc; ctx->mbim_extended_desc = hdr.usb_cdc_mbim_extended_desc; /* some buggy devices have an IAD but no CDC Union */ if (!hdr.usb_cdc_union_desc && intf->intf_assoc && intf->intf_assoc->bInterfaceCount == 2) { ctx->data = usb_ifnum_to_if(dev->udev, intf->cur_altsetting->desc.bInterfaceNumber + 1); dev_dbg(&intf->dev, "CDC Union missing - got slave from IAD\n"); } /* check if we got everything */ if (!ctx->data) { dev_err(&intf->dev, "CDC Union missing and no IAD found\n"); goto error; } if (cdc_ncm_comm_intf_is_mbim(intf->cur_altsetting)) { if (!ctx->mbim_desc) { dev_err(&intf->dev, "MBIM functional descriptor missing\n"); goto error; } } else { if (!ctx->ether_desc || !ctx->func_desc) { dev_err(&intf->dev, "NCM or ECM functional descriptors missing\n"); goto error; } } /* claim data interface, if different from control */ if (ctx->data != ctx->control) { temp = usb_driver_claim_interface(driver, ctx->data, dev); if (temp) { dev_err(&intf->dev, "failed to claim data intf\n"); goto error; } } iface_no = ctx->data->cur_altsetting->desc.bInterfaceNumber; /* Device-specific flags */ ctx->drvflags = drvflags; /* Reset data interface. Some devices will not reset properly * unless they are configured first. Toggle the altsetting to * force a reset. * Some other devices do not work properly with this procedure * that can be avoided using quirk CDC_MBIM_FLAG_AVOID_ALTSETTING_TOGGLE */ if (!(ctx->drvflags & CDC_MBIM_FLAG_AVOID_ALTSETTING_TOGGLE)) usb_set_interface(dev->udev, iface_no, data_altsetting); temp = usb_set_interface(dev->udev, iface_no, 0); if (temp) { dev_dbg(&intf->dev, "set interface failed\n"); goto error2; } /* initialize basic device settings */ if (cdc_ncm_init(dev)) goto error2; /* Some firmwares need a pause here or they will silently fail * to set up the interface properly. This value was decided * empirically on a Sierra Wireless MC7455 running 02.08.02.00 * firmware. */ usleep_range(10000, 20000); /* configure data interface */ temp = usb_set_interface(dev->udev, iface_no, data_altsetting); if (temp) { dev_dbg(&intf->dev, "set interface failed\n"); goto error2; } cdc_ncm_find_endpoints(dev, ctx->data); cdc_ncm_find_endpoints(dev, ctx->control); if (!dev->in || !dev->out || (!dev->status && dev->driver_info->flags & FLAG_LINK_INTR)) { dev_dbg(&intf->dev, "failed to collect endpoints\n"); goto error2; } usb_set_intfdata(ctx->control, dev); if (ctx->ether_desc) { temp = usbnet_get_ethernet_addr(dev, ctx->ether_desc->iMACAddress); if (temp) { dev_err(&intf->dev, "failed to get mac address\n"); goto error2; } dev_info(&intf->dev, "MAC-Address: %pM\n", dev->net->dev_addr); } /* finish setting up the device specific data */ cdc_ncm_setup(dev); /* Allocate the delayed NDP if needed. 
*/ if (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END) { if (ctx->is_ndp16) { ctx->delayed_ndp16 = kzalloc(ctx->max_ndp_size, GFP_KERNEL); if (!ctx->delayed_ndp16) goto error2; } else { ctx->delayed_ndp32 = kzalloc(ctx->max_ndp_size, GFP_KERNEL); if (!ctx->delayed_ndp32) goto error2; } dev_info(&intf->dev, "NDP will be placed at end of frame for this device."); } /* override ethtool_ops */ dev->net->ethtool_ops = &cdc_ncm_ethtool_ops; /* add our sysfs attrs */ dev->net->sysfs_groups[0] = &cdc_ncm_sysfs_attr_group; /* must handle MTU changes */ dev->net->netdev_ops = &cdc_ncm_netdev_ops; dev->net->max_mtu = cdc_ncm_max_dgram_size(dev) - cdc_ncm_eth_hlen(dev); return 0; error2: usb_set_intfdata(ctx->control, NULL); usb_set_intfdata(ctx->data, NULL); if (ctx->data != ctx->control) usb_driver_release_interface(driver, ctx->data); error: cdc_ncm_free((struct cdc_ncm_ctx *)dev->data[0]); dev->data[0] = 0; dev_info(&intf->dev, "bind() failure\n"); return -ENODEV; } EXPORT_SYMBOL_GPL(cdc_ncm_bind_common); void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf) { struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; struct usb_driver *driver = driver_of(intf); if (ctx == NULL) return; /* no setup */ atomic_set(&ctx->stop, 1); hrtimer_cancel(&ctx->tx_timer); tasklet_kill(&ctx->bh); /* handle devices with combined control and data interface */ if (ctx->control == ctx->data) ctx->data = NULL; /* disconnect master --> disconnect slave */ if (intf == ctx->control && ctx->data) { usb_set_intfdata(ctx->data, NULL); usb_driver_release_interface(driver, ctx->data); ctx->data = NULL; } else if (intf == ctx->data && ctx->control) { usb_set_intfdata(ctx->control, NULL); usb_driver_release_interface(driver, ctx->control); ctx->control = NULL; } usb_set_intfdata(intf, NULL); cdc_ncm_free(ctx); } EXPORT_SYMBOL_GPL(cdc_ncm_unbind); /* Return the number of the MBIM control interface altsetting iff it * is preferred and available, */ u8 cdc_ncm_select_altsetting(struct usb_interface *intf) { struct usb_host_interface *alt; /* The MBIM spec defines a NCM compatible default altsetting, * which we may have matched: * * "Functions that implement both NCM 1.0 and MBIM (an * “NCM/MBIM function”) according to this recommendation * shall provide two alternate settings for the * Communication Interface. Alternate setting 0, and the * associated class and endpoint descriptors, shall be * constructed according to the rules given for the * Communication Interface in section 5 of [USBNCM10]. * Alternate setting 1, and the associated class and * endpoint descriptors, shall be constructed according to * the rules given in section 6 (USB Device Model) of this * specification." */ if (intf->num_altsetting < 2) return intf->cur_altsetting->desc.bAlternateSetting; if (prefer_mbim) { alt = usb_altnum_to_altsetting(intf, CDC_NCM_COMM_ALTSETTING_MBIM); if (alt && cdc_ncm_comm_intf_is_mbim(alt)) return CDC_NCM_COMM_ALTSETTING_MBIM; } return CDC_NCM_COMM_ALTSETTING_NCM; } EXPORT_SYMBOL_GPL(cdc_ncm_select_altsetting); static int cdc_ncm_bind(struct usbnet *dev, struct usb_interface *intf) { /* MBIM backwards compatible function? */ if (cdc_ncm_select_altsetting(intf) != CDC_NCM_COMM_ALTSETTING_NCM) return -ENODEV; /* The NCM data altsetting is fixed, so we hard-coded it. * Additionally, generic NCM devices are assumed to accept arbitrarily * placed NDP. 
*/ return cdc_ncm_bind_common(dev, intf, CDC_NCM_DATA_ALTSETTING_NCM, 0); } static void cdc_ncm_align_tail(struct sk_buff *skb, size_t modulus, size_t remainder, size_t max) { size_t align = ALIGN(skb->len, modulus) - skb->len + remainder; if (skb->len + align > max) align = max - skb->len; if (align && skb_tailroom(skb) >= align) skb_put_zero(skb, align); } /* return a pointer to a valid struct usb_cdc_ncm_ndp16 of type sign, possibly * allocating a new one within skb */ static struct usb_cdc_ncm_ndp16 *cdc_ncm_ndp16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign, size_t reserve) { struct usb_cdc_ncm_ndp16 *ndp16 = NULL; struct usb_cdc_ncm_nth16 *nth16 = (void *)skb->data; size_t ndpoffset = le16_to_cpu(nth16->wNdpIndex); /* If NDP should be moved to the end of the NCM package, we can't follow the * NTH16 header as we would normally do. NDP isn't written to the SKB yet, and * the wNdpIndex field in the header is actually not consistent with reality. It will be later. */ if (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END) { if (ctx->delayed_ndp16->dwSignature == sign) return ctx->delayed_ndp16; /* We can only push a single NDP to the end. Return * NULL to send what we've already got and queue this * skb for later. */ else if (ctx->delayed_ndp16->dwSignature) return NULL; } /* follow the chain of NDPs, looking for a match */ while (ndpoffset) { ndp16 = (struct usb_cdc_ncm_ndp16 *)(skb->data + ndpoffset); if (ndp16->dwSignature == sign) return ndp16; ndpoffset = le16_to_cpu(ndp16->wNextNdpIndex); } /* align new NDP */ if (!(ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END)) cdc_ncm_align_tail(skb, ctx->tx_ndp_modulus, 0, ctx->tx_curr_size); /* verify that there is room for the NDP and the datagram (reserve) */ if ((ctx->tx_curr_size - skb->len - reserve) < ctx->max_ndp_size) return NULL; /* link to it */ if (ndp16) ndp16->wNextNdpIndex = cpu_to_le16(skb->len); else nth16->wNdpIndex = cpu_to_le16(skb->len); /* push a new empty NDP */ if (!(ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END)) ndp16 = skb_put_zero(skb, ctx->max_ndp_size); else ndp16 = ctx->delayed_ndp16; ndp16->dwSignature = sign; ndp16->wLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_ndp16) + sizeof(struct usb_cdc_ncm_dpe16)); return ndp16; } static struct usb_cdc_ncm_ndp32 *cdc_ncm_ndp32(struct cdc_ncm_ctx *ctx, struct sk_buff *skb, __le32 sign, size_t reserve) { struct usb_cdc_ncm_ndp32 *ndp32 = NULL; struct usb_cdc_ncm_nth32 *nth32 = (void *)skb->data; size_t ndpoffset = le32_to_cpu(nth32->dwNdpIndex); /* If NDP should be moved to the end of the NCM package, we can't follow the * NTH32 header as we would normally do. NDP isn't written to the SKB yet, and * the wNdpIndex field in the header is actually not consistent with reality. It will be later. */ if (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END) { if (ctx->delayed_ndp32->dwSignature == sign) return ctx->delayed_ndp32; /* We can only push a single NDP to the end. Return * NULL to send what we've already got and queue this * skb for later. 
*/ else if (ctx->delayed_ndp32->dwSignature) return NULL; } /* follow the chain of NDPs, looking for a match */ while (ndpoffset) { ndp32 = (struct usb_cdc_ncm_ndp32 *)(skb->data + ndpoffset); if (ndp32->dwSignature == sign) return ndp32; ndpoffset = le32_to_cpu(ndp32->dwNextNdpIndex); } /* align new NDP */ if (!(ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END)) cdc_ncm_align_tail(skb, ctx->tx_ndp_modulus, 0, ctx->tx_curr_size); /* verify that there is room for the NDP and the datagram (reserve) */ if ((ctx->tx_curr_size - skb->len - reserve) < ctx->max_ndp_size) return NULL; /* link to it */ if (ndp32) ndp32->dwNextNdpIndex = cpu_to_le32(skb->len); else nth32->dwNdpIndex = cpu_to_le32(skb->len); /* push a new empty NDP */ if (!(ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END)) ndp32 = skb_put_zero(skb, ctx->max_ndp_size); else ndp32 = ctx->delayed_ndp32; ndp32->dwSignature = sign; ndp32->wLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_ndp32) + sizeof(struct usb_cdc_ncm_dpe32)); return ndp32; } struct sk_buff * cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign) { struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; union { struct usb_cdc_ncm_nth16 *nth16; struct usb_cdc_ncm_nth32 *nth32; } nth; union { struct usb_cdc_ncm_ndp16 *ndp16; struct usb_cdc_ncm_ndp32 *ndp32; } ndp; struct sk_buff *skb_out; u16 n = 0, index, ndplen; u8 ready2send = 0; u32 delayed_ndp_size; size_t padding_count; /* When our NDP gets written in cdc_ncm_ndp(), then skb_out->len gets updated * accordingly. Otherwise, we should check here. */ if (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END) delayed_ndp_size = ctx->max_ndp_size + max_t(u32, ctx->tx_ndp_modulus, ctx->tx_modulus + ctx->tx_remainder) - 1; else delayed_ndp_size = 0; /* if there is a remaining skb, it gets priority */ if (skb != NULL) { swap(skb, ctx->tx_rem_skb); swap(sign, ctx->tx_rem_sign); } else { ready2send = 1; } /* check if we are resuming an OUT skb */ skb_out = ctx->tx_curr_skb; /* allocate a new OUT skb */ if (!skb_out) { if (ctx->tx_low_mem_val == 0) { ctx->tx_curr_size = ctx->tx_max; skb_out = alloc_skb(ctx->tx_curr_size, GFP_ATOMIC); /* If the memory allocation fails we will wait longer * each time before attempting another full size * allocation again to not overload the system * further. */ if (skb_out == NULL) { /* If even the smallest allocation fails, abort. */ if (ctx->tx_curr_size == USB_CDC_NCM_NTB_MIN_OUT_SIZE) goto alloc_failed; ctx->tx_low_mem_max_cnt = min(ctx->tx_low_mem_max_cnt + 1, (unsigned)CDC_NCM_LOW_MEM_MAX_CNT); ctx->tx_low_mem_val = ctx->tx_low_mem_max_cnt; } } if (skb_out == NULL) { /* See if a very small allocation is possible. * We will send this packet immediately and hope * that there is more memory available later. 
*/ if (skb) ctx->tx_curr_size = max(skb->len, (u32)USB_CDC_NCM_NTB_MIN_OUT_SIZE); else ctx->tx_curr_size = USB_CDC_NCM_NTB_MIN_OUT_SIZE; skb_out = alloc_skb(ctx->tx_curr_size, GFP_ATOMIC); /* No allocation possible so we will abort */ if (!skb_out) goto alloc_failed; ctx->tx_low_mem_val--; } if (ctx->is_ndp16) { /* fill out the initial 16-bit NTB header */ nth.nth16 = skb_put_zero(skb_out, sizeof(struct usb_cdc_ncm_nth16)); nth.nth16->dwSignature = cpu_to_le32(USB_CDC_NCM_NTH16_SIGN); nth.nth16->wHeaderLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16)); nth.nth16->wSequence = cpu_to_le16(ctx->tx_seq++); } else { /* fill out the initial 32-bit NTB header */ nth.nth32 = skb_put_zero(skb_out, sizeof(struct usb_cdc_ncm_nth32)); nth.nth32->dwSignature = cpu_to_le32(USB_CDC_NCM_NTH32_SIGN); nth.nth32->wHeaderLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_nth32)); nth.nth32->wSequence = cpu_to_le16(ctx->tx_seq++); } /* count total number of frames in this NTB */ ctx->tx_curr_frame_num = 0; /* recent payload counter for this skb_out */ ctx->tx_curr_frame_payload = 0; } for (n = ctx->tx_curr_frame_num; n < ctx->tx_max_datagrams; n++) { /* send any remaining skb first */ if (skb == NULL) { skb = ctx->tx_rem_skb; sign = ctx->tx_rem_sign; ctx->tx_rem_skb = NULL; /* check for end of skb */ if (skb == NULL) break; } /* get the appropriate NDP for this skb */ if (ctx->is_ndp16) ndp.ndp16 = cdc_ncm_ndp16(ctx, skb_out, sign, skb->len + ctx->tx_modulus + ctx->tx_remainder); else ndp.ndp32 = cdc_ncm_ndp32(ctx, skb_out, sign, skb->len + ctx->tx_modulus + ctx->tx_remainder); /* align beginning of next frame */ cdc_ncm_align_tail(skb_out, ctx->tx_modulus, ctx->tx_remainder, ctx->tx_curr_size); /* check if we had enough room left for both NDP and frame */ if ((ctx->is_ndp16 && !ndp.ndp16) || (!ctx->is_ndp16 && !ndp.ndp32) || skb_out->len + skb->len + delayed_ndp_size > ctx->tx_curr_size) { if (n == 0) { /* won't fit, MTU problem? 
*/ dev_kfree_skb_any(skb); skb = NULL; dev->net->stats.tx_dropped++; } else { /* no room for skb - store for later */ if (ctx->tx_rem_skb != NULL) { dev_kfree_skb_any(ctx->tx_rem_skb); dev->net->stats.tx_dropped++; } ctx->tx_rem_skb = skb; ctx->tx_rem_sign = sign; skb = NULL; ready2send = 1; ctx->tx_reason_ntb_full++; /* count reason for transmitting */ } break; } /* calculate frame number within this NDP */ if (ctx->is_ndp16) { ndplen = le16_to_cpu(ndp.ndp16->wLength); index = (ndplen - sizeof(struct usb_cdc_ncm_ndp16)) / sizeof(struct usb_cdc_ncm_dpe16) - 1; /* OK, add this skb */ ndp.ndp16->dpe16[index].wDatagramLength = cpu_to_le16(skb->len); ndp.ndp16->dpe16[index].wDatagramIndex = cpu_to_le16(skb_out->len); ndp.ndp16->wLength = cpu_to_le16(ndplen + sizeof(struct usb_cdc_ncm_dpe16)); } else { ndplen = le16_to_cpu(ndp.ndp32->wLength); index = (ndplen - sizeof(struct usb_cdc_ncm_ndp32)) / sizeof(struct usb_cdc_ncm_dpe32) - 1; ndp.ndp32->dpe32[index].dwDatagramLength = cpu_to_le32(skb->len); ndp.ndp32->dpe32[index].dwDatagramIndex = cpu_to_le32(skb_out->len); ndp.ndp32->wLength = cpu_to_le16(ndplen + sizeof(struct usb_cdc_ncm_dpe32)); } skb_put_data(skb_out, skb->data, skb->len); ctx->tx_curr_frame_payload += skb->len; /* count real tx payload data */ dev_kfree_skb_any(skb); skb = NULL; /* send now if this NDP is full */ if (index >= CDC_NCM_DPT_DATAGRAMS_MAX) { ready2send = 1; ctx->tx_reason_ndp_full++; /* count reason for transmitting */ break; } } /* free up any dangling skb */ if (skb != NULL) { dev_kfree_skb_any(skb); skb = NULL; dev->net->stats.tx_dropped++; } ctx->tx_curr_frame_num = n; if (n == 0) { /* wait for more frames */ /* push variables */ ctx->tx_curr_skb = skb_out; goto exit_no_skb; } else if ((n < ctx->tx_max_datagrams) && (ready2send == 0) && (ctx->timer_interval > 0)) { /* wait for more frames */ /* push variables */ ctx->tx_curr_skb = skb_out; /* set the pending count */ if (n < CDC_NCM_RESTART_TIMER_DATAGRAM_CNT) ctx->tx_timer_pending = CDC_NCM_TIMER_PENDING_CNT; goto exit_no_skb; } else { if (n == ctx->tx_max_datagrams) ctx->tx_reason_max_datagram++; /* count reason for transmitting */ /* frame goes out */ /* variables will be reset at next call */ } /* If requested, put NDP at end of frame. */ if (ctx->drvflags & CDC_NCM_FLAG_NDP_TO_END) { if (ctx->is_ndp16) { nth.nth16 = (struct usb_cdc_ncm_nth16 *)skb_out->data; cdc_ncm_align_tail(skb_out, ctx->tx_ndp_modulus, 0, ctx->tx_curr_size - ctx->max_ndp_size); nth.nth16->wNdpIndex = cpu_to_le16(skb_out->len); skb_put_data(skb_out, ctx->delayed_ndp16, ctx->max_ndp_size); /* Zero out delayed NDP - signature checking will naturally fail. */ ndp.ndp16 = memset(ctx->delayed_ndp16, 0, ctx->max_ndp_size); } else { nth.nth32 = (struct usb_cdc_ncm_nth32 *)skb_out->data; cdc_ncm_align_tail(skb_out, ctx->tx_ndp_modulus, 0, ctx->tx_curr_size - ctx->max_ndp_size); nth.nth32->dwNdpIndex = cpu_to_le32(skb_out->len); skb_put_data(skb_out, ctx->delayed_ndp32, ctx->max_ndp_size); ndp.ndp32 = memset(ctx->delayed_ndp32, 0, ctx->max_ndp_size); } } /* If collected data size is less or equal ctx->min_tx_pkt * bytes, we send buffers as it is. If we get more data, it * would be more efficient for USB HS mobile device with DMA * engine to receive a full size NTB, than canceling DMA * transfer and receiving a short packet. * * This optimization support is pointless if we end up sending * a ZLP after full sized NTBs. 
*/ if (!(dev->driver_info->flags & FLAG_SEND_ZLP) && skb_out->len > ctx->min_tx_pkt) { padding_count = ctx->tx_curr_size - skb_out->len; if (!WARN_ON(padding_count > ctx->tx_curr_size)) skb_put_zero(skb_out, padding_count); } else if (skb_out->len < ctx->tx_curr_size && (skb_out->len % dev->maxpacket) == 0) { skb_put_u8(skb_out, 0); /* force short packet */ } /* set final frame length */ if (ctx->is_ndp16) { nth.nth16 = (struct usb_cdc_ncm_nth16 *)skb_out->data; nth.nth16->wBlockLength = cpu_to_le16(skb_out->len); } else { nth.nth32 = (struct usb_cdc_ncm_nth32 *)skb_out->data; nth.nth32->dwBlockLength = cpu_to_le32(skb_out->len); } /* return skb */ ctx->tx_curr_skb = NULL; /* keep private stats: framing overhead and number of NTBs */ ctx->tx_overhead += skb_out->len - ctx->tx_curr_frame_payload; ctx->tx_ntbs++; /* usbnet will count all the framing overhead by default. * Adjust the stats so that the tx_bytes counter show real * payload data instead. */ usbnet_set_skb_tx_stats(skb_out, n, (long)ctx->tx_curr_frame_payload - skb_out->len); return skb_out; alloc_failed: if (skb) { dev_kfree_skb_any(skb); dev->net->stats.tx_dropped++; } exit_no_skb: /* Start timer, if there is a remaining non-empty skb */ if (ctx->tx_curr_skb != NULL && n > 0) cdc_ncm_tx_timeout_start(ctx); return NULL; } EXPORT_SYMBOL_GPL(cdc_ncm_fill_tx_frame); static void cdc_ncm_tx_timeout_start(struct cdc_ncm_ctx *ctx) { /* start timer, if not already started */ if (!(hrtimer_active(&ctx->tx_timer) || atomic_read(&ctx->stop))) hrtimer_start(&ctx->tx_timer, ctx->timer_interval, HRTIMER_MODE_REL); } static enum hrtimer_restart cdc_ncm_tx_timer_cb(struct hrtimer *timer) { struct cdc_ncm_ctx *ctx = container_of(timer, struct cdc_ncm_ctx, tx_timer); if (!atomic_read(&ctx->stop)) tasklet_schedule(&ctx->bh); return HRTIMER_NORESTART; } static void cdc_ncm_txpath_bh(struct tasklet_struct *t) { struct cdc_ncm_ctx *ctx = from_tasklet(ctx, t, bh); struct usbnet *dev = ctx->dev; spin_lock(&ctx->mtx); if (ctx->tx_timer_pending != 0) { ctx->tx_timer_pending--; cdc_ncm_tx_timeout_start(ctx); spin_unlock(&ctx->mtx); } else if (dev->net != NULL) { ctx->tx_reason_timeout++; /* count reason for transmitting */ spin_unlock(&ctx->mtx); netif_tx_lock_bh(dev->net); usbnet_start_xmit(NULL, dev->net); netif_tx_unlock_bh(dev->net); } else { spin_unlock(&ctx->mtx); } } struct sk_buff * cdc_ncm_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags) { struct sk_buff *skb_out; struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; /* * The Ethernet API we are using does not support transmitting * multiple Ethernet frames in a single call. This driver will * accumulate multiple Ethernet frames and send out a larger * USB frame when the USB buffer is full or when a single jiffies * timeout happens. 
*/ if (ctx == NULL) goto error; spin_lock_bh(&ctx->mtx); if (ctx->is_ndp16) skb_out = cdc_ncm_fill_tx_frame(dev, skb, cpu_to_le32(USB_CDC_NCM_NDP16_NOCRC_SIGN)); else skb_out = cdc_ncm_fill_tx_frame(dev, skb, cpu_to_le32(USB_CDC_NCM_NDP32_NOCRC_SIGN)); spin_unlock_bh(&ctx->mtx); return skb_out; error: if (skb != NULL) dev_kfree_skb_any(skb); return NULL; } EXPORT_SYMBOL_GPL(cdc_ncm_tx_fixup); /* verify NTB header and return offset of first NDP, or negative error */ int cdc_ncm_rx_verify_nth16(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in) { struct usbnet *dev = netdev_priv(skb_in->dev); struct usb_cdc_ncm_nth16 *nth16; int len; int ret = -EINVAL; if (ctx == NULL) goto error; if (skb_in->len < (sizeof(struct usb_cdc_ncm_nth16) + sizeof(struct usb_cdc_ncm_ndp16))) { netif_dbg(dev, rx_err, dev->net, "frame too short\n"); goto error; } nth16 = (struct usb_cdc_ncm_nth16 *)skb_in->data; if (nth16->dwSignature != cpu_to_le32(USB_CDC_NCM_NTH16_SIGN)) { netif_dbg(dev, rx_err, dev->net, "invalid NTH16 signature <%#010x>\n", le32_to_cpu(nth16->dwSignature)); goto error; } len = le16_to_cpu(nth16->wBlockLength); if (len > ctx->rx_max) { netif_dbg(dev, rx_err, dev->net, "unsupported NTB block length %u/%u\n", len, ctx->rx_max); goto error; } if ((ctx->rx_seq + 1) != le16_to_cpu(nth16->wSequence) && (ctx->rx_seq || le16_to_cpu(nth16->wSequence)) && !((ctx->rx_seq == 0xffff) && !le16_to_cpu(nth16->wSequence))) { netif_dbg(dev, rx_err, dev->net, "sequence number glitch prev=%d curr=%d\n", ctx->rx_seq, le16_to_cpu(nth16->wSequence)); } ctx->rx_seq = le16_to_cpu(nth16->wSequence); ret = le16_to_cpu(nth16->wNdpIndex); error: return ret; } EXPORT_SYMBOL_GPL(cdc_ncm_rx_verify_nth16); int cdc_ncm_rx_verify_nth32(struct cdc_ncm_ctx *ctx, struct sk_buff *skb_in) { struct usbnet *dev = netdev_priv(skb_in->dev); struct usb_cdc_ncm_nth32 *nth32; int len; int ret = -EINVAL; if (ctx == NULL) goto error; if (skb_in->len < (sizeof(struct usb_cdc_ncm_nth32) + sizeof(struct usb_cdc_ncm_ndp32))) { netif_dbg(dev, rx_err, dev->net, "frame too short\n"); goto error; } nth32 = (struct usb_cdc_ncm_nth32 *)skb_in->data; if (nth32->dwSignature != cpu_to_le32(USB_CDC_NCM_NTH32_SIGN)) { netif_dbg(dev, rx_err, dev->net, "invalid NTH32 signature <%#010x>\n", le32_to_cpu(nth32->dwSignature)); goto error; } len = le32_to_cpu(nth32->dwBlockLength); if (len > ctx->rx_max) { netif_dbg(dev, rx_err, dev->net, "unsupported NTB block length %u/%u\n", len, ctx->rx_max); goto error; } if ((ctx->rx_seq + 1) != le16_to_cpu(nth32->wSequence) && (ctx->rx_seq || le16_to_cpu(nth32->wSequence)) && !((ctx->rx_seq == 0xffff) && !le16_to_cpu(nth32->wSequence))) { netif_dbg(dev, rx_err, dev->net, "sequence number glitch prev=%d curr=%d\n", ctx->rx_seq, le16_to_cpu(nth32->wSequence)); } ctx->rx_seq = le16_to_cpu(nth32->wSequence); ret = le32_to_cpu(nth32->dwNdpIndex); error: return ret; } EXPORT_SYMBOL_GPL(cdc_ncm_rx_verify_nth32); /* verify NDP header and return number of datagrams, or negative error */ int cdc_ncm_rx_verify_ndp16(struct sk_buff *skb_in, int ndpoffset) { struct usbnet *dev = netdev_priv(skb_in->dev); struct usb_cdc_ncm_ndp16 *ndp16; int ret = -EINVAL; if ((ndpoffset + sizeof(struct usb_cdc_ncm_ndp16)) > skb_in->len) { netif_dbg(dev, rx_err, dev->net, "invalid NDP offset <%u>\n", ndpoffset); goto error; } ndp16 = (struct usb_cdc_ncm_ndp16 *)(skb_in->data + ndpoffset); if (le16_to_cpu(ndp16->wLength) < USB_CDC_NCM_NDP16_LENGTH_MIN) { netif_dbg(dev, rx_err, dev->net, "invalid DPT16 length <%u>\n", le16_to_cpu(ndp16->wLength)); goto error; 
} ret = ((le16_to_cpu(ndp16->wLength) - sizeof(struct usb_cdc_ncm_ndp16)) / sizeof(struct usb_cdc_ncm_dpe16)); ret--; /* we process NDP entries except for the last one */ if ((sizeof(struct usb_cdc_ncm_ndp16) + ret * (sizeof(struct usb_cdc_ncm_dpe16))) > skb_in->len) { netif_dbg(dev, rx_err, dev->net, "Invalid nframes = %d\n", ret); ret = -EINVAL; } error: return ret; } EXPORT_SYMBOL_GPL(cdc_ncm_rx_verify_ndp16); /* verify NDP header and return number of datagrams, or negative error */ int cdc_ncm_rx_verify_ndp32(struct sk_buff *skb_in, int ndpoffset) { struct usbnet *dev = netdev_priv(skb_in->dev); struct usb_cdc_ncm_ndp32 *ndp32; int ret = -EINVAL; if ((ndpoffset + sizeof(struct usb_cdc_ncm_ndp32)) > skb_in->len) { netif_dbg(dev, rx_err, dev->net, "invalid NDP offset <%u>\n", ndpoffset); goto error; } ndp32 = (struct usb_cdc_ncm_ndp32 *)(skb_in->data + ndpoffset); if (le16_to_cpu(ndp32->wLength) < USB_CDC_NCM_NDP32_LENGTH_MIN) { netif_dbg(dev, rx_err, dev->net, "invalid DPT32 length <%u>\n", le16_to_cpu(ndp32->wLength)); goto error; } ret = ((le16_to_cpu(ndp32->wLength) - sizeof(struct usb_cdc_ncm_ndp32)) / sizeof(struct usb_cdc_ncm_dpe32)); ret--; /* we process NDP entries except for the last one */ if ((sizeof(struct usb_cdc_ncm_ndp32) + ret * (sizeof(struct usb_cdc_ncm_dpe32))) > skb_in->len) { netif_dbg(dev, rx_err, dev->net, "Invalid nframes = %d\n", ret); ret = -EINVAL; } error: return ret; } EXPORT_SYMBOL_GPL(cdc_ncm_rx_verify_ndp32); int cdc_ncm_rx_fixup(struct usbnet *dev, struct sk_buff *skb_in) { struct sk_buff *skb; struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; unsigned int len; int nframes; int x; unsigned int offset; union { struct usb_cdc_ncm_ndp16 *ndp16; struct usb_cdc_ncm_ndp32 *ndp32; } ndp; union { struct usb_cdc_ncm_dpe16 *dpe16; struct usb_cdc_ncm_dpe32 *dpe32; } dpe; int ndpoffset; int loopcount = 50; /* arbitrary max preventing infinite loop */ u32 payload = 0; if (ctx->is_ndp16) ndpoffset = cdc_ncm_rx_verify_nth16(ctx, skb_in); else ndpoffset = cdc_ncm_rx_verify_nth32(ctx, skb_in); if (ndpoffset < 0) goto error; next_ndp: if (ctx->is_ndp16) { nframes = cdc_ncm_rx_verify_ndp16(skb_in, ndpoffset); if (nframes < 0) goto error; ndp.ndp16 = (struct usb_cdc_ncm_ndp16 *)(skb_in->data + ndpoffset); if (ndp.ndp16->dwSignature != cpu_to_le32(USB_CDC_NCM_NDP16_NOCRC_SIGN)) { netif_dbg(dev, rx_err, dev->net, "invalid DPT16 signature <%#010x>\n", le32_to_cpu(ndp.ndp16->dwSignature)); goto err_ndp; } dpe.dpe16 = ndp.ndp16->dpe16; } else { nframes = cdc_ncm_rx_verify_ndp32(skb_in, ndpoffset); if (nframes < 0) goto error; ndp.ndp32 = (struct usb_cdc_ncm_ndp32 *)(skb_in->data + ndpoffset); if (ndp.ndp32->dwSignature != cpu_to_le32(USB_CDC_NCM_NDP32_NOCRC_SIGN)) { netif_dbg(dev, rx_err, dev->net, "invalid DPT32 signature <%#010x>\n", le32_to_cpu(ndp.ndp32->dwSignature)); goto err_ndp; } dpe.dpe32 = ndp.ndp32->dpe32; } for (x = 0; x < nframes; x++) { if (ctx->is_ndp16) { offset = le16_to_cpu(dpe.dpe16->wDatagramIndex); len = le16_to_cpu(dpe.dpe16->wDatagramLength); } else { offset = le32_to_cpu(dpe.dpe32->dwDatagramIndex); len = le32_to_cpu(dpe.dpe32->dwDatagramLength); } /* * CDC NCM ch. 
3.7 * All entries after first NULL entry are to be ignored */ if ((offset == 0) || (len == 0)) { if (!x) goto err_ndp; /* empty NTB */ break; } /* sanity checking - watch out for integer wrap*/ if ((offset > skb_in->len) || (len > skb_in->len - offset) || (len > ctx->rx_max) || (len < ETH_HLEN)) { netif_dbg(dev, rx_err, dev->net, "invalid frame detected (ignored) offset[%u]=%u, length=%u, skb=%p\n", x, offset, len, skb_in); if (!x) goto err_ndp; break; } else { /* create a fresh copy to reduce truesize */ skb = netdev_alloc_skb_ip_align(dev->net, len); if (!skb) goto error; skb_put_data(skb, skb_in->data + offset, len); usbnet_skb_return(dev, skb); payload += len; /* count payload bytes in this NTB */ } if (ctx->is_ndp16) dpe.dpe16++; else dpe.dpe32++; } err_ndp: /* are there more NDPs to process? */ if (ctx->is_ndp16) ndpoffset = le16_to_cpu(ndp.ndp16->wNextNdpIndex); else ndpoffset = le32_to_cpu(ndp.ndp32->dwNextNdpIndex); if (ndpoffset && loopcount--) goto next_ndp; /* update stats */ ctx->rx_overhead += skb_in->len - payload; ctx->rx_ntbs++; return 1; error: return 0; } EXPORT_SYMBOL_GPL(cdc_ncm_rx_fixup); static void cdc_ncm_speed_change(struct usbnet *dev, struct usb_cdc_speed_change *data) { /* RTL8156 shipped before 2021 sends notification about every 32ms. */ dev->rx_speed = le32_to_cpu(data->DLBitRRate); dev->tx_speed = le32_to_cpu(data->ULBitRate); } static void cdc_ncm_status(struct usbnet *dev, struct urb *urb) { struct usb_cdc_notification *event; if (urb->actual_length < sizeof(*event)) return; /* test for split data in 8-byte chunks */ if (test_and_clear_bit(EVENT_STS_SPLIT, &dev->flags)) { cdc_ncm_speed_change(dev, (struct usb_cdc_speed_change *)urb->transfer_buffer); return; } event = urb->transfer_buffer; switch (event->bNotificationType) { case USB_CDC_NOTIFY_NETWORK_CONNECTION: /* * According to the CDC NCM specification ch.7.1 * USB_CDC_NOTIFY_NETWORK_CONNECTION notification shall be * sent by device after USB_CDC_NOTIFY_SPEED_CHANGE. */ /* RTL8156 shipped before 2021 sends notification about * every 32ms. Don't forward notification if state is same. 
*/ if (netif_carrier_ok(dev->net) != !!event->wValue) usbnet_link_change(dev, !!event->wValue, 0); break; case USB_CDC_NOTIFY_SPEED_CHANGE: if (urb->actual_length < (sizeof(*event) + sizeof(struct usb_cdc_speed_change))) set_bit(EVENT_STS_SPLIT, &dev->flags); else cdc_ncm_speed_change(dev, (struct usb_cdc_speed_change *)&event[1]); break; default: dev_dbg(&dev->udev->dev, "NCM: unexpected notification 0x%02x!\n", event->bNotificationType); break; } } static const struct driver_info cdc_ncm_info = { .description = "CDC NCM (NO ZLP)", .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET | FLAG_LINK_INTR | FLAG_ETHER, .bind = cdc_ncm_bind, .unbind = cdc_ncm_unbind, .manage_power = usbnet_manage_power, .status = cdc_ncm_status, .rx_fixup = cdc_ncm_rx_fixup, .tx_fixup = cdc_ncm_tx_fixup, .set_rx_mode = usbnet_cdc_update_filter, }; /* Same as cdc_ncm_info, but with FLAG_SEND_ZLP */ static const struct driver_info cdc_ncm_zlp_info = { .description = "CDC NCM (SEND ZLP)", .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET | FLAG_LINK_INTR | FLAG_ETHER | FLAG_SEND_ZLP, .bind = cdc_ncm_bind, .unbind = cdc_ncm_unbind, .manage_power = usbnet_manage_power, .status = cdc_ncm_status, .rx_fixup = cdc_ncm_rx_fixup, .tx_fixup = cdc_ncm_tx_fixup, .set_rx_mode = usbnet_cdc_update_filter, }; /* Same as cdc_ncm_info, but with FLAG_SEND_ZLP */ static const struct driver_info apple_tethering_interface_info = { .description = "CDC NCM (Apple Tethering)", .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET | FLAG_LINK_INTR | FLAG_ETHER | FLAG_SEND_ZLP, .bind = cdc_ncm_bind, .unbind = cdc_ncm_unbind, .manage_power = usbnet_manage_power, .status = cdc_ncm_status, .rx_fixup = cdc_ncm_rx_fixup, .tx_fixup = cdc_ncm_tx_fixup, .set_rx_mode = usbnet_cdc_update_filter, }; /* Same as apple_tethering_interface_info, but without FLAG_LINK_INTR */ static const struct driver_info apple_private_interface_info = { .description = "CDC NCM (Apple Private)", .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET | FLAG_ETHER | FLAG_SEND_ZLP, .bind = cdc_ncm_bind, .unbind = cdc_ncm_unbind, .manage_power = usbnet_manage_power, .status = cdc_ncm_status, .rx_fixup = cdc_ncm_rx_fixup, .tx_fixup = cdc_ncm_tx_fixup, .set_rx_mode = usbnet_cdc_update_filter, }; /* Same as cdc_ncm_info, but with FLAG_WWAN */ static const struct driver_info wwan_info = { .description = "Mobile Broadband Network Device", .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET | FLAG_LINK_INTR | FLAG_WWAN, .bind = cdc_ncm_bind, .unbind = cdc_ncm_unbind, .manage_power = usbnet_manage_power, .status = cdc_ncm_status, .rx_fixup = cdc_ncm_rx_fixup, .tx_fixup = cdc_ncm_tx_fixup, .set_rx_mode = usbnet_cdc_update_filter, }; /* Same as wwan_info, but with FLAG_NOARP */ static const struct driver_info wwan_noarp_info = { .description = "Mobile Broadband Network Device (NO ARP)", .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET | FLAG_LINK_INTR | FLAG_WWAN | FLAG_NOARP, .bind = cdc_ncm_bind, .unbind = cdc_ncm_unbind, .manage_power = usbnet_manage_power, .status = cdc_ncm_status, .rx_fixup = cdc_ncm_rx_fixup, .tx_fixup = cdc_ncm_tx_fixup, .set_rx_mode = usbnet_cdc_update_filter, }; static const struct usb_device_id cdc_devs[] = { /* iPhone */ { USB_DEVICE_INTERFACE_NUMBER(0x05ac, 0x12a8, 2), .driver_info = (unsigned long)&apple_tethering_interface_info, }, { USB_DEVICE_INTERFACE_NUMBER(0x05ac, 0x12a8, 4), .driver_info = (unsigned long)&apple_private_interface_info, }, /* iPad */ { 
USB_DEVICE_INTERFACE_NUMBER(0x05ac, 0x12ab, 2), .driver_info = (unsigned long)&apple_tethering_interface_info, }, { USB_DEVICE_INTERFACE_NUMBER(0x05ac, 0x12ab, 4), .driver_info = (unsigned long)&apple_private_interface_info, }, /* Ericsson MBM devices like F5521gw */ { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_VENDOR, .idVendor = 0x0bdb, .bInterfaceClass = USB_CLASS_COMM, .bInterfaceSubClass = USB_CDC_SUBCLASS_NCM, .bInterfaceProtocol = USB_CDC_PROTO_NONE, .driver_info = (unsigned long) &wwan_info, }, /* Telit LE910 V2 */ { USB_DEVICE_AND_INTERFACE_INFO(0x1bc7, 0x0036, USB_CLASS_COMM, USB_CDC_SUBCLASS_NCM, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_noarp_info, }, /* DW5812 LTE Verizon Mobile Broadband Card * Unlike DW5550 this device requires FLAG_NOARP */ { USB_DEVICE_AND_INTERFACE_INFO(0x413c, 0x81bb, USB_CLASS_COMM, USB_CDC_SUBCLASS_NCM, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_noarp_info, }, /* DW5813 LTE AT&T Mobile Broadband Card * Unlike DW5550 this device requires FLAG_NOARP */ { USB_DEVICE_AND_INTERFACE_INFO(0x413c, 0x81bc, USB_CLASS_COMM, USB_CDC_SUBCLASS_NCM, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_noarp_info, }, /* Dell branded MBM devices like DW5550 */ { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_VENDOR, .idVendor = 0x413c, .bInterfaceClass = USB_CLASS_COMM, .bInterfaceSubClass = USB_CDC_SUBCLASS_NCM, .bInterfaceProtocol = USB_CDC_PROTO_NONE, .driver_info = (unsigned long) &wwan_info, }, /* Toshiba branded MBM devices */ { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_VENDOR, .idVendor = 0x0930, .bInterfaceClass = USB_CLASS_COMM, .bInterfaceSubClass = USB_CDC_SUBCLASS_NCM, .bInterfaceProtocol = USB_CDC_PROTO_NONE, .driver_info = (unsigned long) &wwan_info, }, /* tag Huawei devices as wwan */ { USB_VENDOR_AND_INTERFACE_INFO(0x12d1, USB_CLASS_COMM, USB_CDC_SUBCLASS_NCM, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, }, /* Infineon(now Intel) HSPA Modem platform */ { USB_DEVICE_AND_INTERFACE_INFO(0x1519, 0x0443, USB_CLASS_COMM, USB_CDC_SUBCLASS_NCM, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_noarp_info, }, /* u-blox TOBY-L4 */ { USB_DEVICE_AND_INTERFACE_INFO(0x1546, 0x1010, USB_CLASS_COMM, USB_CDC_SUBCLASS_NCM, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&wwan_info, }, /* DisplayLink docking stations */ { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_VENDOR, .idVendor = 0x17e9, .bInterfaceClass = USB_CLASS_COMM, .bInterfaceSubClass = USB_CDC_SUBCLASS_NCM, .bInterfaceProtocol = USB_CDC_PROTO_NONE, .driver_info = (unsigned long)&cdc_ncm_zlp_info, }, /* Generic CDC-NCM devices */ { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_NCM, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&cdc_ncm_info, }, { }, }; MODULE_DEVICE_TABLE(usb, cdc_devs); static struct usb_driver cdc_ncm_driver = { .name = "cdc_ncm", .id_table = cdc_devs, .probe = usbnet_probe, .disconnect = usbnet_disconnect, .suspend = usbnet_suspend, .resume = usbnet_resume, .reset_resume = usbnet_resume, .supports_autosuspend = 1, .disable_hub_initiated_lpm = 1, }; module_usb_driver(cdc_ncm_driver); MODULE_AUTHOR("Hans Petter Selasky"); MODULE_DESCRIPTION("USB CDC NCM host driver"); MODULE_LICENSE("Dual BSD/GPL"); |
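/*
 * Illustrative sketch (not part of the driver): how the NTB16 layout produced
 * by cdc_ncm_fill_tx_frame() can be walked, in the same spirit as
 * cdc_ncm_rx_fixup() above. The helper name ncm16_walk_example is
 * hypothetical; the usb_cdc_ncm_* structures and signature constant are
 * assumed to come from <linux/usb/cdc.h>. Guarded by #if 0, documentation only.
 */
#if 0
static int ncm16_walk_example(const u8 *ntb, unsigned int len)
{
	const struct usb_cdc_ncm_nth16 *nth;
	const struct usb_cdc_ncm_ndp16 *ndp;
	unsigned int ndpoffset, ndatagrams, i;

	if (len < sizeof(*nth))
		return -EINVAL;

	/* 1) NTH16 header at the very start of the block */
	nth = (const struct usb_cdc_ncm_nth16 *)ntb;
	if (nth->dwSignature != cpu_to_le32(USB_CDC_NCM_NTH16_SIGN))
		return -EINVAL;

	/* 2) NDP16 pointed to by wNdpIndex (placed last when NDP_TO_END is set) */
	ndpoffset = le16_to_cpu(nth->wNdpIndex);
	if (ndpoffset + sizeof(*ndp) > len)
		return -EINVAL;
	ndp = (const struct usb_cdc_ncm_ndp16 *)(ntb + ndpoffset);

	/* 3) datagram pointer entries, terminated by the first NULL entry */
	ndatagrams = (le16_to_cpu(ndp->wLength) - sizeof(*ndp)) /
		     sizeof(struct usb_cdc_ncm_dpe16);
	for (i = 0; i < ndatagrams; i++) {
		unsigned int off = le16_to_cpu(ndp->dpe16[i].wDatagramIndex);
		unsigned int dlen = le16_to_cpu(ndp->dpe16[i].wDatagramLength);

		if (!off || !dlen)
			break;	/* NULL entry ends the NDP (spec ch. 3.7) */
		if (off > len || dlen > len - off)
			return -EINVAL;
		/* ntb + off .. ntb + off + dlen is one Ethernet frame */
	}

	return 0;
}
#endif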
// SPDX-License-Identifier: GPL-2.0+
/*
 * USB IR Dongle driver
 *
 * Copyright (C) 2001-2002 Greg Kroah-Hartman (greg@kroah.com)
 * Copyright (C) 2002 Gary Brubaker (xavyer@ix.netcom.com)
 * Copyright (C) 2010 Johan Hovold (jhovold@gmail.com)
 *
 * This driver allows a USB IrDA device to be used as a "dumb" serial device.
 * This can be useful if you do not have access to a full IrDA stack on the
 * other side of the connection. If you do have an IrDA stack on both devices,
 * please use the usb-irda driver, as it contains the proper error checking and
 * other goodness of a full IrDA stack.
* * Portions of this driver were taken from drivers/net/irda/irda-usb.c, which * was written by Roman Weissgaerber <weissg@vienna.at>, Dag Brattli * <dag@brattli.net>, and Jean Tourrilhes <jt@hpl.hp.com> * * See Documentation/usb/usb-serial.rst for more information on using this * driver */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/tty.h> #include <linux/tty_driver.h> #include <linux/tty_flip.h> #include <linux/module.h> #include <linux/spinlock.h> #include <linux/uaccess.h> #include <linux/usb.h> #include <linux/usb/serial.h> #include <linux/usb/irda.h> #define DRIVER_AUTHOR "Greg Kroah-Hartman <greg@kroah.com>, Johan Hovold <jhovold@gmail.com>" #define DRIVER_DESC "USB IR Dongle driver" /* if overridden by the user, then use their value for the size of the read and * write urbs */ static int buffer_size; /* if overridden by the user, then use the specified number of XBOFs */ static int xbof = -1; static int ir_startup (struct usb_serial *serial); static int ir_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count); static unsigned int ir_write_room(struct tty_struct *tty); static void ir_write_bulk_callback(struct urb *urb); static void ir_process_read_urb(struct urb *urb); static void ir_set_termios(struct tty_struct *tty, struct usb_serial_port *port, const struct ktermios *old_termios); /* Not that this lot means you can only have one per system */ static u8 ir_baud; static u8 ir_xbof; static u8 ir_add_bof; static const struct usb_device_id ir_id_table[] = { { USB_DEVICE(0x050f, 0x0180) }, /* KC Technology, KC-180 */ { USB_DEVICE(0x08e9, 0x0100) }, /* XTNDAccess */ { USB_DEVICE(0x09c4, 0x0011) }, /* ACTiSys ACT-IR2000U */ { USB_INTERFACE_INFO(USB_CLASS_APP_SPEC, USB_SUBCLASS_IRDA, 0) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, ir_id_table); static struct usb_serial_driver ir_device = { .driver = { .name = "ir-usb", }, .description = "IR Dongle", .id_table = ir_id_table, .num_ports = 1, .num_bulk_in = 1, .num_bulk_out = 1, .set_termios = ir_set_termios, .attach = ir_startup, .write = ir_write, .write_room = ir_write_room, .write_bulk_callback = ir_write_bulk_callback, .process_read_urb = ir_process_read_urb, }; static struct usb_serial_driver * const serial_drivers[] = { &ir_device, NULL }; static inline void irda_usb_dump_class_desc(struct usb_serial *serial, struct usb_irda_cs_descriptor *desc) { struct device *dev = &serial->dev->dev; dev_dbg(dev, "bLength=%x\n", desc->bLength); dev_dbg(dev, "bDescriptorType=%x\n", desc->bDescriptorType); dev_dbg(dev, "bcdSpecRevision=%x\n", __le16_to_cpu(desc->bcdSpecRevision)); dev_dbg(dev, "bmDataSize=%x\n", desc->bmDataSize); dev_dbg(dev, "bmWindowSize=%x\n", desc->bmWindowSize); dev_dbg(dev, "bmMinTurnaroundTime=%d\n", desc->bmMinTurnaroundTime); dev_dbg(dev, "wBaudRate=%x\n", __le16_to_cpu(desc->wBaudRate)); dev_dbg(dev, "bmAdditionalBOFs=%x\n", desc->bmAdditionalBOFs); dev_dbg(dev, "bIrdaRateSniff=%x\n", desc->bIrdaRateSniff); dev_dbg(dev, "bMaxUnicastList=%x\n", desc->bMaxUnicastList); } /*------------------------------------------------------------------*/ /* * Function irda_usb_find_class_desc(dev, ifnum) * * Returns instance of IrDA class descriptor, or NULL if not found * * The class descriptor is some extra info that IrDA USB devices will * offer to us, describing their IrDA characteristics. 
We will use that in * irda_usb_init_qos() * * Based on the same function in drivers/net/irda/irda-usb.c */ static struct usb_irda_cs_descriptor * irda_usb_find_class_desc(struct usb_serial *serial, unsigned int ifnum) { struct usb_device *dev = serial->dev; struct usb_irda_cs_descriptor *desc; int ret; desc = kzalloc(sizeof(*desc), GFP_KERNEL); if (!desc) return NULL; ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), USB_REQ_CS_IRDA_GET_CLASS_DESC, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE, 0, ifnum, desc, sizeof(*desc), 1000); dev_dbg(&serial->dev->dev, "%s - ret=%d\n", __func__, ret); if (ret < (int)sizeof(*desc)) { dev_dbg(&serial->dev->dev, "%s - class descriptor read %s (%d)\n", __func__, (ret < 0) ? "failed" : "too short", ret); goto error; } if (desc->bDescriptorType != USB_DT_CS_IRDA) { dev_dbg(&serial->dev->dev, "%s - bad class descriptor type\n", __func__); goto error; } irda_usb_dump_class_desc(serial, desc); return desc; error: kfree(desc); return NULL; } static u8 ir_xbof_change(u8 xbof) { u8 result; /* reference irda-usb.c */ switch (xbof) { case 48: result = 0x10; break; case 28: case 24: result = 0x20; break; default: case 12: result = 0x30; break; case 5: case 6: result = 0x40; break; case 3: result = 0x50; break; case 2: result = 0x60; break; case 1: result = 0x70; break; case 0: result = 0x80; break; } return(result); } static int ir_startup(struct usb_serial *serial) { struct usb_irda_cs_descriptor *irda_desc; int rates; irda_desc = irda_usb_find_class_desc(serial, 0); if (!irda_desc) { dev_err(&serial->dev->dev, "IRDA class descriptor not found, device not bound\n"); return -ENODEV; } rates = le16_to_cpu(irda_desc->wBaudRate); dev_dbg(&serial->dev->dev, "%s - Baud rates supported:%s%s%s%s%s%s%s%s%s\n", __func__, (rates & USB_IRDA_BR_2400) ? " 2400" : "", (rates & USB_IRDA_BR_9600) ? " 9600" : "", (rates & USB_IRDA_BR_19200) ? " 19200" : "", (rates & USB_IRDA_BR_38400) ? " 38400" : "", (rates & USB_IRDA_BR_57600) ? " 57600" : "", (rates & USB_IRDA_BR_115200) ? " 115200" : "", (rates & USB_IRDA_BR_576000) ? " 576000" : "", (rates & USB_IRDA_BR_1152000) ? " 1152000" : "", (rates & USB_IRDA_BR_4000000) ? " 4000000" : ""); switch (irda_desc->bmAdditionalBOFs) { case USB_IRDA_AB_48: ir_add_bof = 48; break; case USB_IRDA_AB_24: ir_add_bof = 24; break; case USB_IRDA_AB_12: ir_add_bof = 12; break; case USB_IRDA_AB_6: ir_add_bof = 6; break; case USB_IRDA_AB_3: ir_add_bof = 3; break; case USB_IRDA_AB_2: ir_add_bof = 2; break; case USB_IRDA_AB_1: ir_add_bof = 1; break; case USB_IRDA_AB_0: ir_add_bof = 0; break; default: break; } kfree(irda_desc); return 0; } static int ir_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *buf, int count) { struct urb *urb = NULL; unsigned long flags; int ret; if (port->bulk_out_size == 0) return -EINVAL; if (count == 0) return 0; count = min(count, port->bulk_out_size - 1); spin_lock_irqsave(&port->lock, flags); if (__test_and_clear_bit(0, &port->write_urbs_free)) { urb = port->write_urbs[0]; port->tx_bytes += count; } spin_unlock_irqrestore(&port->lock, flags); if (!urb) return 0; /* * The first byte of the packet we send to the device contains an * outbound header which indicates an additional number of BOFs and * a baud rate change. * * See section 5.4.2.2 of the USB IrDA spec. 
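 * For example, ir_xbof_change(12) yields 0x30 for the extra-BOF field, and
 * the low-order bits carry the USB_IRDA_LS_* link-speed code, so the header
 * byte written below is simply ir_xbof | ir_baud.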
*/ *(u8 *)urb->transfer_buffer = ir_xbof | ir_baud; memcpy(urb->transfer_buffer + 1, buf, count); urb->transfer_buffer_length = count + 1; urb->transfer_flags = URB_ZERO_PACKET; ret = usb_submit_urb(urb, GFP_ATOMIC); if (ret) { dev_err(&port->dev, "failed to submit write urb: %d\n", ret); spin_lock_irqsave(&port->lock, flags); __set_bit(0, &port->write_urbs_free); port->tx_bytes -= count; spin_unlock_irqrestore(&port->lock, flags); return ret; } return count; } static void ir_write_bulk_callback(struct urb *urb) { struct usb_serial_port *port = urb->context; int status = urb->status; unsigned long flags; spin_lock_irqsave(&port->lock, flags); __set_bit(0, &port->write_urbs_free); port->tx_bytes -= urb->transfer_buffer_length - 1; spin_unlock_irqrestore(&port->lock, flags); switch (status) { case 0: break; case -ENOENT: case -ECONNRESET: case -ESHUTDOWN: dev_dbg(&port->dev, "write urb stopped: %d\n", status); return; case -EPIPE: dev_err(&port->dev, "write urb stopped: %d\n", status); return; default: dev_err(&port->dev, "nonzero write-urb status: %d\n", status); break; } usb_serial_port_softint(port); } static unsigned int ir_write_room(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; unsigned int count = 0; if (port->bulk_out_size == 0) return 0; if (test_bit(0, &port->write_urbs_free)) count = port->bulk_out_size - 1; return count; } static void ir_process_read_urb(struct urb *urb) { struct usb_serial_port *port = urb->context; unsigned char *data = urb->transfer_buffer; if (!urb->actual_length) return; /* * The first byte of the packet we get from the device * contains a busy indicator and baud rate change. * See section 5.4.1.2 of the USB IrDA spec. */ if (*data & 0x0f) ir_baud = *data & 0x0f; if (urb->actual_length == 1) return; tty_insert_flip_string(&port->port, data + 1, urb->actual_length - 1); tty_flip_buffer_push(&port->port); } static void ir_set_termios(struct tty_struct *tty, struct usb_serial_port *port, const struct ktermios *old_termios) { struct usb_device *udev = port->serial->dev; unsigned char *transfer_buffer; int actual_length; speed_t baud; int ir_baud; int ret; baud = tty_get_baud_rate(tty); /* * FIXME, we should compare the baud request against the * capability stated in the IR header that we got in the * startup function. 
*/ switch (baud) { case 2400: ir_baud = USB_IRDA_LS_2400; break; case 9600: ir_baud = USB_IRDA_LS_9600; break; case 19200: ir_baud = USB_IRDA_LS_19200; break; case 38400: ir_baud = USB_IRDA_LS_38400; break; case 57600: ir_baud = USB_IRDA_LS_57600; break; case 115200: ir_baud = USB_IRDA_LS_115200; break; case 576000: ir_baud = USB_IRDA_LS_576000; break; case 1152000: ir_baud = USB_IRDA_LS_1152000; break; case 4000000: ir_baud = USB_IRDA_LS_4000000; break; default: ir_baud = USB_IRDA_LS_9600; baud = 9600; } if (xbof == -1) ir_xbof = ir_xbof_change(ir_add_bof); else ir_xbof = ir_xbof_change(xbof) ; /* Only speed changes are supported */ tty_termios_copy_hw(&tty->termios, old_termios); tty_encode_baud_rate(tty, baud, baud); /* * send the baud change out on an "empty" data packet */ transfer_buffer = kmalloc(1, GFP_KERNEL); if (!transfer_buffer) return; *transfer_buffer = ir_xbof | ir_baud; ret = usb_bulk_msg(udev, usb_sndbulkpipe(udev, port->bulk_out_endpointAddress), transfer_buffer, 1, &actual_length, 5000); if (ret || actual_length != 1) { if (!ret) ret = -EIO; dev_err(&port->dev, "failed to change line speed: %d\n", ret); } kfree(transfer_buffer); } static int __init ir_init(void) { if (buffer_size) { ir_device.bulk_in_size = buffer_size; ir_device.bulk_out_size = buffer_size; } return usb_serial_register_drivers(serial_drivers, KBUILD_MODNAME, ir_id_table); } static void __exit ir_exit(void) { usb_serial_deregister_drivers(serial_drivers); } module_init(ir_init); module_exit(ir_exit); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); module_param(xbof, int, 0); MODULE_PARM_DESC(xbof, "Force specific number of XBOFs"); module_param(buffer_size, int, 0); MODULE_PARM_DESC(buffer_size, "Size of the transfer buffers"); |
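/*
 * Illustrative sketch (not part of the driver): the one-byte "empty" data
 * packet that ir_set_termios() sends to change the link speed. The helper
 * name and the udev/pipe arguments are hypothetical (the driver builds the
 * pipe with usb_sndbulkpipe() from port->bulk_out_endpointAddress);
 * usb_bulk_msg(), ir_xbof_change() and USB_IRDA_LS_* are the calls the
 * driver itself uses. Guarded by #if 0, documentation only.
 */
#if 0
static int ir_send_speed_change_example(struct usb_device *udev,
					unsigned int bulk_out_pipe)
{
	int actual_length;
	int ret;
	u8 *buf = kmalloc(1, GFP_KERNEL);

	if (!buf)
		return -ENOMEM;

	/* no payload: just the header byte selecting 9600 baud, zero extra BOFs */
	*buf = ir_xbof_change(0) | USB_IRDA_LS_9600;

	ret = usb_bulk_msg(udev, bulk_out_pipe, buf, 1, &actual_length, 5000);
	kfree(buf);
	return ret;
}
#endif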
54 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_TTY_PORT_H #define _LINUX_TTY_PORT_H #include <linux/kfifo.h> #include <linux/kref.h> #include <linux/mutex.h> #include <linux/tty_buffer.h> #include <linux/wait.h> struct attribute_group; struct tty_driver; struct tty_port; struct tty_struct; /** * struct tty_port_operations -- operations on tty_port * @carrier_raised: return true if the carrier is raised on @port * @dtr_rts: raise the DTR line if @active is true, otherwise lower DTR * @shutdown: called when the last close completes or a hangup finishes IFF the * port was initialized. Do not use to free resources. Turn off the device * only. Called under the port mutex to serialize against @activate and * @shutdown. * @activate: called under the port mutex from tty_port_open(), serialized using * the port mutex. Supposed to turn on the device. * * FIXME: long term getting the tty argument *out* of this would be good * for consoles. * * @destruct: called on the final put of a port. Free resources, possibly incl. * the port itself. */ struct tty_port_operations { bool (*carrier_raised)(struct tty_port *port); void (*dtr_rts)(struct tty_port *port, bool active); void (*shutdown)(struct tty_port *port); int (*activate)(struct tty_port *port, struct tty_struct *tty); void (*destruct)(struct tty_port *port); }; struct tty_port_client_operations { size_t (*receive_buf)(struct tty_port *port, const u8 *cp, const u8 *fp, size_t count); void (*lookahead_buf)(struct tty_port *port, const u8 *cp, const u8 *fp, size_t count); void (*write_wakeup)(struct tty_port *port); }; extern const struct tty_port_client_operations tty_port_default_client_ops; /** * struct tty_port -- port level information * * @buf: buffer for this port, locked internally * @tty: back pointer to &struct tty_struct, valid only if the tty is open. Use * tty_port_tty_get() to obtain it (and tty_kref_put() to release). * @itty: internal back pointer to &struct tty_struct. Avoid this. It should be * eliminated in the long term. * @ops: tty port operations (like activate, shutdown), see &struct * tty_port_operations * @client_ops: tty port client operations (like receive_buf, write_wakeup). * By default, tty_port_default_client_ops is used. * @lock: lock protecting @tty * @blocked_open: # of procs waiting for open in tty_port_block_til_ready() * @count: usage count * @open_wait: open waiters queue (waiting e.g. 
for a carrier) * @delta_msr_wait: modem status change queue (waiting for MSR changes) * @flags: user TTY flags (%ASYNC_) * @iflags: internal flags (%TTY_PORT_) * @console: when set, the port is a console * @mutex: locking, for open, shutdown and other port operations * @buf_mutex: @xmit_buf alloc lock * @xmit_buf: optional xmit buffer used by some drivers * @xmit_fifo: optional xmit buffer used by some drivers * @close_delay: delay in jiffies to wait when closing the port * @closing_wait: delay in jiffies for output to be sent before closing * @drain_delay: set to zero if no pure time based drain is needed else set to * size of fifo * @kref: references counter. Reaching zero calls @ops->destruct() if non-%NULL * or frees the port otherwise. * @client_data: pointer to private data, for @client_ops * * Each device keeps its own port level information. &struct tty_port was * introduced as a common structure for such information. As every TTY device * shall have a backing tty_port structure, every driver can use these members. * * The tty port has a different lifetime to the tty so must be kept apart. * In addition be careful as tty -> port mappings are valid for the life * of the tty object but in many cases port -> tty mappings are valid only * until a hangup so don't use the wrong path. * * Tty port shall be initialized by tty_port_init() and shut down either by * tty_port_destroy() (refcounting not used), or tty_port_put() (refcounting). * * There is a lot of helpers around &struct tty_port too. To name the most * significant ones: tty_port_open(), tty_port_close() (or * tty_port_close_start() and tty_port_close_end() separately if need be), and * tty_port_hangup(). These call @ops->activate() and @ops->shutdown() as * needed. */ struct tty_port { struct tty_bufhead buf; struct tty_struct *tty; struct tty_struct *itty; const struct tty_port_operations *ops; const struct tty_port_client_operations *client_ops; spinlock_t lock; int blocked_open; int count; wait_queue_head_t open_wait; wait_queue_head_t delta_msr_wait; unsigned long flags; unsigned long iflags; unsigned char console:1; struct mutex mutex; struct mutex buf_mutex; u8 *xmit_buf; DECLARE_KFIFO_PTR(xmit_fifo, u8); unsigned int close_delay; unsigned int closing_wait; int drain_delay; struct kref kref; void *client_data; }; /* tty_port::iflags bits -- use atomic bit ops */ #define TTY_PORT_INITIALIZED 0 /* device is initialized */ #define TTY_PORT_SUSPENDED 1 /* device is suspended */ #define TTY_PORT_ACTIVE 2 /* device is open */ /* * uart drivers: use the uart_port::status field and the UPSTAT_* defines * for s/w-based flow control steering and carrier detection status */ #define TTY_PORT_CTS_FLOW 3 /* h/w flow control enabled */ #define TTY_PORT_CHECK_CD 4 /* carrier detect enabled */ #define TTY_PORT_KOPENED 5 /* device exclusively opened by kernel */ void tty_port_init(struct tty_port *port); void tty_port_link_device(struct tty_port *port, struct tty_driver *driver, unsigned index); struct device *tty_port_register_device(struct tty_port *port, struct tty_driver *driver, unsigned index, struct device *device); struct device *tty_port_register_device_attr(struct tty_port *port, struct tty_driver *driver, unsigned index, struct device *device, void *drvdata, const struct attribute_group **attr_grp); struct device *tty_port_register_device_serdev(struct tty_port *port, struct tty_driver *driver, unsigned index, struct device *host, struct device *parent); struct device *tty_port_register_device_attr_serdev(struct tty_port *port, 
struct tty_driver *driver, unsigned index, struct device *host, struct device *parent, void *drvdata, const struct attribute_group **attr_grp); void tty_port_unregister_device(struct tty_port *port, struct tty_driver *driver, unsigned index); int tty_port_alloc_xmit_buf(struct tty_port *port); void tty_port_free_xmit_buf(struct tty_port *port); void tty_port_destroy(struct tty_port *port); void tty_port_put(struct tty_port *port); static inline struct tty_port *tty_port_get(struct tty_port *port) { if (port && kref_get_unless_zero(&port->kref)) return port; return NULL; } /* If the cts flow control is enabled, return true. */ static inline bool tty_port_cts_enabled(const struct tty_port *port) { return test_bit(TTY_PORT_CTS_FLOW, &port->iflags); } static inline void tty_port_set_cts_flow(struct tty_port *port, bool val) { assign_bit(TTY_PORT_CTS_FLOW, &port->iflags, val); } static inline bool tty_port_active(const struct tty_port *port) { return test_bit(TTY_PORT_ACTIVE, &port->iflags); } static inline void tty_port_set_active(struct tty_port *port, bool val) { assign_bit(TTY_PORT_ACTIVE, &port->iflags, val); } static inline bool tty_port_check_carrier(const struct tty_port *port) { return test_bit(TTY_PORT_CHECK_CD, &port->iflags); } static inline void tty_port_set_check_carrier(struct tty_port *port, bool val) { assign_bit(TTY_PORT_CHECK_CD, &port->iflags, val); } static inline bool tty_port_suspended(const struct tty_port *port) { return test_bit(TTY_PORT_SUSPENDED, &port->iflags); } static inline void tty_port_set_suspended(struct tty_port *port, bool val) { assign_bit(TTY_PORT_SUSPENDED, &port->iflags, val); } static inline bool tty_port_initialized(const struct tty_port *port) { return test_bit(TTY_PORT_INITIALIZED, &port->iflags); } static inline void tty_port_set_initialized(struct tty_port *port, bool val) { assign_bit(TTY_PORT_INITIALIZED, &port->iflags, val); } static inline bool tty_port_kopened(const struct tty_port *port) { return test_bit(TTY_PORT_KOPENED, &port->iflags); } static inline void tty_port_set_kopened(struct tty_port *port, bool val) { assign_bit(TTY_PORT_KOPENED, &port->iflags, val); } struct tty_struct *tty_port_tty_get(struct tty_port *port); void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty); bool tty_port_carrier_raised(struct tty_port *port); void tty_port_raise_dtr_rts(struct tty_port *port); void tty_port_lower_dtr_rts(struct tty_port *port); void tty_port_hangup(struct tty_port *port); void tty_port_tty_hangup(struct tty_port *port, bool check_clocal); void tty_port_tty_wakeup(struct tty_port *port); int tty_port_block_til_ready(struct tty_port *port, struct tty_struct *tty, struct file *filp); int tty_port_close_start(struct tty_port *port, struct tty_struct *tty, struct file *filp); void tty_port_close_end(struct tty_port *port, struct tty_struct *tty); void tty_port_close(struct tty_port *port, struct tty_struct *tty, struct file *filp); int tty_port_install(struct tty_port *port, struct tty_driver *driver, struct tty_struct *tty); int tty_port_open(struct tty_port *port, struct tty_struct *tty, struct file *filp); static inline int tty_port_users(struct tty_port *port) { return port->count + port->blocked_open; } #endif |
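/*
 * Illustrative sketch (not part of this header): the usual way a driver wires
 * up a tty_port with the helpers declared above. All foo_* names and the
 * parent device argument are hypothetical; a refcounted teardown would use
 * tty_port_put() instead of tty_port_destroy(). Guarded by #if 0,
 * documentation only.
 */
#if 0
static int foo_activate(struct tty_port *port, struct tty_struct *tty)
{
	/* called from tty_port_open() under the port mutex: power up the device */
	return 0;
}

static void foo_shutdown(struct tty_port *port)
{
	/* called on final close or hangup: stop the device, do not free memory */
}

static const struct tty_port_operations foo_port_ops = {
	.activate	= foo_activate,
	.shutdown	= foo_shutdown,
};

static struct tty_port foo_port;

static int foo_probe(struct tty_driver *foo_tty_driver, struct device *parent)
{
	struct device *dev;

	tty_port_init(&foo_port);
	foo_port.ops = &foo_port_ops;

	/* link the port to minor 0 of foo_tty_driver and create the device */
	dev = tty_port_register_device(&foo_port, foo_tty_driver, 0, parent);
	return PTR_ERR_OR_ZERO(dev);
}
#endif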
895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 | // SPDX-License-Identifier: GPL-2.0+ /* * Copyright (C) 2007 Alan Stern * Copyright (C) IBM Corporation, 2009 * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com> * * Thanks to Ingo Molnar for his many suggestions. * * Authors: Alan Stern <stern@rowland.harvard.edu> * K.Prasad <prasad@linux.vnet.ibm.com> * Frederic Weisbecker <fweisbec@gmail.com> */ /* * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility, * using the CPU's debug registers. * This file contains the arch-independent routines. */ #include <linux/hw_breakpoint.h> #include <linux/atomic.h> #include <linux/bug.h> #include <linux/cpu.h> #include <linux/export.h> #include <linux/init.h> #include <linux/irqflags.h> #include <linux/kdebug.h> #include <linux/kernel.h> #include <linux/mutex.h> #include <linux/notifier.h> #include <linux/percpu-rwsem.h> #include <linux/percpu.h> #include <linux/rhashtable.h> #include <linux/sched.h> #include <linux/slab.h> /* * Datastructure to track the total uses of N slots across tasks or CPUs; * bp_slots_histogram::count[N] is the number of assigned N+1 breakpoint slots. */ struct bp_slots_histogram { #ifdef hw_breakpoint_slots atomic_t count[hw_breakpoint_slots(0)]; #else atomic_t *count; #endif }; /* * Per-CPU constraints data. */ struct bp_cpuinfo { /* Number of pinned CPU breakpoints in a CPU. */ unsigned int cpu_pinned; /* Histogram of pinned task breakpoints in a CPU. */ struct bp_slots_histogram tsk_pinned; }; static DEFINE_PER_CPU(struct bp_cpuinfo, bp_cpuinfo[TYPE_MAX]); static struct bp_cpuinfo *get_bp_info(int cpu, enum bp_type_idx type) { return per_cpu_ptr(bp_cpuinfo + type, cpu); } /* Number of pinned CPU breakpoints globally. */ static struct bp_slots_histogram cpu_pinned[TYPE_MAX]; /* Number of pinned CPU-independent task breakpoints. */ static struct bp_slots_histogram tsk_pinned_all[TYPE_MAX]; /* Keep track of the breakpoints attached to tasks */ static struct rhltable task_bps_ht; static const struct rhashtable_params task_bps_ht_params = { .head_offset = offsetof(struct hw_perf_event, bp_list), .key_offset = offsetof(struct hw_perf_event, target), .key_len = sizeof_field(struct hw_perf_event, target), .automatic_shrinking = true, }; static bool constraints_initialized __ro_after_init; /* * Synchronizes accesses to the per-CPU constraints; the locking rules are: * * 1. Atomic updates to bp_cpuinfo::tsk_pinned only require a held read-lock * (due to bp_slots_histogram::count being atomic, no update are lost). * * 2. Holding a write-lock is required for computations that require a * stable snapshot of all bp_cpuinfo::tsk_pinned. * * 3. In all other cases, non-atomic accesses require the appropriately held * lock (read-lock for read-only accesses; write-lock for reads/writes). */ DEFINE_STATIC_PERCPU_RWSEM(bp_cpuinfo_sem); /* * Return mutex to serialize accesses to per-task lists in task_bps_ht. 
Since * rhltable synchronizes concurrent insertions/deletions, independent tasks may * insert/delete concurrently; therefore, a mutex per task is sufficient. * * Uses task_struct::perf_event_mutex, to avoid extending task_struct with a * hw_breakpoint-only mutex, which may be infrequently used. The caveat here is * that hw_breakpoint may contend with per-task perf event list management. The * assumption is that perf usecases involving hw_breakpoints are very unlikely * to result in unnecessary contention. */ static inline struct mutex *get_task_bps_mutex(struct perf_event *bp) { struct task_struct *tsk = bp->hw.target; return tsk ? &tsk->perf_event_mutex : NULL; } static struct mutex *bp_constraints_lock(struct perf_event *bp) { struct mutex *tsk_mtx = get_task_bps_mutex(bp); if (tsk_mtx) { /* * Fully analogous to the perf_try_init_event() nesting * argument in the comment near perf_event_ctx_lock_nested(); * this child->perf_event_mutex cannot ever deadlock against * the parent->perf_event_mutex usage from * perf_event_task_{en,dis}able(). * * Specifically, inherited events will never occur on * ->perf_event_list. */ mutex_lock_nested(tsk_mtx, SINGLE_DEPTH_NESTING); percpu_down_read(&bp_cpuinfo_sem); } else { percpu_down_write(&bp_cpuinfo_sem); } return tsk_mtx; } static void bp_constraints_unlock(struct mutex *tsk_mtx) { if (tsk_mtx) { percpu_up_read(&bp_cpuinfo_sem); mutex_unlock(tsk_mtx); } else { percpu_up_write(&bp_cpuinfo_sem); } } static bool bp_constraints_is_locked(struct perf_event *bp) { struct mutex *tsk_mtx = get_task_bps_mutex(bp); return percpu_is_write_locked(&bp_cpuinfo_sem) || (tsk_mtx ? mutex_is_locked(tsk_mtx) : percpu_is_read_locked(&bp_cpuinfo_sem)); } static inline void assert_bp_constraints_lock_held(struct perf_event *bp) { struct mutex *tsk_mtx = get_task_bps_mutex(bp); if (tsk_mtx) lockdep_assert_held(tsk_mtx); lockdep_assert_held(&bp_cpuinfo_sem); } #ifdef hw_breakpoint_slots /* * Number of breakpoint slots is constant, and the same for all types. */ static_assert(hw_breakpoint_slots(TYPE_INST) == hw_breakpoint_slots(TYPE_DATA)); static inline int hw_breakpoint_slots_cached(int type) { return hw_breakpoint_slots(type); } static inline int init_breakpoint_slots(void) { return 0; } #else /* * Dynamic number of breakpoint slots. 
*/ static int __nr_bp_slots[TYPE_MAX] __ro_after_init; static inline int hw_breakpoint_slots_cached(int type) { return __nr_bp_slots[type]; } static __init bool bp_slots_histogram_alloc(struct bp_slots_histogram *hist, enum bp_type_idx type) { hist->count = kcalloc(hw_breakpoint_slots_cached(type), sizeof(*hist->count), GFP_KERNEL); return hist->count; } static __init void bp_slots_histogram_free(struct bp_slots_histogram *hist) { kfree(hist->count); } static __init int init_breakpoint_slots(void) { int i, cpu, err_cpu; for (i = 0; i < TYPE_MAX; i++) __nr_bp_slots[i] = hw_breakpoint_slots(i); for_each_possible_cpu(cpu) { for (i = 0; i < TYPE_MAX; i++) { struct bp_cpuinfo *info = get_bp_info(cpu, i); if (!bp_slots_histogram_alloc(&info->tsk_pinned, i)) goto err; } } for (i = 0; i < TYPE_MAX; i++) { if (!bp_slots_histogram_alloc(&cpu_pinned[i], i)) goto err; if (!bp_slots_histogram_alloc(&tsk_pinned_all[i], i)) goto err; } return 0; err: for_each_possible_cpu(err_cpu) { for (i = 0; i < TYPE_MAX; i++) bp_slots_histogram_free(&get_bp_info(err_cpu, i)->tsk_pinned); if (err_cpu == cpu) break; } for (i = 0; i < TYPE_MAX; i++) { bp_slots_histogram_free(&cpu_pinned[i]); bp_slots_histogram_free(&tsk_pinned_all[i]); } return -ENOMEM; } #endif static inline void bp_slots_histogram_add(struct bp_slots_histogram *hist, int old, int val) { const int old_idx = old - 1; const int new_idx = old_idx + val; if (old_idx >= 0) WARN_ON(atomic_dec_return_relaxed(&hist->count[old_idx]) < 0); if (new_idx >= 0) WARN_ON(atomic_inc_return_relaxed(&hist->count[new_idx]) < 0); } static int bp_slots_histogram_max(struct bp_slots_histogram *hist, enum bp_type_idx type) { for (int i = hw_breakpoint_slots_cached(type) - 1; i >= 0; i--) { const int count = atomic_read(&hist->count[i]); /* Catch unexpected writers; we want a stable snapshot. */ ASSERT_EXCLUSIVE_WRITER(hist->count[i]); if (count > 0) return i + 1; WARN(count < 0, "inconsistent breakpoint slots histogram"); } return 0; } static int bp_slots_histogram_max_merge(struct bp_slots_histogram *hist1, struct bp_slots_histogram *hist2, enum bp_type_idx type) { for (int i = hw_breakpoint_slots_cached(type) - 1; i >= 0; i--) { const int count1 = atomic_read(&hist1->count[i]); const int count2 = atomic_read(&hist2->count[i]); /* Catch unexpected writers; we want a stable snapshot. */ ASSERT_EXCLUSIVE_WRITER(hist1->count[i]); ASSERT_EXCLUSIVE_WRITER(hist2->count[i]); if (count1 + count2 > 0) return i + 1; WARN(count1 < 0, "inconsistent breakpoint slots histogram"); WARN(count2 < 0, "inconsistent breakpoint slots histogram"); } return 0; } #ifndef hw_breakpoint_weight static inline int hw_breakpoint_weight(struct perf_event *bp) { return 1; } #endif static inline enum bp_type_idx find_slot_idx(u64 bp_type) { if (bp_type & HW_BREAKPOINT_RW) return TYPE_DATA; return TYPE_INST; } /* * Return the maximum number of pinned breakpoints a task has in this CPU. */ static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type) { struct bp_slots_histogram *tsk_pinned = &get_bp_info(cpu, type)->tsk_pinned; /* * At this point we want to have acquired the bp_cpuinfo_sem as a * writer to ensure that there are no concurrent writers in * toggle_bp_task_slot() to tsk_pinned, and we get a stable snapshot. */ lockdep_assert_held_write(&bp_cpuinfo_sem); return bp_slots_histogram_max_merge(tsk_pinned, &tsk_pinned_all[type], type); } /* * Count the number of breakpoints of the same type and same task. * The given event must be not on the list. 
* * If @cpu is -1, but the result of task_bp_pinned() is not CPU-independent, * returns a negative value. */ static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type) { struct rhlist_head *head, *pos; struct perf_event *iter; int count = 0; /* * We need a stable snapshot of the per-task breakpoint list. */ assert_bp_constraints_lock_held(bp); rcu_read_lock(); head = rhltable_lookup(&task_bps_ht, &bp->hw.target, task_bps_ht_params); if (!head) goto out; rhl_for_each_entry_rcu(iter, pos, head, hw.bp_list) { if (find_slot_idx(iter->attr.bp_type) != type) continue; if (iter->cpu >= 0) { if (cpu == -1) { count = -1; goto out; } else if (cpu != iter->cpu) continue; } count += hw_breakpoint_weight(iter); } out: rcu_read_unlock(); return count; } static const struct cpumask *cpumask_of_bp(struct perf_event *bp) { if (bp->cpu >= 0) return cpumask_of(bp->cpu); return cpu_possible_mask; } /* * Returns the max pinned breakpoint slots in a given * CPU (cpu > -1) or across all of them (cpu = -1). */ static int max_bp_pinned_slots(struct perf_event *bp, enum bp_type_idx type) { const struct cpumask *cpumask = cpumask_of_bp(bp); int pinned_slots = 0; int cpu; if (bp->hw.target && bp->cpu < 0) { int max_pinned = task_bp_pinned(-1, bp, type); if (max_pinned >= 0) { /* * Fast path: task_bp_pinned() is CPU-independent and * returns the same value for any CPU. */ max_pinned += bp_slots_histogram_max(&cpu_pinned[type], type); return max_pinned; } } for_each_cpu(cpu, cpumask) { struct bp_cpuinfo *info = get_bp_info(cpu, type); int nr; nr = info->cpu_pinned; if (!bp->hw.target) nr += max_task_bp_pinned(cpu, type); else nr += task_bp_pinned(cpu, bp, type); pinned_slots = max(nr, pinned_slots); } return pinned_slots; } /* * Add/remove the given breakpoint in our constraint table */ static int toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type, int weight) { int cpu, next_tsk_pinned; if (!enable) weight = -weight; if (!bp->hw.target) { /* * Update the pinned CPU slots, in per-CPU bp_cpuinfo and in the * global histogram. */ struct bp_cpuinfo *info = get_bp_info(bp->cpu, type); lockdep_assert_held_write(&bp_cpuinfo_sem); bp_slots_histogram_add(&cpu_pinned[type], info->cpu_pinned, weight); info->cpu_pinned += weight; return 0; } /* * If bp->hw.target, tsk_pinned is only modified, but not used * otherwise. We can permit concurrent updates as long as there are no * other uses: having acquired bp_cpuinfo_sem as a reader allows * concurrent updates here. Uses of tsk_pinned will require acquiring * bp_cpuinfo_sem as a writer to stabilize tsk_pinned's value. */ lockdep_assert_held_read(&bp_cpuinfo_sem); /* * Update the pinned task slots, in per-CPU bp_cpuinfo and in the global * histogram. We need to take care of 4 cases: * * 1. This breakpoint targets all CPUs (cpu < 0), and there may only * exist other task breakpoints targeting all CPUs. In this case we * can simply update the global slots histogram. * * 2. This breakpoint targets a specific CPU (cpu >= 0), but there may * only exist other task breakpoints targeting all CPUs. * * a. On enable: remove the existing breakpoints from the global * slots histogram and use the per-CPU histogram. * * b. On disable: re-insert the existing breakpoints into the global * slots histogram and remove from per-CPU histogram. * * 3. Some other existing task breakpoints target specific CPUs. Only * update the per-CPU slots histogram. 
*/ if (!enable) { /* * Remove before updating histograms so we can determine if this * was the last task breakpoint for a specific CPU. */ int ret = rhltable_remove(&task_bps_ht, &bp->hw.bp_list, task_bps_ht_params); if (ret) return ret; } /* * Note: If !enable, next_tsk_pinned will not count the to-be-removed breakpoint. */ next_tsk_pinned = task_bp_pinned(-1, bp, type); if (next_tsk_pinned >= 0) { if (bp->cpu < 0) { /* Case 1: fast path */ if (!enable) next_tsk_pinned += hw_breakpoint_weight(bp); bp_slots_histogram_add(&tsk_pinned_all[type], next_tsk_pinned, weight); } else if (enable) { /* Case 2.a: slow path */ /* Add existing to per-CPU histograms. */ for_each_possible_cpu(cpu) { bp_slots_histogram_add(&get_bp_info(cpu, type)->tsk_pinned, 0, next_tsk_pinned); } /* Add this first CPU-pinned task breakpoint. */ bp_slots_histogram_add(&get_bp_info(bp->cpu, type)->tsk_pinned, next_tsk_pinned, weight); /* Rebalance global task pinned histogram. */ bp_slots_histogram_add(&tsk_pinned_all[type], next_tsk_pinned, -next_tsk_pinned); } else { /* Case 2.b: slow path */ /* Remove this last CPU-pinned task breakpoint. */ bp_slots_histogram_add(&get_bp_info(bp->cpu, type)->tsk_pinned, next_tsk_pinned + hw_breakpoint_weight(bp), weight); /* Remove all from per-CPU histograms. */ for_each_possible_cpu(cpu) { bp_slots_histogram_add(&get_bp_info(cpu, type)->tsk_pinned, next_tsk_pinned, -next_tsk_pinned); } /* Rebalance global task pinned histogram. */ bp_slots_histogram_add(&tsk_pinned_all[type], 0, next_tsk_pinned); } } else { /* Case 3: slow path */ const struct cpumask *cpumask = cpumask_of_bp(bp); for_each_cpu(cpu, cpumask) { next_tsk_pinned = task_bp_pinned(cpu, bp, type); if (!enable) next_tsk_pinned += hw_breakpoint_weight(bp); bp_slots_histogram_add(&get_bp_info(cpu, type)->tsk_pinned, next_tsk_pinned, weight); } } /* * Readers want a stable snapshot of the per-task breakpoint list. */ assert_bp_constraints_lock_held(bp); if (enable) return rhltable_insert(&task_bps_ht, &bp->hw.bp_list, task_bps_ht_params); return 0; } /* * Constraints to check before allowing this new breakpoint counter. * * Note: Flexible breakpoints are currently unimplemented, but outlined in the * below algorithm for completeness. The implementation treats flexible as * pinned due to no guarantee that we currently always schedule flexible events * before a pinned event in a same CPU. * * == Non-pinned counter == (Considered as pinned for now) * * - If attached to a single cpu, check: * * (per_cpu(info->flexible, cpu) || (per_cpu(info->cpu_pinned, cpu) * + max(per_cpu(info->tsk_pinned, cpu)))) < HBP_NUM * * -> If there are already non-pinned counters in this cpu, it means * there is already a free slot for them. * Otherwise, we check that the maximum number of per task * breakpoints (for this cpu) plus the number of per cpu breakpoint * (for this cpu) doesn't cover every registers. * * - If attached to every cpus, check: * * (per_cpu(info->flexible, *) || (max(per_cpu(info->cpu_pinned, *)) * + max(per_cpu(info->tsk_pinned, *)))) < HBP_NUM * * -> This is roughly the same, except we check the number of per cpu * bp for every cpu and we keep the max one. Same for the per tasks * breakpoints. * * * == Pinned counter == * * - If attached to a single cpu, check: * * ((per_cpu(info->flexible, cpu) > 1) + per_cpu(info->cpu_pinned, cpu) * + max(per_cpu(info->tsk_pinned, cpu))) < HBP_NUM * * -> Same checks as before. But now the info->flexible, if any, must keep * one register at least (or they will never be fed). 
* * - If attached to every cpus, check: * * ((per_cpu(info->flexible, *) > 1) + max(per_cpu(info->cpu_pinned, *)) * + max(per_cpu(info->tsk_pinned, *))) < HBP_NUM */ static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type) { enum bp_type_idx type; int max_pinned_slots; int weight; /* We couldn't initialize breakpoint constraints on boot */ if (!constraints_initialized) return -ENOMEM; /* Basic checks */ if (bp_type == HW_BREAKPOINT_EMPTY || bp_type == HW_BREAKPOINT_INVALID) return -EINVAL; type = find_slot_idx(bp_type); weight = hw_breakpoint_weight(bp); /* Check if this new breakpoint can be satisfied across all CPUs. */ max_pinned_slots = max_bp_pinned_slots(bp, type) + weight; if (max_pinned_slots > hw_breakpoint_slots_cached(type)) return -ENOSPC; return toggle_bp_slot(bp, true, type, weight); } int reserve_bp_slot(struct perf_event *bp) { struct mutex *mtx = bp_constraints_lock(bp); int ret = __reserve_bp_slot(bp, bp->attr.bp_type); bp_constraints_unlock(mtx); return ret; } static void __release_bp_slot(struct perf_event *bp, u64 bp_type) { enum bp_type_idx type; int weight; type = find_slot_idx(bp_type); weight = hw_breakpoint_weight(bp); WARN_ON(toggle_bp_slot(bp, false, type, weight)); } void release_bp_slot(struct perf_event *bp) { struct mutex *mtx = bp_constraints_lock(bp); __release_bp_slot(bp, bp->attr.bp_type); bp_constraints_unlock(mtx); } static int __modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type) { int err; __release_bp_slot(bp, old_type); err = __reserve_bp_slot(bp, new_type); if (err) { /* * Reserve the old_type slot back in case * there's no space for the new type. * * This must succeed, because we just released * the old_type slot in the __release_bp_slot * call above. If not, something is broken. */ WARN_ON(__reserve_bp_slot(bp, old_type)); } return err; } static int modify_bp_slot(struct perf_event *bp, u64 old_type, u64 new_type) { struct mutex *mtx = bp_constraints_lock(bp); int ret = __modify_bp_slot(bp, old_type, new_type); bp_constraints_unlock(mtx); return ret; } /* * Allow the kernel debugger to reserve breakpoint slots without * taking a lock using the dbg_* variant of for the reserve and * release breakpoint slots. */ int dbg_reserve_bp_slot(struct perf_event *bp) { int ret; if (bp_constraints_is_locked(bp)) return -1; /* Locks aren't held; disable lockdep assert checking. */ lockdep_off(); ret = __reserve_bp_slot(bp, bp->attr.bp_type); lockdep_on(); return ret; } int dbg_release_bp_slot(struct perf_event *bp) { if (bp_constraints_is_locked(bp)) return -1; /* Locks aren't held; disable lockdep assert checking. */ lockdep_off(); __release_bp_slot(bp, bp->attr.bp_type); lockdep_on(); return 0; } static int hw_breakpoint_parse(struct perf_event *bp, const struct perf_event_attr *attr, struct arch_hw_breakpoint *hw) { int err; err = hw_breakpoint_arch_parse(bp, attr, hw); if (err) return err; if (arch_check_bp_in_kernelspace(hw)) { if (attr->exclude_kernel) return -EINVAL; /* * Don't let unprivileged users set a breakpoint in the trap * path to avoid trap recursion attacks. 
*/ if (!capable(CAP_SYS_ADMIN)) return -EPERM; } return 0; } int register_perf_hw_breakpoint(struct perf_event *bp) { struct arch_hw_breakpoint hw = { }; int err; err = reserve_bp_slot(bp); if (err) return err; err = hw_breakpoint_parse(bp, &bp->attr, &hw); if (err) { release_bp_slot(bp); return err; } bp->hw.info = hw; return 0; } /** * register_user_hw_breakpoint - register a hardware breakpoint for user space * @attr: breakpoint attributes * @triggered: callback to trigger when we hit the breakpoint * @context: context data could be used in the triggered callback * @tsk: pointer to 'task_struct' of the process to which the address belongs */ struct perf_event * register_user_hw_breakpoint(struct perf_event_attr *attr, perf_overflow_handler_t triggered, void *context, struct task_struct *tsk) { return perf_event_create_kernel_counter(attr, -1, tsk, triggered, context); } EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); static void hw_breakpoint_copy_attr(struct perf_event_attr *to, struct perf_event_attr *from) { to->bp_addr = from->bp_addr; to->bp_type = from->bp_type; to->bp_len = from->bp_len; to->disabled = from->disabled; } int modify_user_hw_breakpoint_check(struct perf_event *bp, struct perf_event_attr *attr, bool check) { struct arch_hw_breakpoint hw = { }; int err; err = hw_breakpoint_parse(bp, attr, &hw); if (err) return err; if (check) { struct perf_event_attr old_attr; old_attr = bp->attr; hw_breakpoint_copy_attr(&old_attr, attr); if (memcmp(&old_attr, attr, sizeof(*attr))) return -EINVAL; } if (bp->attr.bp_type != attr->bp_type) { err = modify_bp_slot(bp, bp->attr.bp_type, attr->bp_type); if (err) return err; } hw_breakpoint_copy_attr(&bp->attr, attr); bp->hw.info = hw; return 0; } /** * modify_user_hw_breakpoint - modify a user-space hardware breakpoint * @bp: the breakpoint structure to modify * @attr: new breakpoint attributes */ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr) { int err; /* * modify_user_hw_breakpoint can be invoked with IRQs disabled and hence it * will not be possible to raise IPIs that invoke __perf_event_disable. * So call the function directly after making sure we are targeting the * current task. 
*/ if (irqs_disabled() && bp->ctx && bp->ctx->task == current) perf_event_disable_local(bp); else perf_event_disable(bp); err = modify_user_hw_breakpoint_check(bp, attr, false); if (!bp->attr.disabled) perf_event_enable(bp); return err; } EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint); /** * unregister_hw_breakpoint - unregister a user-space hardware breakpoint * @bp: the breakpoint structure to unregister */ void unregister_hw_breakpoint(struct perf_event *bp) { if (!bp) return; perf_event_release_kernel(bp); } EXPORT_SYMBOL_GPL(unregister_hw_breakpoint); /** * register_wide_hw_breakpoint - register a wide breakpoint in the kernel * @attr: breakpoint attributes * @triggered: callback to trigger when we hit the breakpoint * @context: context data could be used in the triggered callback * * @return a set of per_cpu pointers to perf events */ struct perf_event * __percpu * register_wide_hw_breakpoint(struct perf_event_attr *attr, perf_overflow_handler_t triggered, void *context) { struct perf_event * __percpu *cpu_events, *bp; long err = 0; int cpu; cpu_events = alloc_percpu(typeof(*cpu_events)); if (!cpu_events) return ERR_PTR_PCPU(-ENOMEM); cpus_read_lock(); for_each_online_cpu(cpu) { bp = perf_event_create_kernel_counter(attr, cpu, NULL, triggered, context); if (IS_ERR(bp)) { err = PTR_ERR(bp); break; } per_cpu(*cpu_events, cpu) = bp; } cpus_read_unlock(); if (likely(!err)) return cpu_events; unregister_wide_hw_breakpoint(cpu_events); return ERR_PTR_PCPU(err); } EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint); /** * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel * @cpu_events: the per cpu set of events to unregister */ void unregister_wide_hw_breakpoint(struct perf_event * __percpu *cpu_events) { int cpu; for_each_possible_cpu(cpu) unregister_hw_breakpoint(per_cpu(*cpu_events, cpu)); free_percpu(cpu_events); } EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint); /** * hw_breakpoint_is_used - check if breakpoints are currently used * * Returns: true if breakpoints are used, false otherwise. */ bool hw_breakpoint_is_used(void) { int cpu; if (!constraints_initialized) return false; for_each_possible_cpu(cpu) { for (int type = 0; type < TYPE_MAX; ++type) { struct bp_cpuinfo *info = get_bp_info(cpu, type); if (info->cpu_pinned) return true; for (int slot = 0; slot < hw_breakpoint_slots_cached(type); ++slot) { if (atomic_read(&info->tsk_pinned.count[slot])) return true; } } } for (int type = 0; type < TYPE_MAX; ++type) { for (int slot = 0; slot < hw_breakpoint_slots_cached(type); ++slot) { /* * Warn, because if there are CPU pinned counters, * should never get here; bp_cpuinfo::cpu_pinned should * be consistent with the global cpu_pinned histogram. */ if (WARN_ON(atomic_read(&cpu_pinned[type].count[slot]))) return true; if (atomic_read(&tsk_pinned_all[type].count[slot])) return true; } } return false; } static struct notifier_block hw_breakpoint_exceptions_nb = { .notifier_call = hw_breakpoint_exceptions_notify, /* we need to be notified first */ .priority = 0x7fffffff }; static void bp_perf_event_destroy(struct perf_event *event) { release_bp_slot(event); } static int hw_breakpoint_event_init(struct perf_event *bp) { int err; if (bp->attr.type != PERF_TYPE_BREAKPOINT) return -ENOENT; /* * Check if breakpoint type is supported before proceeding. * Also, no branch sampling for breakpoint events. 
*/ if (!hw_breakpoint_slots_cached(find_slot_idx(bp->attr.bp_type)) || has_branch_stack(bp)) return -EOPNOTSUPP; err = register_perf_hw_breakpoint(bp); if (err) return err; bp->destroy = bp_perf_event_destroy; return 0; } static int hw_breakpoint_add(struct perf_event *bp, int flags) { if (!(flags & PERF_EF_START)) bp->hw.state = PERF_HES_STOPPED; if (is_sampling_event(bp)) { bp->hw.last_period = bp->hw.sample_period; perf_swevent_set_period(bp); } return arch_install_hw_breakpoint(bp); } static void hw_breakpoint_del(struct perf_event *bp, int flags) { arch_uninstall_hw_breakpoint(bp); } static void hw_breakpoint_start(struct perf_event *bp, int flags) { bp->hw.state = 0; } static void hw_breakpoint_stop(struct perf_event *bp, int flags) { bp->hw.state = PERF_HES_STOPPED; } static struct pmu perf_breakpoint = { .task_ctx_nr = perf_sw_context, /* could eventually get its own */ .event_init = hw_breakpoint_event_init, .add = hw_breakpoint_add, .del = hw_breakpoint_del, .start = hw_breakpoint_start, .stop = hw_breakpoint_stop, .read = hw_breakpoint_pmu_read, }; int __init init_hw_breakpoint(void) { int ret; ret = rhltable_init(&task_bps_ht, &task_bps_ht_params); if (ret) return ret; ret = init_breakpoint_slots(); if (ret) return ret; constraints_initialized = true; perf_pmu_register(&perf_breakpoint, "breakpoint", PERF_TYPE_BREAKPOINT); return register_die_notifier(&hw_breakpoint_exceptions_nb); } |
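The wide-breakpoint API defined above (register_wide_hw_breakpoint()/unregister_wide_hw_breakpoint()) is intended for kernel-internal users that want the same breakpoint armed on every online CPU. The following is a minimal illustrative sketch of such a caller, loosely modelled on the in-tree sample module; the watched variable, the handler body and the module boilerplate are assumptions for illustration, not part of the file above.

/*
 * Sketch only (not part of hw_breakpoint.c): arm a 4-byte write
 * breakpoint on a kernel variable on every online CPU, then tear it
 * down on module unload. "watch_target" and the handler body are
 * illustrative assumptions.
 */
#include <linux/module.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>

static int watch_target;
static struct perf_event * __percpu *wp_events;

static void wp_handler(struct perf_event *bp, struct perf_sample_data *data,
		       struct pt_regs *regs)
{
	/* Called in NMI-like context when the watched address is written. */
	pr_info("watch_target was written to\n");
}

static int __init wp_init(void)
{
	struct perf_event_attr attr;

	hw_breakpoint_init(&attr);	/* PERF_TYPE_BREAKPOINT defaults */
	attr.bp_addr = (unsigned long)&watch_target;
	attr.bp_len = HW_BREAKPOINT_LEN_4;
	attr.bp_type = HW_BREAKPOINT_W;

	wp_events = register_wide_hw_breakpoint(&attr, wp_handler, NULL);
	if (IS_ERR((void __force *)wp_events))
		return PTR_ERR((void __force *)wp_events);
	return 0;
}

static void __exit wp_exit(void)
{
	unregister_wide_hw_breakpoint(wp_events);
}

module_init(wp_init);
module_exit(wp_exit);
MODULE_LICENSE("GPL");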
// SPDX-License-Identifier: GPL-2.0-or-later /* * Forwarding database * Linux ethernet bridge * * Authors: * Lennert Buytenhek <buytenh@gnu.org> */ #include <linux/kernel.h> #include <linux/init.h> #include <linux/rculist.h> #include <linux/spinlock.h> #include <linux/times.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/jhash.h> #include <linux/random.h> #include <linux/slab.h> #include <linux/atomic.h> #include <linux/unaligned.h> #include <linux/if_vlan.h> #include <net/switchdev.h> #include <trace/events/bridge.h> #include "br_private.h" static const struct rhashtable_params br_fdb_rht_params = { .head_offset = offsetof(struct net_bridge_fdb_entry, rhnode), .key_offset = offsetof(struct net_bridge_fdb_entry, key), .key_len = sizeof(struct net_bridge_fdb_key), .automatic_shrinking = true, }; static struct kmem_cache *br_fdb_cache __read_mostly; int __init br_fdb_init(void) { br_fdb_cache = KMEM_CACHE(net_bridge_fdb_entry, SLAB_HWCACHE_ALIGN); if (!br_fdb_cache) return -ENOMEM; return 0; } void br_fdb_fini(void) { kmem_cache_destroy(br_fdb_cache); } int br_fdb_hash_init(struct net_bridge *br) { return rhashtable_init(&br->fdb_hash_tbl, &br_fdb_rht_params); } void br_fdb_hash_fini(struct net_bridge *br) { rhashtable_destroy(&br->fdb_hash_tbl); } /* if topology_changing then use forward_delay (default 15 sec) * otherwise keep longer (default 5 minutes) */ static inline unsigned long hold_time(const struct net_bridge *br) { return br->topology_change ? br->forward_delay : br->ageing_time; } static inline int has_expired(const struct net_bridge *br, const struct net_bridge_fdb_entry *fdb) { return !test_bit(BR_FDB_STATIC, &fdb->flags) && !test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags) && time_before_eq(fdb->updated + hold_time(br), jiffies); } static int fdb_to_nud(const struct net_bridge *br, const struct net_bridge_fdb_entry *fdb) { if (test_bit(BR_FDB_LOCAL, &fdb->flags)) return NUD_PERMANENT; else if (test_bit(BR_FDB_STATIC, &fdb->flags)) return NUD_NOARP; else if (has_expired(br, fdb)) return NUD_STALE; else return NUD_REACHABLE; } static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br, const struct net_bridge_fdb_entry *fdb, u32 portid, u32 seq, int type, unsigned int flags) { const struct net_bridge_port *dst = READ_ONCE(fdb->dst); unsigned long now = jiffies; struct nda_cacheinfo ci; struct nlmsghdr *nlh; struct ndmsg *ndm; u32 ext_flags = 0; nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags); if (nlh == NULL) return -EMSGSIZE; ndm = nlmsg_data(nlh); ndm->ndm_family = AF_BRIDGE; ndm->ndm_pad1 = 0; ndm->ndm_pad2 = 0; ndm->ndm_flags = 0; ndm->ndm_type = 0; ndm->ndm_ifindex = dst ?
dst->dev->ifindex : br->dev->ifindex; ndm->ndm_state = fdb_to_nud(br, fdb); if (test_bit(BR_FDB_OFFLOADED, &fdb->flags)) ndm->ndm_flags |= NTF_OFFLOADED; if (test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags)) ndm->ndm_flags |= NTF_EXT_LEARNED; if (test_bit(BR_FDB_STICKY, &fdb->flags)) ndm->ndm_flags |= NTF_STICKY; if (test_bit(BR_FDB_LOCKED, &fdb->flags)) ext_flags |= NTF_EXT_LOCKED; if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->key.addr)) goto nla_put_failure; if (nla_put_u32(skb, NDA_MASTER, br->dev->ifindex)) goto nla_put_failure; if (nla_put_u32(skb, NDA_FLAGS_EXT, ext_flags)) goto nla_put_failure; ci.ndm_used = jiffies_to_clock_t(now - fdb->used); ci.ndm_confirmed = 0; ci.ndm_updated = jiffies_to_clock_t(now - fdb->updated); ci.ndm_refcnt = 0; if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) goto nla_put_failure; if (fdb->key.vlan_id && nla_put(skb, NDA_VLAN, sizeof(u16), &fdb->key.vlan_id)) goto nla_put_failure; if (test_bit(BR_FDB_NOTIFY, &fdb->flags)) { struct nlattr *nest = nla_nest_start(skb, NDA_FDB_EXT_ATTRS); u8 notify_bits = FDB_NOTIFY_BIT; if (!nest) goto nla_put_failure; if (test_bit(BR_FDB_NOTIFY_INACTIVE, &fdb->flags)) notify_bits |= FDB_NOTIFY_INACTIVE_BIT; if (nla_put_u8(skb, NFEA_ACTIVITY_NOTIFY, notify_bits)) { nla_nest_cancel(skb, nest); goto nla_put_failure; } nla_nest_end(skb, nest); } nlmsg_end(skb, nlh); return 0; nla_put_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } static inline size_t fdb_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(ETH_ALEN) /* NDA_LLADDR */ + nla_total_size(sizeof(u32)) /* NDA_MASTER */ + nla_total_size(sizeof(u32)) /* NDA_FLAGS_EXT */ + nla_total_size(sizeof(u16)) /* NDA_VLAN */ + nla_total_size(sizeof(struct nda_cacheinfo)) + nla_total_size(0) /* NDA_FDB_EXT_ATTRS */ + nla_total_size(sizeof(u8)); /* NFEA_ACTIVITY_NOTIFY */ } static void fdb_notify(struct net_bridge *br, const struct net_bridge_fdb_entry *fdb, int type, bool swdev_notify) { struct net *net = dev_net(br->dev); struct sk_buff *skb; int err = -ENOBUFS; if (swdev_notify) br_switchdev_fdb_notify(br, fdb, type); skb = nlmsg_new(fdb_nlmsg_size(), GFP_ATOMIC); if (skb == NULL) goto errout; err = fdb_fill_info(skb, br, fdb, 0, 0, type, 0); if (err < 0) { /* -EMSGSIZE implies BUG in fdb_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); return; errout: rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); } static struct net_bridge_fdb_entry *fdb_find_rcu(struct rhashtable *tbl, const unsigned char *addr, __u16 vid) { struct net_bridge_fdb_key key; WARN_ON_ONCE(!rcu_read_lock_held()); key.vlan_id = vid; memcpy(key.addr.addr, addr, sizeof(key.addr.addr)); return rhashtable_lookup(tbl, &key, br_fdb_rht_params); } /* requires bridge hash_lock */ static struct net_bridge_fdb_entry *br_fdb_find(struct net_bridge *br, const unsigned char *addr, __u16 vid) { struct net_bridge_fdb_entry *fdb; lockdep_assert_held_once(&br->hash_lock); rcu_read_lock(); fdb = fdb_find_rcu(&br->fdb_hash_tbl, addr, vid); rcu_read_unlock(); return fdb; } struct net_device *br_fdb_find_port(const struct net_device *br_dev, const unsigned char *addr, __u16 vid) { struct net_bridge_fdb_entry *f; struct net_device *dev = NULL; struct net_bridge *br; ASSERT_RTNL(); if (!netif_is_bridge_master(br_dev)) return NULL; br = netdev_priv(br_dev); rcu_read_lock(); f = br_fdb_find_rcu(br, addr, vid); if (f && f->dst) dev = f->dst->dev; rcu_read_unlock(); return dev; } EXPORT_SYMBOL_GPL(br_fdb_find_port); struct 
net_bridge_fdb_entry *br_fdb_find_rcu(struct net_bridge *br, const unsigned char *addr, __u16 vid) { return fdb_find_rcu(&br->fdb_hash_tbl, addr, vid); } /* When a static FDB entry is added, the mac address from the entry is * added to the bridge private HW address list and all required ports * are then updated with the new information. * Called under RTNL. */ static void fdb_add_hw_addr(struct net_bridge *br, const unsigned char *addr) { int err; struct net_bridge_port *p; ASSERT_RTNL(); list_for_each_entry(p, &br->port_list, list) { if (!br_promisc_port(p)) { err = dev_uc_add(p->dev, addr); if (err) goto undo; } } return; undo: list_for_each_entry_continue_reverse(p, &br->port_list, list) { if (!br_promisc_port(p)) dev_uc_del(p->dev, addr); } } /* When a static FDB entry is deleted, the HW address from that entry is * also removed from the bridge private HW address list and updates all * the ports with needed information. * Called under RTNL. */ static void fdb_del_hw_addr(struct net_bridge *br, const unsigned char *addr) { struct net_bridge_port *p; ASSERT_RTNL(); list_for_each_entry(p, &br->port_list, list) { if (!br_promisc_port(p)) dev_uc_del(p->dev, addr); } } static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f, bool swdev_notify) { trace_fdb_delete(br, f); if (test_bit(BR_FDB_STATIC, &f->flags)) fdb_del_hw_addr(br, f->key.addr.addr); hlist_del_init_rcu(&f->fdb_node); rhashtable_remove_fast(&br->fdb_hash_tbl, &f->rhnode, br_fdb_rht_params); if (test_and_clear_bit(BR_FDB_DYNAMIC_LEARNED, &f->flags)) atomic_dec(&br->fdb_n_learned); fdb_notify(br, f, RTM_DELNEIGH, swdev_notify); kfree_rcu(f, rcu); } /* Delete a local entry if no other port had the same address. * * This function should only be called on entries with BR_FDB_LOCAL set, * so even with BR_FDB_ADDED_BY_USER cleared we never need to increase * the accounting for dynamically learned entries again. */ static void fdb_delete_local(struct net_bridge *br, const struct net_bridge_port *p, struct net_bridge_fdb_entry *f) { const unsigned char *addr = f->key.addr.addr; struct net_bridge_vlan_group *vg; const struct net_bridge_vlan *v; struct net_bridge_port *op; u16 vid = f->key.vlan_id; /* Maybe another port has same hw addr? */ list_for_each_entry(op, &br->port_list, list) { vg = nbp_vlan_group(op); if (op != p && ether_addr_equal(op->dev->dev_addr, addr) && (!vid || br_vlan_find(vg, vid))) { f->dst = op; clear_bit(BR_FDB_ADDED_BY_USER, &f->flags); return; } } vg = br_vlan_group(br); v = br_vlan_find(vg, vid); /* Maybe bridge device has same hw addr? 
*/ if (p && ether_addr_equal(br->dev->dev_addr, addr) && (!vid || (v && br_vlan_should_use(v)))) { f->dst = NULL; clear_bit(BR_FDB_ADDED_BY_USER, &f->flags); return; } fdb_delete(br, f, true); } void br_fdb_find_delete_local(struct net_bridge *br, const struct net_bridge_port *p, const unsigned char *addr, u16 vid) { struct net_bridge_fdb_entry *f; spin_lock_bh(&br->hash_lock); f = br_fdb_find(br, addr, vid); if (f && test_bit(BR_FDB_LOCAL, &f->flags) && !test_bit(BR_FDB_ADDED_BY_USER, &f->flags) && f->dst == p) fdb_delete_local(br, p, f); spin_unlock_bh(&br->hash_lock); } static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br, struct net_bridge_port *source, const unsigned char *addr, __u16 vid, unsigned long flags) { bool learned = !test_bit(BR_FDB_ADDED_BY_USER, &flags) && !test_bit(BR_FDB_LOCAL, &flags); u32 max_learned = READ_ONCE(br->fdb_max_learned); struct net_bridge_fdb_entry *fdb; int err; if (likely(learned)) { int n_learned = atomic_read(&br->fdb_n_learned); if (unlikely(max_learned && n_learned >= max_learned)) return NULL; __set_bit(BR_FDB_DYNAMIC_LEARNED, &flags); } fdb = kmem_cache_alloc(br_fdb_cache, GFP_ATOMIC); if (!fdb) return NULL; memcpy(fdb->key.addr.addr, addr, ETH_ALEN); WRITE_ONCE(fdb->dst, source); fdb->key.vlan_id = vid; fdb->flags = flags; fdb->updated = fdb->used = jiffies; err = rhashtable_lookup_insert_fast(&br->fdb_hash_tbl, &fdb->rhnode, br_fdb_rht_params); if (err) { kmem_cache_free(br_fdb_cache, fdb); return NULL; } if (likely(learned)) atomic_inc(&br->fdb_n_learned); hlist_add_head_rcu(&fdb->fdb_node, &br->fdb_list); return fdb; } static int fdb_add_local(struct net_bridge *br, struct net_bridge_port *source, const unsigned char *addr, u16 vid) { struct net_bridge_fdb_entry *fdb; if (!is_valid_ether_addr(addr)) return -EINVAL; fdb = br_fdb_find(br, addr, vid); if (fdb) { /* it is okay to have multiple ports with same * address, just use the first one. */ if (test_bit(BR_FDB_LOCAL, &fdb->flags)) return 0; br_warn(br, "adding interface %s with same address as a received packet (addr:%pM, vlan:%u)\n", source ? source->dev->name : br->dev->name, addr, vid); fdb_delete(br, fdb, true); } fdb = fdb_create(br, source, addr, vid, BIT(BR_FDB_LOCAL) | BIT(BR_FDB_STATIC)); if (!fdb) return -ENOMEM; fdb_add_hw_addr(br, addr); fdb_notify(br, fdb, RTM_NEWNEIGH, true); return 0; } void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr) { struct net_bridge_vlan_group *vg; struct net_bridge_fdb_entry *f; struct net_bridge *br = p->br; struct net_bridge_vlan *v; spin_lock_bh(&br->hash_lock); vg = nbp_vlan_group(p); hlist_for_each_entry(f, &br->fdb_list, fdb_node) { if (f->dst == p && test_bit(BR_FDB_LOCAL, &f->flags) && !test_bit(BR_FDB_ADDED_BY_USER, &f->flags)) { /* delete old one */ fdb_delete_local(br, p, f); /* if this port has no vlan information * configured, we can safely be done at * this point. */ if (!vg || !vg->num_vlans) goto insert; } } insert: /* insert new address, may fail if invalid address or dup. */ fdb_add_local(br, p, newaddr, 0); if (!vg || !vg->num_vlans) goto done; /* Now add entries for every VLAN configured on the port. * This function runs under RTNL so the bitmap will not change * from under us. 
*/ list_for_each_entry(v, &vg->vlan_list, vlist) fdb_add_local(br, p, newaddr, v->vid); done: spin_unlock_bh(&br->hash_lock); } void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) { struct net_bridge_vlan_group *vg; struct net_bridge_fdb_entry *f; struct net_bridge_vlan *v; spin_lock_bh(&br->hash_lock); /* If old entry was unassociated with any port, then delete it. */ f = br_fdb_find(br, br->dev->dev_addr, 0); if (f && test_bit(BR_FDB_LOCAL, &f->flags) && !f->dst && !test_bit(BR_FDB_ADDED_BY_USER, &f->flags)) fdb_delete_local(br, NULL, f); fdb_add_local(br, NULL, newaddr, 0); vg = br_vlan_group(br); if (!vg || !vg->num_vlans) goto out; /* Now remove and add entries for every VLAN configured on the * bridge. This function runs under RTNL so the bitmap will not * change from under us. */ list_for_each_entry(v, &vg->vlan_list, vlist) { if (!br_vlan_should_use(v)) continue; f = br_fdb_find(br, br->dev->dev_addr, v->vid); if (f && test_bit(BR_FDB_LOCAL, &f->flags) && !f->dst && !test_bit(BR_FDB_ADDED_BY_USER, &f->flags)) fdb_delete_local(br, NULL, f); fdb_add_local(br, NULL, newaddr, v->vid); } out: spin_unlock_bh(&br->hash_lock); } void br_fdb_cleanup(struct work_struct *work) { struct net_bridge *br = container_of(work, struct net_bridge, gc_work.work); struct net_bridge_fdb_entry *f = NULL; unsigned long delay = hold_time(br); unsigned long work_delay = delay; unsigned long now = jiffies; /* this part is tricky, in order to avoid blocking learning and * consequently forwarding, we rely on rcu to delete objects with * delayed freeing allowing us to continue traversing */ rcu_read_lock(); hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) { unsigned long this_timer = f->updated + delay; if (test_bit(BR_FDB_STATIC, &f->flags) || test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &f->flags)) { if (test_bit(BR_FDB_NOTIFY, &f->flags)) { if (time_after(this_timer, now)) work_delay = min(work_delay, this_timer - now); else if (!test_and_set_bit(BR_FDB_NOTIFY_INACTIVE, &f->flags)) fdb_notify(br, f, RTM_NEWNEIGH, false); } continue; } if (time_after(this_timer, now)) { work_delay = min(work_delay, this_timer - now); } else { spin_lock_bh(&br->hash_lock); if (!hlist_unhashed(&f->fdb_node)) fdb_delete(br, f, true); spin_unlock_bh(&br->hash_lock); } } rcu_read_unlock(); /* Cleanup minimum 10 milliseconds apart */ work_delay = max_t(unsigned long, work_delay, msecs_to_jiffies(10)); mod_delayed_work(system_long_wq, &br->gc_work, work_delay); } static bool __fdb_flush_matches(const struct net_bridge *br, const struct net_bridge_fdb_entry *f, const struct net_bridge_fdb_flush_desc *desc) { const struct net_bridge_port *dst = READ_ONCE(f->dst); int port_ifidx = dst ? 
dst->dev->ifindex : br->dev->ifindex; if (desc->vlan_id && desc->vlan_id != f->key.vlan_id) return false; if (desc->port_ifindex && desc->port_ifindex != port_ifidx) return false; if (desc->flags_mask && (f->flags & desc->flags_mask) != desc->flags) return false; return true; } /* Flush forwarding database entries matching the description */ void br_fdb_flush(struct net_bridge *br, const struct net_bridge_fdb_flush_desc *desc) { struct net_bridge_fdb_entry *f; rcu_read_lock(); hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) { if (!__fdb_flush_matches(br, f, desc)) continue; spin_lock_bh(&br->hash_lock); if (!hlist_unhashed(&f->fdb_node)) fdb_delete(br, f, true); spin_unlock_bh(&br->hash_lock); } rcu_read_unlock(); } static unsigned long __ndm_state_to_fdb_flags(u16 ndm_state) { unsigned long flags = 0; if (ndm_state & NUD_PERMANENT) __set_bit(BR_FDB_LOCAL, &flags); if (ndm_state & NUD_NOARP) __set_bit(BR_FDB_STATIC, &flags); return flags; } static unsigned long __ndm_flags_to_fdb_flags(u8 ndm_flags) { unsigned long flags = 0; if (ndm_flags & NTF_USE) __set_bit(BR_FDB_ADDED_BY_USER, &flags); if (ndm_flags & NTF_EXT_LEARNED) __set_bit(BR_FDB_ADDED_BY_EXT_LEARN, &flags); if (ndm_flags & NTF_OFFLOADED) __set_bit(BR_FDB_OFFLOADED, &flags); if (ndm_flags & NTF_STICKY) __set_bit(BR_FDB_STICKY, &flags); return flags; } static int __fdb_flush_validate_ifindex(const struct net_bridge *br, int ifindex, struct netlink_ext_ack *extack) { const struct net_device *dev; dev = __dev_get_by_index(dev_net(br->dev), ifindex); if (!dev) { NL_SET_ERR_MSG_MOD(extack, "Unknown flush device ifindex"); return -ENODEV; } if (!netif_is_bridge_master(dev) && !netif_is_bridge_port(dev)) { NL_SET_ERR_MSG_MOD(extack, "Flush device is not a bridge or bridge port"); return -EINVAL; } if (netif_is_bridge_master(dev) && dev != br->dev) { NL_SET_ERR_MSG_MOD(extack, "Flush bridge device does not match target bridge device"); return -EINVAL; } if (netif_is_bridge_port(dev)) { struct net_bridge_port *p = br_port_get_rtnl(dev); if (p->br != br) { NL_SET_ERR_MSG_MOD(extack, "Port belongs to a different bridge device"); return -EINVAL; } } return 0; } static const struct nla_policy br_fdb_del_bulk_policy[NDA_MAX + 1] = { [NDA_VLAN] = NLA_POLICY_RANGE(NLA_U16, 1, VLAN_N_VID - 2), [NDA_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1), [NDA_NDM_STATE_MASK] = { .type = NLA_U16 }, [NDA_NDM_FLAGS_MASK] = { .type = NLA_U8 }, }; int br_fdb_delete_bulk(struct nlmsghdr *nlh, struct net_device *dev, struct netlink_ext_ack *extack) { struct net_bridge_fdb_flush_desc desc = {}; struct ndmsg *ndm = nlmsg_data(nlh); struct net_bridge_port *p = NULL; struct nlattr *tb[NDA_MAX + 1]; struct net_bridge *br; u8 ndm_flags; int err; ndm_flags = ndm->ndm_flags & ~FDB_FLUSH_IGNORED_NDM_FLAGS; err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, br_fdb_del_bulk_policy, extack); if (err) return err; if (netif_is_bridge_master(dev)) { br = netdev_priv(dev); } else { p = br_port_get_rtnl(dev); if (!p) { NL_SET_ERR_MSG_MOD(extack, "Device is not a bridge port"); return -EINVAL; } br = p->br; } if (tb[NDA_VLAN]) desc.vlan_id = nla_get_u16(tb[NDA_VLAN]); if (ndm_flags & ~FDB_FLUSH_ALLOWED_NDM_FLAGS) { NL_SET_ERR_MSG(extack, "Unsupported fdb flush ndm flag bits set"); return -EINVAL; } if (ndm->ndm_state & ~FDB_FLUSH_ALLOWED_NDM_STATES) { NL_SET_ERR_MSG(extack, "Unsupported fdb flush ndm state bits set"); return -EINVAL; } desc.flags |= __ndm_state_to_fdb_flags(ndm->ndm_state); desc.flags |= __ndm_flags_to_fdb_flags(ndm_flags); if (tb[NDA_NDM_STATE_MASK]) { u16 
ndm_state_mask = nla_get_u16(tb[NDA_NDM_STATE_MASK]); desc.flags_mask |= __ndm_state_to_fdb_flags(ndm_state_mask); } if (tb[NDA_NDM_FLAGS_MASK]) { u8 ndm_flags_mask = nla_get_u8(tb[NDA_NDM_FLAGS_MASK]); desc.flags_mask |= __ndm_flags_to_fdb_flags(ndm_flags_mask); } if (tb[NDA_IFINDEX]) { int ifidx = nla_get_s32(tb[NDA_IFINDEX]); err = __fdb_flush_validate_ifindex(br, ifidx, extack); if (err) return err; desc.port_ifindex = ifidx; } else if (p) { /* flush was invoked with port device and NTF_MASTER */ desc.port_ifindex = p->dev->ifindex; } br_debug(br, "flushing port ifindex: %d vlan id: %u flags: 0x%lx flags mask: 0x%lx\n", desc.port_ifindex, desc.vlan_id, desc.flags, desc.flags_mask); br_fdb_flush(br, &desc); return 0; } /* Flush all entries referring to a specific port. * if do_all is set also flush static entries * if vid is set delete all entries that match the vlan_id */ void br_fdb_delete_by_port(struct net_bridge *br, const struct net_bridge_port *p, u16 vid, int do_all) { struct net_bridge_fdb_entry *f; struct hlist_node *tmp; spin_lock_bh(&br->hash_lock); hlist_for_each_entry_safe(f, tmp, &br->fdb_list, fdb_node) { if (f->dst != p) continue; if (!do_all) if (test_bit(BR_FDB_STATIC, &f->flags) || (test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &f->flags) && !test_bit(BR_FDB_OFFLOADED, &f->flags)) || (vid && f->key.vlan_id != vid)) continue; if (test_bit(BR_FDB_LOCAL, &f->flags)) fdb_delete_local(br, p, f); else fdb_delete(br, f, true); } spin_unlock_bh(&br->hash_lock); } #if IS_ENABLED(CONFIG_ATM_LANE) /* Interface used by ATM LANE hook to test * if an addr is on some other bridge port */ int br_fdb_test_addr(struct net_device *dev, unsigned char *addr) { struct net_bridge_fdb_entry *fdb; struct net_bridge_port *port; int ret; rcu_read_lock(); port = br_port_get_rcu(dev); if (!port) ret = 0; else { const struct net_bridge_port *dst = NULL; fdb = br_fdb_find_rcu(port->br, addr, 0); if (fdb) dst = READ_ONCE(fdb->dst); ret = dst && dst->dev != dev && dst->state == BR_STATE_FORWARDING; } rcu_read_unlock(); return ret; } #endif /* CONFIG_ATM_LANE */ /* * Fill buffer with forwarding table records in * the API format. 
*/ int br_fdb_fillbuf(struct net_bridge *br, void *buf, unsigned long maxnum, unsigned long skip) { struct net_bridge_fdb_entry *f; struct __fdb_entry *fe = buf; int num = 0; memset(buf, 0, maxnum*sizeof(struct __fdb_entry)); rcu_read_lock(); hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) { if (num >= maxnum) break; if (has_expired(br, f)) continue; /* ignore pseudo entry for local MAC address */ if (!f->dst) continue; if (skip) { --skip; continue; } /* convert from internal format to API */ memcpy(fe->mac_addr, f->key.addr.addr, ETH_ALEN); /* due to ABI compat need to split into hi/lo */ fe->port_no = f->dst->port_no; fe->port_hi = f->dst->port_no >> 8; fe->is_local = test_bit(BR_FDB_LOCAL, &f->flags); if (!test_bit(BR_FDB_STATIC, &f->flags)) fe->ageing_timer_value = jiffies_delta_to_clock_t(jiffies - f->updated); ++fe; ++num; } rcu_read_unlock(); return num; } /* Add entry for local address of interface */ int br_fdb_add_local(struct net_bridge *br, struct net_bridge_port *source, const unsigned char *addr, u16 vid) { int ret; spin_lock_bh(&br->hash_lock); ret = fdb_add_local(br, source, addr, vid); spin_unlock_bh(&br->hash_lock); return ret; } /* returns true if the fdb was modified */ static bool __fdb_mark_active(struct net_bridge_fdb_entry *fdb) { return !!(test_bit(BR_FDB_NOTIFY_INACTIVE, &fdb->flags) && test_and_clear_bit(BR_FDB_NOTIFY_INACTIVE, &fdb->flags)); } void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, const unsigned char *addr, u16 vid, unsigned long flags) { struct net_bridge_fdb_entry *fdb; /* some users want to always flood. */ if (hold_time(br) == 0) return; fdb = fdb_find_rcu(&br->fdb_hash_tbl, addr, vid); if (likely(fdb)) { /* attempt to update an entry for a local interface */ if (unlikely(test_bit(BR_FDB_LOCAL, &fdb->flags))) { if (net_ratelimit()) br_warn(br, "received packet on %s with own address as source address (addr:%pM, vlan:%u)\n", source->dev->name, addr, vid); } else { unsigned long now = jiffies; bool fdb_modified = false; if (now != fdb->updated) { fdb->updated = now; fdb_modified = __fdb_mark_active(fdb); } /* fastpath: update of existing entry */ if (unlikely(source != READ_ONCE(fdb->dst) && !test_bit(BR_FDB_STICKY, &fdb->flags))) { br_switchdev_fdb_notify(br, fdb, RTM_DELNEIGH); WRITE_ONCE(fdb->dst, source); fdb_modified = true; /* Take over HW learned entry */ if (unlikely(test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags))) clear_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags); /* Clear locked flag when roaming to an * unlocked port. 
*/ if (unlikely(test_bit(BR_FDB_LOCKED, &fdb->flags))) clear_bit(BR_FDB_LOCKED, &fdb->flags); } if (unlikely(test_bit(BR_FDB_ADDED_BY_USER, &flags))) { set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags); if (test_and_clear_bit(BR_FDB_DYNAMIC_LEARNED, &fdb->flags)) atomic_dec(&br->fdb_n_learned); } if (unlikely(fdb_modified)) { trace_br_fdb_update(br, source, addr, vid, flags); fdb_notify(br, fdb, RTM_NEWNEIGH, true); } } } else { spin_lock(&br->hash_lock); fdb = fdb_create(br, source, addr, vid, flags); if (fdb) { trace_br_fdb_update(br, source, addr, vid, flags); fdb_notify(br, fdb, RTM_NEWNEIGH, true); } /* else we lose race and someone else inserts * it first, don't bother updating */ spin_unlock(&br->hash_lock); } } /* Dump information about entries, in response to GETNEIGH */ int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, int *idx) { struct ndo_fdb_dump_context *ctx = (void *)cb->ctx; struct net_bridge *br = netdev_priv(dev); struct net_bridge_fdb_entry *f; int err = 0; if (!netif_is_bridge_master(dev)) return err; if (!filter_dev) { err = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); if (err < 0) return err; } rcu_read_lock(); hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) { if (*idx < ctx->fdb_idx) goto skip; if (filter_dev && (!f->dst || f->dst->dev != filter_dev)) { if (filter_dev != dev) goto skip; /* !f->dst is a special case for bridge * It means the MAC belongs to the bridge * Therefore need a little more filtering * we only want to dump the !f->dst case */ if (f->dst) goto skip; } if (!filter_dev && f->dst) goto skip; err = fdb_fill_info(skb, br, f, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, NLM_F_MULTI); if (err < 0) break; skip: *idx += 1; } rcu_read_unlock(); return err; } int br_fdb_get(struct sk_buff *skb, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u32 portid, u32 seq, struct netlink_ext_ack *extack) { struct net_bridge *br = netdev_priv(dev); struct net_bridge_fdb_entry *f; int err = 0; rcu_read_lock(); f = br_fdb_find_rcu(br, addr, vid); if (!f) { NL_SET_ERR_MSG(extack, "Fdb entry not found"); err = -ENOENT; goto errout; } err = fdb_fill_info(skb, br, f, portid, seq, RTM_NEWNEIGH, 0); errout: rcu_read_unlock(); return err; } /* returns true if the fdb is modified */ static bool fdb_handle_notify(struct net_bridge_fdb_entry *fdb, u8 notify) { bool modified = false; /* allow to mark an entry as inactive, usually done on creation */ if ((notify & FDB_NOTIFY_INACTIVE_BIT) && !test_and_set_bit(BR_FDB_NOTIFY_INACTIVE, &fdb->flags)) modified = true; if ((notify & FDB_NOTIFY_BIT) && !test_and_set_bit(BR_FDB_NOTIFY, &fdb->flags)) { /* enabled activity tracking */ modified = true; } else if (!(notify & FDB_NOTIFY_BIT) && test_and_clear_bit(BR_FDB_NOTIFY, &fdb->flags)) { /* disabled activity tracking, clear notify state */ clear_bit(BR_FDB_NOTIFY_INACTIVE, &fdb->flags); modified = true; } return modified; } /* Update (create or replace) forwarding database entry */ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source, const u8 *addr, struct ndmsg *ndm, u16 flags, u16 vid, struct nlattr *nfea_tb[]) { bool is_sticky = !!(ndm->ndm_flags & NTF_STICKY); bool refresh = !nfea_tb[NFEA_DONT_REFRESH]; struct net_bridge_fdb_entry *fdb; u16 state = ndm->ndm_state; bool modified = false; u8 notify = 0; /* If the port cannot learn allow only local and static entries */ if (source && !(state & NUD_PERMANENT) && !(state & NUD_NOARP) && 
!(source->state == BR_STATE_LEARNING || source->state == BR_STATE_FORWARDING)) return -EPERM; if (!source && !(state & NUD_PERMANENT)) { pr_info("bridge: RTM_NEWNEIGH %s without NUD_PERMANENT\n", br->dev->name); return -EINVAL; } if (is_sticky && (state & NUD_PERMANENT)) return -EINVAL; if (nfea_tb[NFEA_ACTIVITY_NOTIFY]) { notify = nla_get_u8(nfea_tb[NFEA_ACTIVITY_NOTIFY]); if ((notify & ~BR_FDB_NOTIFY_SETTABLE_BITS) || (notify & BR_FDB_NOTIFY_SETTABLE_BITS) == FDB_NOTIFY_INACTIVE_BIT) return -EINVAL; } fdb = br_fdb_find(br, addr, vid); if (fdb == NULL) { if (!(flags & NLM_F_CREATE)) return -ENOENT; fdb = fdb_create(br, source, addr, vid, BIT(BR_FDB_ADDED_BY_USER)); if (!fdb) return -ENOMEM; modified = true; } else { if (flags & NLM_F_EXCL) return -EEXIST; if (READ_ONCE(fdb->dst) != source) { WRITE_ONCE(fdb->dst, source); modified = true; } set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags); if (test_and_clear_bit(BR_FDB_DYNAMIC_LEARNED, &fdb->flags)) atomic_dec(&br->fdb_n_learned); } if (fdb_to_nud(br, fdb) != state) { if (state & NUD_PERMANENT) { set_bit(BR_FDB_LOCAL, &fdb->flags); if (!test_and_set_bit(BR_FDB_STATIC, &fdb->flags)) fdb_add_hw_addr(br, addr); } else if (state & NUD_NOARP) { clear_bit(BR_FDB_LOCAL, &fdb->flags); if (!test_and_set_bit(BR_FDB_STATIC, &fdb->flags)) fdb_add_hw_addr(br, addr); } else { clear_bit(BR_FDB_LOCAL, &fdb->flags); if (test_and_clear_bit(BR_FDB_STATIC, &fdb->flags)) fdb_del_hw_addr(br, addr); } modified = true; } if (is_sticky != test_bit(BR_FDB_STICKY, &fdb->flags)) { change_bit(BR_FDB_STICKY, &fdb->flags); modified = true; } if (test_and_clear_bit(BR_FDB_LOCKED, &fdb->flags)) modified = true; if (fdb_handle_notify(fdb, notify)) modified = true; fdb->used = jiffies; if (modified) { if (refresh) fdb->updated = jiffies; fdb_notify(br, fdb, RTM_NEWNEIGH, true); } return 0; } static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 nlh_flags, u16 vid, struct nlattr *nfea_tb[], bool *notified, struct netlink_ext_ack *extack) { int err = 0; if (ndm->ndm_flags & NTF_USE) { if (!p) { pr_info("bridge: RTM_NEWNEIGH %s with NTF_USE is not supported\n", br->dev->name); return -EINVAL; } if (!nbp_state_should_learn(p)) return 0; local_bh_disable(); rcu_read_lock(); br_fdb_update(br, p, addr, vid, BIT(BR_FDB_ADDED_BY_USER)); rcu_read_unlock(); local_bh_enable(); } else if (ndm->ndm_flags & NTF_EXT_LEARNED) { if (!p && !(ndm->ndm_state & NUD_PERMANENT)) { NL_SET_ERR_MSG_MOD(extack, "FDB entry towards bridge must be permanent"); return -EINVAL; } err = br_fdb_external_learn_add(br, p, addr, vid, false, true); } else { spin_lock_bh(&br->hash_lock); err = fdb_add_entry(br, p, addr, ndm, nlh_flags, vid, nfea_tb); spin_unlock_bh(&br->hash_lock); } if (!err) *notified = true; return err; } static const struct nla_policy br_nda_fdb_pol[NFEA_MAX + 1] = { [NFEA_ACTIVITY_NOTIFY] = { .type = NLA_U8 }, [NFEA_DONT_REFRESH] = { .type = NLA_FLAG }, }; /* Add new permanent fdb entry with RTM_NEWNEIGH */ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u16 nlh_flags, bool *notified, struct netlink_ext_ack *extack) { struct nlattr *nfea_tb[NFEA_MAX + 1], *attr; struct net_bridge_vlan_group *vg; struct net_bridge_port *p = NULL; struct net_bridge_vlan *v; struct net_bridge *br = NULL; u32 ext_flags = 0; int err = 0; trace_br_fdb_add(ndm, dev, addr, vid, nlh_flags); if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE))) { pr_info("bridge: RTM_NEWNEIGH 
with invalid state %#x\n", ndm->ndm_state); return -EINVAL; } if (is_zero_ether_addr(addr)) { pr_info("bridge: RTM_NEWNEIGH with invalid ether address\n"); return -EINVAL; } if (netif_is_bridge_master(dev)) { br = netdev_priv(dev); vg = br_vlan_group(br); } else { p = br_port_get_rtnl(dev); if (!p) { pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n", dev->name); return -EINVAL; } br = p->br; vg = nbp_vlan_group(p); } if (tb[NDA_FLAGS_EXT]) ext_flags = nla_get_u32(tb[NDA_FLAGS_EXT]); if (ext_flags & NTF_EXT_LOCKED) { NL_SET_ERR_MSG_MOD(extack, "Cannot add FDB entry with \"locked\" flag set"); return -EINVAL; } if (tb[NDA_FDB_EXT_ATTRS]) { attr = tb[NDA_FDB_EXT_ATTRS]; err = nla_parse_nested(nfea_tb, NFEA_MAX, attr, br_nda_fdb_pol, extack); if (err) return err; } else { memset(nfea_tb, 0, sizeof(struct nlattr *) * (NFEA_MAX + 1)); } if (vid) { v = br_vlan_find(vg, vid); if (!v || !br_vlan_should_use(v)) { pr_info("bridge: RTM_NEWNEIGH with unconfigured vlan %d on %s\n", vid, dev->name); return -EINVAL; } /* VID was specified, so use it. */ err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid, nfea_tb, notified, extack); } else { err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0, nfea_tb, notified, extack); if (err || !vg || !vg->num_vlans) goto out; /* We have vlans configured on this port and user didn't * specify a VLAN. To be nice, add/update entry for every * vlan on this port. */ list_for_each_entry(v, &vg->vlan_list, vlist) { if (!br_vlan_should_use(v)) continue; err = __br_fdb_add(ndm, br, p, addr, nlh_flags, v->vid, nfea_tb, notified, extack); if (err) goto out; } } out: return err; } static int fdb_delete_by_addr_and_port(struct net_bridge *br, const struct net_bridge_port *p, const u8 *addr, u16 vlan, bool *notified) { struct net_bridge_fdb_entry *fdb; fdb = br_fdb_find(br, addr, vlan); if (!fdb || READ_ONCE(fdb->dst) != p) return -ENOENT; fdb_delete(br, fdb, true); *notified = true; return 0; } static int __br_fdb_delete(struct net_bridge *br, const struct net_bridge_port *p, const unsigned char *addr, u16 vid, bool *notified) { int err; spin_lock_bh(&br->hash_lock); err = fdb_delete_by_addr_and_port(br, p, addr, vid, notified); spin_unlock_bh(&br->hash_lock); return err; } /* Remove neighbor entry with RTM_DELNEIGH */ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, bool *notified, struct netlink_ext_ack *extack) { struct net_bridge_vlan_group *vg; struct net_bridge_port *p = NULL; struct net_bridge *br; int err; if (netif_is_bridge_master(dev)) { br = netdev_priv(dev); vg = br_vlan_group(br); } else { p = br_port_get_rtnl(dev); if (!p) { pr_info("bridge: RTM_DELNEIGH %s not a bridge port\n", dev->name); return -EINVAL; } vg = nbp_vlan_group(p); br = p->br; } if (vid) { err = __br_fdb_delete(br, p, addr, vid, notified); } else { struct net_bridge_vlan *v; err = -ENOENT; err &= __br_fdb_delete(br, p, addr, 0, notified); if (!vg || !vg->num_vlans) return err; list_for_each_entry(v, &vg->vlan_list, vlist) { if (!br_vlan_should_use(v)) continue; err &= __br_fdb_delete(br, p, addr, v->vid, notified); } } return err; } int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p) { struct net_bridge_fdb_entry *f, *tmp; int err = 0; ASSERT_RTNL(); /* the key here is that static entries change only under rtnl */ rcu_read_lock(); hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) { /* We only care for static entries */ if (!test_bit(BR_FDB_STATIC, &f->flags)) continue; err = dev_uc_add(p->dev, 
f->key.addr.addr); if (err) goto rollback; } done: rcu_read_unlock(); return err; rollback: hlist_for_each_entry_rcu(tmp, &br->fdb_list, fdb_node) { /* We only care for static entries */ if (!test_bit(BR_FDB_STATIC, &tmp->flags)) continue; if (tmp == f) break; dev_uc_del(p->dev, tmp->key.addr.addr); } goto done; } void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p) { struct net_bridge_fdb_entry *f; ASSERT_RTNL(); rcu_read_lock(); hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) { /* We only care for static entries */ if (!test_bit(BR_FDB_STATIC, &f->flags)) continue; dev_uc_del(p->dev, f->key.addr.addr); } rcu_read_unlock(); } int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid, bool locked, bool swdev_notify) { struct net_bridge_fdb_entry *fdb; bool modified = false; int err = 0; trace_br_fdb_external_learn_add(br, p, addr, vid); if (locked && (!p || !(p->flags & BR_PORT_MAB))) return -EINVAL; spin_lock_bh(&br->hash_lock); fdb = br_fdb_find(br, addr, vid); if (!fdb) { unsigned long flags = BIT(BR_FDB_ADDED_BY_EXT_LEARN); if (swdev_notify) flags |= BIT(BR_FDB_ADDED_BY_USER); if (!p) flags |= BIT(BR_FDB_LOCAL); if (locked) flags |= BIT(BR_FDB_LOCKED); fdb = fdb_create(br, p, addr, vid, flags); if (!fdb) { err = -ENOMEM; goto err_unlock; } fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify); } else { if (locked && (!test_bit(BR_FDB_LOCKED, &fdb->flags) || READ_ONCE(fdb->dst) != p)) { err = -EINVAL; goto err_unlock; } fdb->updated = jiffies; if (READ_ONCE(fdb->dst) != p) { WRITE_ONCE(fdb->dst, p); modified = true; } if (test_and_set_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags)) { /* Refresh entry */ fdb->used = jiffies; } else { modified = true; } if (locked != test_bit(BR_FDB_LOCKED, &fdb->flags)) { change_bit(BR_FDB_LOCKED, &fdb->flags); modified = true; } if (swdev_notify) set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags); if (!p) set_bit(BR_FDB_LOCAL, &fdb->flags); if ((swdev_notify || !p) && test_and_clear_bit(BR_FDB_DYNAMIC_LEARNED, &fdb->flags)) atomic_dec(&br->fdb_n_learned); if (modified) fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify); } err_unlock: spin_unlock_bh(&br->hash_lock); return err; } int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid, bool swdev_notify) { struct net_bridge_fdb_entry *fdb; int err = 0; spin_lock_bh(&br->hash_lock); fdb = br_fdb_find(br, addr, vid); if (fdb && test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags)) fdb_delete(br, fdb, swdev_notify); else err = -ENOENT; spin_unlock_bh(&br->hash_lock); return err; } void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid, bool offloaded) { struct net_bridge_fdb_entry *fdb; spin_lock_bh(&br->hash_lock); fdb = br_fdb_find(br, addr, vid); if (fdb && offloaded != test_bit(BR_FDB_OFFLOADED, &fdb->flags)) change_bit(BR_FDB_OFFLOADED, &fdb->flags); spin_unlock_bh(&br->hash_lock); } void br_fdb_clear_offload(const struct net_device *dev, u16 vid) { struct net_bridge_fdb_entry *f; struct net_bridge_port *p; ASSERT_RTNL(); p = br_port_get_rtnl(dev); if (!p) return; spin_lock_bh(&p->br->hash_lock); hlist_for_each_entry(f, &p->br->fdb_list, fdb_node) { if (f->dst == p && f->key.vlan_id == vid) clear_bit(BR_FDB_OFFLOADED, &f->flags); } spin_unlock_bh(&p->br->hash_lock); } EXPORT_SYMBOL_GPL(br_fdb_clear_offload); |
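br_fdb_find_port() is the lookup helper this file exports to the rest of the kernel: it maps a {MAC, VLAN} pair to the net_device of the bridge port that currently owns it, and it must be called under RTNL. Below is a minimal sketch of a hypothetical caller; the module wrapper, the "br0" device name and the MAC address are illustrative assumptions, not part of the file above.

/*
 * Sketch only (not part of br_fdb.c): look up which port of an assumed
 * bridge "br0" has learned a given MAC on the untagged VLAN.
 */
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/rtnetlink.h>
#include <linux/if_bridge.h>

static int __init fdb_lookup_demo_init(void)
{
	const unsigned char mac[ETH_ALEN] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
	struct net_device *br_dev, *port_dev;

	rtnl_lock();	/* br_fdb_find_port() asserts RTNL */
	br_dev = __dev_get_by_name(&init_net, "br0");	/* assumed bridge name */
	if (br_dev) {
		port_dev = br_fdb_find_port(br_dev, mac, 0);	/* 0 = untagged */
		pr_info("%pM is behind %s\n", mac,
			port_dev ? port_dev->name : "<no fdb entry>");
	}
	rtnl_unlock();
	return 0;
}

static void __exit fdb_lookup_demo_exit(void)
{
}

module_init(fdb_lookup_demo_init);
module_exit(fdb_lookup_demo_exit);
MODULE_LICENSE("GPL");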
// SPDX-License-Identifier: GPL-2.0 // // DVB device driver for em28xx // // (c) 2008-2011 Mauro Carvalho Chehab <mchehab@kernel.org> // // (c) 2008 Devin Heitmueller <devin.heitmueller@gmail.com> // - Fixes for the driver to properly work with HVR-950 // - Fixes for the driver to properly work with Pinnacle PCTV HD Pro Stick // - Fixes for the driver to properly work with AMD ATI TV Wonder HD 600 // // (c) 2008 Aidan Thornton <makosoft@googlemail.com> // // (c) 2012 Frank Schäfer <fschaefer.oss@googlemail.com> // // Based on cx88-dvb, saa7134-dvb and videobuf-dvb originally written by: // (c) 2004, 2005 Chris Pascoe <c.pascoe@itee.uq.edu.au> // (c) 2004 Gerd Knorr <kraxel@bytesex.org> [SuSE Labs] #include "em28xx.h" #include <linux/kernel.h> #include <linux/slab.h> #include <linux/usb.h> #include <media/v4l2-common.h> #include <media/dvb_demux.h> #include <media/dvb_net.h> #include <media/dmxdev.h> #include <media/tuner.h> #include "tuner-simple.h" #include
<linux/gpio.h> #include "lgdt330x.h" #include "lgdt3305.h" #include "lgdt3306a.h" #include "zl10353.h" #include "s5h1409.h" #include "mt2060.h" #include "mt352.h" #include "mt352_priv.h" /* FIXME */ #include "tda1002x.h" #include "drx39xyj/drx39xxj.h" #include "tda18271.h" #include "s921.h" #include "drxd.h" #include "cxd2820r.h" #include "tda18271c2dd.h" #include "drxk.h" #include "tda10071.h" #include "tda18212.h" #include "a8293.h" #include "qt1010.h" #include "mb86a20s.h" #include "m88ds3103.h" #include "ts2020.h" #include "si2168.h" #include "si2157.h" #include "tc90522.h" #include "qm1d1c0042.h" #include "mxl692.h" MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@kernel.org>"); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION(DRIVER_DESC " - digital TV interface"); MODULE_VERSION(EM28XX_VERSION); static unsigned int debug; module_param(debug, int, 0644); MODULE_PARM_DESC(debug, "enable debug messages [dvb]"); DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr); #define dprintk(level, fmt, arg...) do { \ if (debug >= level) \ dev_printk(KERN_DEBUG, &dev->intf->dev, \ "dvb: " fmt, ## arg); \ } while (0) struct em28xx_dvb { struct dvb_frontend *fe[2]; /* feed count management */ struct mutex lock; int nfeeds; /* general boilerplate stuff */ struct dvb_adapter adapter; struct dvb_demux demux; struct dmxdev dmxdev; struct dmx_frontend fe_hw; struct dmx_frontend fe_mem; struct dvb_net net; /* Due to DRX-K - probably need changes */ int (*gate_ctrl)(struct dvb_frontend *fe, int gate); struct semaphore pll_mutex; bool dont_attach_fe1; int lna_gpio; struct i2c_client *i2c_client_demod; struct i2c_client *i2c_client_tuner; struct i2c_client *i2c_client_sec; }; static inline void print_err_status(struct em28xx *dev, int packet, int status) { char *errmsg = "Unknown"; switch (status) { case -ENOENT: errmsg = "unlinked synchronously"; break; case -ECONNRESET: errmsg = "unlinked asynchronously"; break; case -ENOSR: errmsg = "Buffer error (overrun)"; break; case -EPIPE: errmsg = "Stalled (device not responding)"; break; case -EOVERFLOW: errmsg = "Babble (bad cable?)"; break; case -EPROTO: errmsg = "Bit-stuff error (bad cable?)"; break; case -EILSEQ: errmsg = "CRC/Timeout (could be anything)"; break; case -ETIME: errmsg = "Device does not respond"; break; } if (packet < 0) { dprintk(1, "URB status %d [%s].\n", status, errmsg); } else { dprintk(1, "URB packet %d, status %d [%s].\n", packet, status, errmsg); } } static inline int em28xx_dvb_urb_data_copy(struct em28xx *dev, struct urb *urb) { int xfer_bulk, num_packets, i; if (!dev) return 0; if (dev->disconnected) return 0; if (urb->status < 0) print_err_status(dev, -1, urb->status); xfer_bulk = usb_pipebulk(urb->pipe); if (xfer_bulk) /* bulk */ num_packets = 1; else /* isoc */ num_packets = urb->number_of_packets; for (i = 0; i < num_packets; i++) { if (xfer_bulk) { if (urb->status < 0) { print_err_status(dev, i, urb->status); if (urb->status != -EPROTO) continue; } if (!urb->actual_length) continue; dvb_dmx_swfilter(&dev->dvb->demux, urb->transfer_buffer, urb->actual_length); } else { if (urb->iso_frame_desc[i].status < 0) { print_err_status(dev, i, urb->iso_frame_desc[i].status); if (urb->iso_frame_desc[i].status != -EPROTO) continue; } if (!urb->iso_frame_desc[i].actual_length) continue; dvb_dmx_swfilter(&dev->dvb->demux, urb->transfer_buffer + urb->iso_frame_desc[i].offset, urb->iso_frame_desc[i].actual_length); } } return 0; } static int em28xx_start_streaming(struct em28xx_dvb *dvb) { int rc; struct em28xx_i2c_bus *i2c_bus = dvb->adapter.priv; struct em28xx 
*dev = i2c_bus->dev; struct usb_device *udev = interface_to_usbdev(dev->intf); int dvb_max_packet_size, packet_multiplier, dvb_alt; if (dev->dvb_xfer_bulk) { if (!dev->dvb_ep_bulk) return -ENODEV; dvb_max_packet_size = 512; /* USB 2.0 spec */ packet_multiplier = EM28XX_DVB_BULK_PACKET_MULTIPLIER; dvb_alt = 0; } else { /* isoc */ if (!dev->dvb_ep_isoc) return -ENODEV; dvb_max_packet_size = dev->dvb_max_pkt_size_isoc; if (dvb_max_packet_size < 0) return dvb_max_packet_size; packet_multiplier = EM28XX_DVB_NUM_ISOC_PACKETS; dvb_alt = dev->dvb_alt_isoc; } if (!dev->board.has_dual_ts) usb_set_interface(udev, dev->ifnum, dvb_alt); rc = em28xx_set_mode(dev, EM28XX_DIGITAL_MODE); if (rc < 0) return rc; dprintk(1, "Using %d buffers each with %d x %d bytes, alternate %d\n", EM28XX_DVB_NUM_BUFS, packet_multiplier, dvb_max_packet_size, dvb_alt); return em28xx_init_usb_xfer(dev, EM28XX_DIGITAL_MODE, dev->dvb_xfer_bulk, EM28XX_DVB_NUM_BUFS, dvb_max_packet_size, packet_multiplier, em28xx_dvb_urb_data_copy); } static int em28xx_stop_streaming(struct em28xx_dvb *dvb) { struct em28xx_i2c_bus *i2c_bus = dvb->adapter.priv; struct em28xx *dev = i2c_bus->dev; em28xx_stop_urbs(dev); return 0; } static int em28xx_start_feed(struct dvb_demux_feed *feed) { struct dvb_demux *demux = feed->demux; struct em28xx_dvb *dvb = demux->priv; int rc, ret; if (!demux->dmx.frontend) return -EINVAL; mutex_lock(&dvb->lock); dvb->nfeeds++; rc = dvb->nfeeds; if (dvb->nfeeds == 1) { ret = em28xx_start_streaming(dvb); if (ret < 0) rc = ret; } mutex_unlock(&dvb->lock); return rc; } static int em28xx_stop_feed(struct dvb_demux_feed *feed) { struct dvb_demux *demux = feed->demux; struct em28xx_dvb *dvb = demux->priv; int err = 0; mutex_lock(&dvb->lock); dvb->nfeeds--; if (!dvb->nfeeds) err = em28xx_stop_streaming(dvb); mutex_unlock(&dvb->lock); return err; } /* ------------------------------------------------------------------ */ static int em28xx_dvb_bus_ctrl(struct dvb_frontend *fe, int acquire) { struct em28xx_i2c_bus *i2c_bus = fe->dvb->priv; struct em28xx *dev = i2c_bus->dev; if (acquire) return em28xx_set_mode(dev, EM28XX_DIGITAL_MODE); else return em28xx_set_mode(dev, EM28XX_SUSPEND); } /* ------------------------------------------------------------------ */ static struct lgdt330x_config em2880_lgdt3303_dev = { .demod_chip = LGDT3303, }; static struct lgdt3305_config em2870_lgdt3304_dev = { .i2c_addr = 0x0e, .demod_chip = LGDT3304, .spectral_inversion = 1, .deny_i2c_rptr = 1, .mpeg_mode = LGDT3305_MPEG_PARALLEL, .tpclk_edge = LGDT3305_TPCLK_FALLING_EDGE, .tpvalid_polarity = LGDT3305_TP_VALID_HIGH, .vsb_if_khz = 3250, .qam_if_khz = 4000, }; static struct lgdt3305_config em2874_lgdt3305_dev = { .i2c_addr = 0x0e, .demod_chip = LGDT3305, .spectral_inversion = 1, .deny_i2c_rptr = 0, .mpeg_mode = LGDT3305_MPEG_SERIAL, .tpclk_edge = LGDT3305_TPCLK_FALLING_EDGE, .tpvalid_polarity = LGDT3305_TP_VALID_HIGH, .vsb_if_khz = 3250, .qam_if_khz = 4000, }; static struct lgdt3305_config em2874_lgdt3305_nogate_dev = { .i2c_addr = 0x0e, .demod_chip = LGDT3305, .spectral_inversion = 1, .deny_i2c_rptr = 1, .mpeg_mode = LGDT3305_MPEG_SERIAL, .tpclk_edge = LGDT3305_TPCLK_FALLING_EDGE, .tpvalid_polarity = LGDT3305_TP_VALID_HIGH, .vsb_if_khz = 3600, .qam_if_khz = 3600, }; static struct s921_config sharp_isdbt = { .demod_address = 0x30 >> 1 }; static struct zl10353_config em28xx_zl10353_with_xc3028 = { .demod_address = (0x1e >> 1), .no_tuner = 1, .parallel_ts = 1, .if2 = 45600, }; static struct s5h1409_config em28xx_s5h1409_with_xc3028 = { .demod_address = 
0x32 >> 1, .output_mode = S5H1409_PARALLEL_OUTPUT, .gpio = S5H1409_GPIO_OFF, .inversion = S5H1409_INVERSION_OFF, .status_mode = S5H1409_DEMODLOCKING, .mpeg_timing = S5H1409_MPEGTIMING_CONTINUOUS_NONINVERTING_CLOCK }; static struct tda18271_std_map kworld_a340_std_map = { .atsc_6 = { .if_freq = 3250, .agc_mode = 3, .std = 0, .if_lvl = 1, .rfagc_top = 0x37, }, .qam_6 = { .if_freq = 4000, .agc_mode = 3, .std = 1, .if_lvl = 1, .rfagc_top = 0x37, }, }; static struct tda18271_config kworld_a340_config = { .std_map = &kworld_a340_std_map, }; static struct tda18271_config kworld_ub435q_v2_config = { .std_map = &kworld_a340_std_map, .gate = TDA18271_GATE_DIGITAL, }; static struct tda18212_config kworld_ub435q_v3_config = { .if_atsc_vsb = 3600, .if_atsc_qam = 3600, }; static struct zl10353_config em28xx_zl10353_xc3028_no_i2c_gate = { .demod_address = (0x1e >> 1), .no_tuner = 1, .disable_i2c_gate_ctrl = 1, .parallel_ts = 1, .if2 = 45600, }; static struct drxd_config em28xx_drxd = { .demod_address = 0x70, .demod_revision = 0xa2, .pll_type = DRXD_PLL_NONE, .clock = 12000, .insert_rs_byte = 1, .IF = 42800000, .disable_i2c_gate_ctrl = 1, }; static struct drxk_config terratec_h5_drxk = { .adr = 0x29, .single_master = 1, .no_i2c_bridge = 1, .microcode_name = "dvb-usb-terratec-h5-drxk.fw", .qam_demod_parameter_count = 2, }; static struct drxk_config hauppauge_930c_drxk = { .adr = 0x29, .single_master = 1, .no_i2c_bridge = 1, .microcode_name = "dvb-usb-hauppauge-hvr930c-drxk.fw", .chunk_size = 56, .qam_demod_parameter_count = 2, }; static struct drxk_config terratec_htc_stick_drxk = { .adr = 0x29, .single_master = 1, .no_i2c_bridge = 1, .microcode_name = "dvb-usb-terratec-htc-stick-drxk.fw", .chunk_size = 54, .qam_demod_parameter_count = 2, /* Required for the antenna_gpio to disable LNA. */ .antenna_dvbt = true, /* The windows driver uses the same. This will disable LNA. 
*/ .antenna_gpio = 0x6, }; static struct drxk_config maxmedia_ub425_tc_drxk = { .adr = 0x29, .single_master = 1, .no_i2c_bridge = 1, .microcode_name = "dvb-demod-drxk-01.fw", .chunk_size = 62, .qam_demod_parameter_count = 2, }; static struct drxk_config pctv_520e_drxk = { .adr = 0x29, .single_master = 1, .microcode_name = "dvb-demod-drxk-pctv.fw", .qam_demod_parameter_count = 2, .chunk_size = 58, .antenna_dvbt = true, /* disable LNA */ .antenna_gpio = (1 << 2), /* disable LNA */ }; static int drxk_gate_ctrl(struct dvb_frontend *fe, int enable) { struct em28xx_dvb *dvb = fe->sec_priv; int status; if (!dvb) return -EINVAL; if (enable) { down(&dvb->pll_mutex); status = dvb->gate_ctrl(fe, 1); } else { status = dvb->gate_ctrl(fe, 0); up(&dvb->pll_mutex); } return status; } static void hauppauge_hvr930c_init(struct em28xx *dev) { int i; static const struct em28xx_reg_seq hauppauge_hvr930c_init[] = { {EM2874_R80_GPIO_P0_CTRL, 0xff, 0xff, 0x65}, {EM2874_R80_GPIO_P0_CTRL, 0xfb, 0xff, 0x32}, {EM2874_R80_GPIO_P0_CTRL, 0xff, 0xff, 0xb8}, { -1, -1, -1, -1}, }; static const struct em28xx_reg_seq hauppauge_hvr930c_end[] = { {EM2874_R80_GPIO_P0_CTRL, 0xef, 0xff, 0x01}, {EM2874_R80_GPIO_P0_CTRL, 0xaf, 0xff, 0x65}, {EM2874_R80_GPIO_P0_CTRL, 0xef, 0xff, 0x76}, {EM2874_R80_GPIO_P0_CTRL, 0xef, 0xff, 0x01}, {EM2874_R80_GPIO_P0_CTRL, 0xcf, 0xff, 0x0b}, {EM2874_R80_GPIO_P0_CTRL, 0xef, 0xff, 0x40}, {EM2874_R80_GPIO_P0_CTRL, 0xcf, 0xff, 0x65}, {EM2874_R80_GPIO_P0_CTRL, 0xef, 0xff, 0x65}, {EM2874_R80_GPIO_P0_CTRL, 0xcf, 0xff, 0x0b}, {EM2874_R80_GPIO_P0_CTRL, 0xef, 0xff, 0x65}, { -1, -1, -1, -1}, }; static const struct { unsigned char r[4]; int len; } regs[] = { {{ 0x06, 0x02, 0x00, 0x31 }, 4}, {{ 0x01, 0x02 }, 2}, {{ 0x01, 0x02, 0x00, 0xc6 }, 4}, {{ 0x01, 0x00 }, 2}, {{ 0x01, 0x00, 0xff, 0xaf }, 4}, {{ 0x01, 0x00, 0x03, 0xa0 }, 4}, {{ 0x01, 0x00 }, 2}, {{ 0x01, 0x00, 0x73, 0xaf }, 4}, {{ 0x04, 0x00 }, 2}, {{ 0x00, 0x04 }, 2}, {{ 0x00, 0x04, 0x00, 0x0a }, 4}, {{ 0x04, 0x14 }, 2}, {{ 0x04, 0x14, 0x00, 0x00 }, 4}, }; em28xx_gpio_set(dev, hauppauge_hvr930c_init); em28xx_write_reg(dev, EM28XX_R06_I2C_CLK, 0x40); usleep_range(10000, 11000); em28xx_write_reg(dev, EM28XX_R06_I2C_CLK, 0x44); usleep_range(10000, 11000); dev->i2c_client[dev->def_i2c_bus].addr = 0x82 >> 1; for (i = 0; i < ARRAY_SIZE(regs); i++) i2c_master_send(&dev->i2c_client[dev->def_i2c_bus], regs[i].r, regs[i].len); em28xx_gpio_set(dev, hauppauge_hvr930c_end); msleep(100); em28xx_write_reg(dev, EM28XX_R06_I2C_CLK, 0x44); msleep(30); em28xx_write_reg(dev, EM28XX_R06_I2C_CLK, 0x45); usleep_range(10000, 11000); } static void terratec_h5_init(struct em28xx *dev) { int i; static const struct em28xx_reg_seq terratec_h5_init[] = { {EM2820_R08_GPIO_CTRL, 0xff, 0xff, 10}, {EM2874_R80_GPIO_P0_CTRL, 0xf6, 0xff, 100}, {EM2874_R80_GPIO_P0_CTRL, 0xf2, 0xff, 50}, {EM2874_R80_GPIO_P0_CTRL, 0xf6, 0xff, 100}, { -1, -1, -1, -1}, }; static const struct em28xx_reg_seq terratec_h5_end[] = { {EM2874_R80_GPIO_P0_CTRL, 0xe6, 0xff, 100}, {EM2874_R80_GPIO_P0_CTRL, 0xa6, 0xff, 50}, {EM2874_R80_GPIO_P0_CTRL, 0xe6, 0xff, 100}, { -1, -1, -1, -1}, }; static const struct { unsigned char r[4]; int len; } regs[] = { {{ 0x06, 0x02, 0x00, 0x31 }, 4}, {{ 0x01, 0x02 }, 2}, {{ 0x01, 0x02, 0x00, 0xc6 }, 4}, {{ 0x01, 0x00 }, 2}, {{ 0x01, 0x00, 0xff, 0xaf }, 4}, {{ 0x01, 0x00, 0x03, 0xa0 }, 4}, {{ 0x01, 0x00 }, 2}, {{ 0x01, 0x00, 0x73, 0xaf }, 4}, {{ 0x04, 0x00 }, 2}, {{ 0x00, 0x04 }, 2}, {{ 0x00, 0x04, 0x00, 0x0a }, 4}, {{ 0x04, 0x14 }, 2}, {{ 0x04, 0x14, 0x00, 0x00 }, 4}, }; em28xx_gpio_set(dev, 
terratec_h5_init); em28xx_write_reg(dev, EM28XX_R06_I2C_CLK, 0x40); usleep_range(10000, 11000); em28xx_write_reg(dev, EM28XX_R06_I2C_CLK, 0x45); usleep_range(10000, 11000); dev->i2c_client[dev->def_i2c_bus].addr = 0x82 >> 1; for (i = 0; i < ARRAY_SIZE(regs); i++) i2c_master_send(&dev->i2c_client[dev->def_i2c_bus], regs[i].r, regs[i].len); em28xx_gpio_set(dev, terratec_h5_end); }; static void terratec_htc_stick_init(struct em28xx *dev) { int i; /* * GPIO configuration: * 0xff: unknown (does not affect DVB-T). * 0xf6: DRX-K (demodulator). * 0xe6: unknown (does not affect DVB-T). * 0xb6: unknown (does not affect DVB-T). */ static const struct em28xx_reg_seq terratec_htc_stick_init[] = { {EM2820_R08_GPIO_CTRL, 0xff, 0xff, 10}, {EM2874_R80_GPIO_P0_CTRL, 0xf6, 0xff, 100}, {EM2874_R80_GPIO_P0_CTRL, 0xe6, 0xff, 50}, {EM2874_R80_GPIO_P0_CTRL, 0xf6, 0xff, 100}, { -1, -1, -1, -1}, }; static const struct em28xx_reg_seq terratec_htc_stick_end[] = { {EM2874_R80_GPIO_P0_CTRL, 0xb6, 0xff, 100}, {EM2874_R80_GPIO_P0_CTRL, 0xf6, 0xff, 50}, { -1, -1, -1, -1}, }; /* * Init the analog decoder (not yet supported), but * it's probably still a good idea. */ static const struct { unsigned char r[4]; int len; } regs[] = { {{ 0x06, 0x02, 0x00, 0x31 }, 4}, {{ 0x01, 0x02 }, 2}, {{ 0x01, 0x02, 0x00, 0xc6 }, 4}, {{ 0x01, 0x00 }, 2}, {{ 0x01, 0x00, 0xff, 0xaf }, 4}, }; em28xx_gpio_set(dev, terratec_htc_stick_init); em28xx_write_reg(dev, EM28XX_R06_I2C_CLK, 0x40); usleep_range(10000, 11000); em28xx_write_reg(dev, EM28XX_R06_I2C_CLK, 0x44); usleep_range(10000, 11000); dev->i2c_client[dev->def_i2c_bus].addr = 0x82 >> 1; for (i = 0; i < ARRAY_SIZE(regs); i++) i2c_master_send(&dev->i2c_client[dev->def_i2c_bus], regs[i].r, regs[i].len); em28xx_gpio_set(dev, terratec_htc_stick_end); }; static void terratec_htc_usb_xs_init(struct em28xx *dev) { int i; static const struct em28xx_reg_seq terratec_htc_usb_xs_init[] = { {EM2820_R08_GPIO_CTRL, 0xff, 0xff, 10}, {EM2874_R80_GPIO_P0_CTRL, 0xb2, 0xff, 100}, {EM2874_R80_GPIO_P0_CTRL, 0xb2, 0xff, 50}, {EM2874_R80_GPIO_P0_CTRL, 0xb6, 0xff, 100}, { -1, -1, -1, -1}, }; static const struct em28xx_reg_seq terratec_htc_usb_xs_end[] = { {EM2874_R80_GPIO_P0_CTRL, 0xa6, 0xff, 100}, {EM2874_R80_GPIO_P0_CTRL, 0xa6, 0xff, 50}, {EM2874_R80_GPIO_P0_CTRL, 0xe6, 0xff, 100}, { -1, -1, -1, -1}, }; /* * Init the analog decoder (not yet supported), but * it's probably still a good idea. */ static const struct { unsigned char r[4]; int len; } regs[] = { {{ 0x06, 0x02, 0x00, 0x31 }, 4}, {{ 0x01, 0x02 }, 2}, {{ 0x01, 0x02, 0x00, 0xc6 }, 4}, {{ 0x01, 0x00 }, 2}, {{ 0x01, 0x00, 0xff, 0xaf }, 4}, {{ 0x01, 0x00, 0x03, 0xa0 }, 4}, {{ 0x01, 0x00 }, 2}, {{ 0x01, 0x00, 0x73, 0xaf }, 4}, {{ 0x04, 0x00 }, 2}, {{ 0x00, 0x04 }, 2}, {{ 0x00, 0x04, 0x00, 0x0a }, 4}, {{ 0x04, 0x14 }, 2}, {{ 0x04, 0x14, 0x00, 0x00 }, 4}, }; em28xx_write_reg(dev, EM28XX_R06_I2C_CLK, 0x40); em28xx_gpio_set(dev, terratec_htc_usb_xs_init); em28xx_write_reg(dev, EM28XX_R06_I2C_CLK, 0x40); usleep_range(10000, 11000); em28xx_write_reg(dev, EM28XX_R06_I2C_CLK, 0x44); usleep_range(10000, 11000); dev->i2c_client[dev->def_i2c_bus].addr = 0x82 >> 1; for (i = 0; i < ARRAY_SIZE(regs); i++) i2c_master_send(&dev->i2c_client[dev->def_i2c_bus], regs[i].r, regs[i].len); em28xx_gpio_set(dev, terratec_htc_usb_xs_end); }; static void pctv_520e_init(struct em28xx *dev) { /* * Init AVF4910B analog decoder. Looks like I2C traffic to * digital demodulator and tuner are routed via AVF4910B. 
*/ int i; static const struct { unsigned char r[4]; int len; } regs[] = { {{ 0x06, 0x02, 0x00, 0x31 }, 4}, {{ 0x01, 0x02 }, 2}, {{ 0x01, 0x02, 0x00, 0xc6 }, 4}, {{ 0x01, 0x00 }, 2}, {{ 0x01, 0x00, 0xff, 0xaf }, 4}, {{ 0x01, 0x00, 0x03, 0xa0 }, 4}, {{ 0x01, 0x00 }, 2}, {{ 0x01, 0x00, 0x73, 0xaf }, 4}, }; dev->i2c_client[dev->def_i2c_bus].addr = 0x82 >> 1; /* 0x41 */ for (i = 0; i < ARRAY_SIZE(regs); i++) i2c_master_send(&dev->i2c_client[dev->def_i2c_bus], regs[i].r, regs[i].len); }; static int em28xx_pctv_290e_set_lna(struct dvb_frontend *fe) { struct dtv_frontend_properties *c = &fe->dtv_property_cache; struct em28xx_i2c_bus *i2c_bus = fe->dvb->priv; struct em28xx *dev = i2c_bus->dev; #ifdef CONFIG_GPIOLIB struct em28xx_dvb *dvb = dev->dvb; int ret; unsigned long flags; if (c->lna == 1) flags = GPIOF_OUT_INIT_HIGH; /* enable LNA */ else flags = GPIOF_OUT_INIT_LOW; /* disable LNA */ ret = gpio_request_one(dvb->lna_gpio, flags, NULL); if (ret) dev_err(&dev->intf->dev, "gpio request failed %d\n", ret); else gpio_free(dvb->lna_gpio); return ret; #else dev_warn(&dev->intf->dev, "%s: LNA control is disabled (lna=%u)\n", KBUILD_MODNAME, c->lna); return 0; #endif } static int em28xx_pctv_292e_set_lna(struct dvb_frontend *fe) { struct dtv_frontend_properties *c = &fe->dtv_property_cache; struct em28xx_i2c_bus *i2c_bus = fe->dvb->priv; struct em28xx *dev = i2c_bus->dev; u8 lna; if (c->lna == 1) lna = 0x01; else lna = 0x00; return em28xx_write_reg_bits(dev, EM2874_R80_GPIO_P0_CTRL, lna, 0x01); } static int em28xx_mt352_terratec_xs_init(struct dvb_frontend *fe) { /* Values extracted from a USB trace of the Terratec Windows driver */ static u8 clock_config[] = { CLOCK_CTL, 0x38, 0x2c }; static u8 reset[] = { RESET, 0x80 }; static u8 adc_ctl_1_cfg[] = { ADC_CTL_1, 0x40 }; static u8 agc_cfg[] = { AGC_TARGET, 0x28, 0xa0 }; static u8 input_freq_cfg[] = { INPUT_FREQ_1, 0x31, 0xb8 }; static u8 rs_err_cfg[] = { RS_ERR_PER_1, 0x00, 0x4d }; static u8 capt_range_cfg[] = { CAPT_RANGE, 0x32 }; static u8 trl_nom_cfg[] = { TRL_NOMINAL_RATE_1, 0x64, 0x00 }; static u8 tps_given_cfg[] = { TPS_GIVEN_1, 0x40, 0x80, 0x50 }; static u8 tuner_go[] = { TUNER_GO, 0x01}; mt352_write(fe, clock_config, sizeof(clock_config)); usleep_range(200, 250); mt352_write(fe, reset, sizeof(reset)); mt352_write(fe, adc_ctl_1_cfg, sizeof(adc_ctl_1_cfg)); mt352_write(fe, agc_cfg, sizeof(agc_cfg)); mt352_write(fe, input_freq_cfg, sizeof(input_freq_cfg)); mt352_write(fe, rs_err_cfg, sizeof(rs_err_cfg)); mt352_write(fe, capt_range_cfg, sizeof(capt_range_cfg)); mt352_write(fe, trl_nom_cfg, sizeof(trl_nom_cfg)); mt352_write(fe, tps_given_cfg, sizeof(tps_given_cfg)); mt352_write(fe, tuner_go, sizeof(tuner_go)); return 0; } static void px_bcud_init(struct em28xx *dev) { int i; static const struct { unsigned char r[4]; int len; } regs1[] = { {{ 0x0e, 0x77 }, 2}, {{ 0x0f, 0x77 }, 2}, {{ 0x03, 0x90 }, 2}, }, regs2[] = { {{ 0x07, 0x01 }, 2}, {{ 0x08, 0x10 }, 2}, {{ 0x13, 0x00 }, 2}, {{ 0x17, 0x00 }, 2}, {{ 0x03, 0x01 }, 2}, {{ 0x10, 0xb1 }, 2}, {{ 0x11, 0x40 }, 2}, {{ 0x85, 0x7a }, 2}, {{ 0x87, 0x04 }, 2}, }; static const struct em28xx_reg_seq gpio[] = { {EM28XX_R06_I2C_CLK, 0x40, 0xff, 300}, {EM2874_R80_GPIO_P0_CTRL, 0xfd, 0xff, 60}, {EM28XX_R15_RGAIN, 0x20, 0xff, 0}, {EM28XX_R16_GGAIN, 0x20, 0xff, 0}, {EM28XX_R17_BGAIN, 0x20, 0xff, 0}, {EM28XX_R18_ROFFSET, 0x00, 0xff, 0}, {EM28XX_R19_GOFFSET, 0x00, 0xff, 0}, {EM28XX_R1A_BOFFSET, 0x00, 0xff, 0}, {EM28XX_R23_UOFFSET, 0x00, 0xff, 0}, {EM28XX_R24_VOFFSET, 0x00, 0xff, 0}, {EM28XX_R26_COMPR, 0x00, 0xff, 0}, 
{0x13, 0x08, 0xff, 0}, {EM28XX_R12_VINENABLE, 0x27, 0xff, 0}, {EM28XX_R0C_USBSUSP, 0x10, 0xff, 0}, {EM28XX_R27_OUTFMT, 0x00, 0xff, 0}, {EM28XX_R10_VINMODE, 0x00, 0xff, 0}, {EM28XX_R11_VINCTRL, 0x11, 0xff, 0}, {EM2874_R50_IR_CONFIG, 0x01, 0xff, 0}, {EM2874_R5F_TS_ENABLE, 0x80, 0xff, 0}, {EM28XX_R06_I2C_CLK, 0x46, 0xff, 0}, }; em28xx_write_reg(dev, EM28XX_R06_I2C_CLK, 0x46); /* sleeping ISDB-T */ dev->dvb->i2c_client_demod->addr = 0x14; for (i = 0; i < ARRAY_SIZE(regs1); i++) i2c_master_send(dev->dvb->i2c_client_demod, regs1[i].r, regs1[i].len); /* sleeping ISDB-S */ dev->dvb->i2c_client_demod->addr = 0x15; for (i = 0; i < ARRAY_SIZE(regs2); i++) i2c_master_send(dev->dvb->i2c_client_demod, regs2[i].r, regs2[i].len); for (i = 0; i < ARRAY_SIZE(gpio); i++) { em28xx_write_reg_bits(dev, gpio[i].reg, gpio[i].val, gpio[i].mask); if (gpio[i].sleep > 0) msleep(gpio[i].sleep); } }; static struct mt352_config terratec_xs_mt352_cfg = { .demod_address = (0x1e >> 1), .no_tuner = 1, .if2 = 45600, .demod_init = em28xx_mt352_terratec_xs_init, }; static struct tda10023_config em28xx_tda10023_config = { .demod_address = 0x0c, .invert = 1, }; static struct cxd2820r_config em28xx_cxd2820r_config = { .i2c_address = (0xd8 >> 1), .ts_mode = CXD2820R_TS_SERIAL, }; static struct tda18271_config em28xx_cxd2820r_tda18271_config = { .output_opt = TDA18271_OUTPUT_LT_OFF, .gate = TDA18271_GATE_DIGITAL, }; static struct zl10353_config em28xx_zl10353_no_i2c_gate_dev = { .demod_address = (0x1e >> 1), .disable_i2c_gate_ctrl = 1, .no_tuner = 1, .parallel_ts = 1, }; static struct mt2060_config em28xx_mt2060_config = { .i2c_address = 0x60, }; static struct qt1010_config em28xx_qt1010_config = { .i2c_address = 0x62 }; static const struct mb86a20s_config c3tech_duo_mb86a20s_config = { .demod_address = 0x10, .is_serial = true, }; static struct tda18271_std_map mb86a20s_tda18271_config = { .dvbt_6 = { .if_freq = 4000, .agc_mode = 3, .std = 4, .if_lvl = 1, .rfagc_top = 0x37, }, }; static struct tda18271_config c3tech_duo_tda18271_config = { .std_map = &mb86a20s_tda18271_config, .gate = TDA18271_GATE_DIGITAL, .small_i2c = TDA18271_03_BYTE_CHUNK_INIT, }; static struct tda18271_std_map drx_j_std_map = { .atsc_6 = { .if_freq = 5000, .agc_mode = 3, .std = 0, .if_lvl = 1, .rfagc_top = 0x37, }, .qam_6 = { .if_freq = 5380, .agc_mode = 3, .std = 3, .if_lvl = 1, .rfagc_top = 0x37, }, }; static struct tda18271_config pinnacle_80e_dvb_config = { .std_map = &drx_j_std_map, .gate = TDA18271_GATE_DIGITAL, .role = TDA18271_MASTER, }; static struct lgdt3306a_config hauppauge_01595_lgdt3306a_config = { .qam_if_khz = 4000, .vsb_if_khz = 3250, .spectral_inversion = 0, .deny_i2c_rptr = 0, .mpeg_mode = LGDT3306A_MPEG_SERIAL, .tpclk_edge = LGDT3306A_TPCLK_RISING_EDGE, .tpvalid_polarity = LGDT3306A_TP_VALID_HIGH, .xtalMHz = 25, }; /* ------------------------------------------------------------------ */ static noinline_for_stack int em28xx_attach_xc3028(u8 addr, struct em28xx *dev) { struct dvb_frontend *fe; struct xc2028_config cfg; struct xc2028_ctrl ctl; memset(&cfg, 0, sizeof(cfg)); cfg.i2c_adap = &dev->i2c_adap[dev->def_i2c_bus]; cfg.i2c_addr = addr; memset(&ctl, 0, sizeof(ctl)); em28xx_setup_xc3028(dev, &ctl); cfg.ctrl = &ctl; if (!dev->dvb->fe[0]) { dev_err(&dev->intf->dev, "dvb frontend not attached. 
Can't attach xc3028\n"); return -EINVAL; } fe = dvb_attach(xc2028_attach, dev->dvb->fe[0], &cfg); if (!fe) { dev_err(&dev->intf->dev, "xc3028 attach failed\n"); dvb_frontend_detach(dev->dvb->fe[0]); dev->dvb->fe[0] = NULL; return -EINVAL; } dev_info(&dev->intf->dev, "xc3028 attached\n"); return 0; } /* ------------------------------------------------------------------ */ static int em28xx_register_dvb(struct em28xx_dvb *dvb, struct module *module, struct em28xx *dev, struct device *device) { int result; bool create_rf_connector = false; mutex_init(&dvb->lock); /* register adapter */ result = dvb_register_adapter(&dvb->adapter, dev_name(&dev->intf->dev), module, device, adapter_nr); if (result < 0) { dev_warn(&dev->intf->dev, "dvb_register_adapter failed (errno = %d)\n", result); goto fail_adapter; } #ifdef CONFIG_MEDIA_CONTROLLER_DVB dvb->adapter.mdev = dev->media_dev; #endif /* Ensure all frontends negotiate bus access */ dvb->fe[0]->ops.ts_bus_ctrl = em28xx_dvb_bus_ctrl; if (dvb->fe[1]) dvb->fe[1]->ops.ts_bus_ctrl = em28xx_dvb_bus_ctrl; dvb->adapter.priv = &dev->i2c_bus[dev->def_i2c_bus]; /* register frontend */ result = dvb_register_frontend(&dvb->adapter, dvb->fe[0]); if (result < 0) { dev_warn(&dev->intf->dev, "dvb_register_frontend failed (errno = %d)\n", result); goto fail_frontend0; } /* register 2nd frontend */ if (dvb->fe[1]) { result = dvb_register_frontend(&dvb->adapter, dvb->fe[1]); if (result < 0) { dev_warn(&dev->intf->dev, "2nd dvb_register_frontend failed (errno = %d)\n", result); goto fail_frontend1; } } /* register demux stuff */ dvb->demux.dmx.capabilities = DMX_TS_FILTERING | DMX_SECTION_FILTERING | DMX_MEMORY_BASED_FILTERING; dvb->demux.priv = dvb; dvb->demux.filternum = 256; dvb->demux.feednum = 256; dvb->demux.start_feed = em28xx_start_feed; dvb->demux.stop_feed = em28xx_stop_feed; result = dvb_dmx_init(&dvb->demux); if (result < 0) { dev_warn(&dev->intf->dev, "dvb_dmx_init failed (errno = %d)\n", result); goto fail_dmx; } dvb->dmxdev.filternum = 256; dvb->dmxdev.demux = &dvb->demux.dmx; dvb->dmxdev.capabilities = 0; result = dvb_dmxdev_init(&dvb->dmxdev, &dvb->adapter); if (result < 0) { dev_warn(&dev->intf->dev, "dvb_dmxdev_init failed (errno = %d)\n", result); goto fail_dmxdev; } dvb->fe_hw.source = DMX_FRONTEND_0; result = dvb->demux.dmx.add_frontend(&dvb->demux.dmx, &dvb->fe_hw); if (result < 0) { dev_warn(&dev->intf->dev, "add_frontend failed (DMX_FRONTEND_0, errno = %d)\n", result); goto fail_fe_hw; } dvb->fe_mem.source = DMX_MEMORY_FE; result = dvb->demux.dmx.add_frontend(&dvb->demux.dmx, &dvb->fe_mem); if (result < 0) { dev_warn(&dev->intf->dev, "add_frontend failed (DMX_MEMORY_FE, errno = %d)\n", result); goto fail_fe_mem; } result = dvb->demux.dmx.connect_frontend(&dvb->demux.dmx, &dvb->fe_hw); if (result < 0) { dev_warn(&dev->intf->dev, "connect_frontend failed (errno = %d)\n", result); goto fail_fe_conn; } /* register network adapter */ dvb_net_init(&dvb->adapter, &dvb->net, &dvb->demux.dmx); /* If the analog part won't create RF connectors, DVB will do it */ if (!dev->has_video || dev->tuner_type == TUNER_ABSENT) create_rf_connector = true; result = dvb_create_media_graph(&dvb->adapter, create_rf_connector); if (result < 0) goto fail_create_graph; return 0; fail_create_graph: dvb_net_release(&dvb->net); fail_fe_conn: dvb->demux.dmx.remove_frontend(&dvb->demux.dmx, &dvb->fe_mem); fail_fe_mem: dvb->demux.dmx.remove_frontend(&dvb->demux.dmx, &dvb->fe_hw); fail_fe_hw: dvb_dmxdev_release(&dvb->dmxdev); fail_dmxdev: dvb_dmx_release(&dvb->demux); fail_dmx: if 
(dvb->fe[1]) dvb_unregister_frontend(dvb->fe[1]); dvb_unregister_frontend(dvb->fe[0]); fail_frontend1: if (dvb->fe[1]) dvb_frontend_detach(dvb->fe[1]); fail_frontend0: dvb_frontend_detach(dvb->fe[0]); dvb_unregister_adapter(&dvb->adapter); fail_adapter: return result; } static void em28xx_unregister_dvb(struct em28xx_dvb *dvb) { dvb_net_release(&dvb->net); dvb->demux.dmx.remove_frontend(&dvb->demux.dmx, &dvb->fe_mem); dvb->demux.dmx.remove_frontend(&dvb->demux.dmx, &dvb->fe_hw); dvb_dmxdev_release(&dvb->dmxdev); dvb_dmx_release(&dvb->demux); if (dvb->fe[1]) dvb_unregister_frontend(dvb->fe[1]); dvb_unregister_frontend(dvb->fe[0]); if (dvb->fe[1] && !dvb->dont_attach_fe1) dvb_frontend_detach(dvb->fe[1]); dvb_frontend_detach(dvb->fe[0]); dvb_unregister_adapter(&dvb->adapter); } static int em28174_dvb_init_pctv_460e(struct em28xx *dev) { struct em28xx_dvb *dvb = dev->dvb; struct tda10071_platform_data tda10071_pdata = {}; struct a8293_platform_data a8293_pdata = {}; /* attach demod + tuner combo */ tda10071_pdata.clk = 40444000; /* 40.444 MHz */ tda10071_pdata.i2c_wr_max = 64; tda10071_pdata.ts_mode = TDA10071_TS_SERIAL; tda10071_pdata.pll_multiplier = 20; tda10071_pdata.tuner_i2c_addr = 0x14; dvb->i2c_client_demod = dvb_module_probe("tda10071", "tda10071_cx24118", &dev->i2c_adap[dev->def_i2c_bus], 0x55, &tda10071_pdata); if (!dvb->i2c_client_demod) return -ENODEV; dvb->fe[0] = tda10071_pdata.get_dvb_frontend(dvb->i2c_client_demod); /* attach SEC */ a8293_pdata.dvb_frontend = dvb->fe[0]; dvb->i2c_client_sec = dvb_module_probe("a8293", NULL, &dev->i2c_adap[dev->def_i2c_bus], 0x08, &a8293_pdata); if (!dvb->i2c_client_sec) { dvb_module_release(dvb->i2c_client_demod); return -ENODEV; } return 0; } static int em28178_dvb_init_pctv_461e(struct em28xx *dev) { struct em28xx_dvb *dvb = dev->dvb; struct i2c_adapter *i2c_adapter; struct m88ds3103_platform_data m88ds3103_pdata = {}; struct ts2020_config ts2020_config = {}; struct a8293_platform_data a8293_pdata = {}; /* attach demod */ m88ds3103_pdata.clk = 27000000; m88ds3103_pdata.i2c_wr_max = 33; m88ds3103_pdata.ts_mode = M88DS3103_TS_PARALLEL; m88ds3103_pdata.ts_clk = 16000; m88ds3103_pdata.ts_clk_pol = 1; m88ds3103_pdata.agc = 0x99; dvb->i2c_client_demod = dvb_module_probe("m88ds3103", NULL, &dev->i2c_adap[dev->def_i2c_bus], 0x68, &m88ds3103_pdata); if (!dvb->i2c_client_demod) return -ENODEV; dvb->fe[0] = m88ds3103_pdata.get_dvb_frontend(dvb->i2c_client_demod); i2c_adapter = m88ds3103_pdata.get_i2c_adapter(dvb->i2c_client_demod); /* attach tuner */ ts2020_config.fe = dvb->fe[0]; dvb->i2c_client_tuner = dvb_module_probe("ts2020", "ts2022", i2c_adapter, 0x60, &ts2020_config); if (!dvb->i2c_client_tuner) { dvb_module_release(dvb->i2c_client_demod); return -ENODEV; } /* delegate signal strength measurement to tuner */ dvb->fe[0]->ops.read_signal_strength = dvb->fe[0]->ops.tuner_ops.get_rf_strength; /* attach SEC */ a8293_pdata.dvb_frontend = dvb->fe[0]; /* * 461e has a tendency to have vIN undervoltage troubles. * Slew mitigates this. 
*/ a8293_pdata.volt_slew_nanos_per_mv = 20; dvb->i2c_client_sec = dvb_module_probe("a8293", NULL, &dev->i2c_adap[dev->def_i2c_bus], 0x08, &a8293_pdata); if (!dvb->i2c_client_sec) { dvb_module_release(dvb->i2c_client_tuner); dvb_module_release(dvb->i2c_client_demod); return -ENODEV; } return 0; } static int em28178_dvb_init_pctv_461e_v2(struct em28xx *dev) { struct em28xx_dvb *dvb = dev->dvb; struct i2c_adapter *i2c_adapter; struct m88ds3103_platform_data m88ds3103_pdata = {}; struct ts2020_config ts2020_config = {}; struct a8293_platform_data a8293_pdata = {}; /* attach demod */ m88ds3103_pdata.clk = 27000000; m88ds3103_pdata.i2c_wr_max = 33; m88ds3103_pdata.ts_mode = M88DS3103_TS_PARALLEL; m88ds3103_pdata.ts_clk = 16000; m88ds3103_pdata.ts_clk_pol = 0; m88ds3103_pdata.agc = 0x99; m88ds3103_pdata.agc_inv = 0; m88ds3103_pdata.spec_inv = 0; dvb->i2c_client_demod = dvb_module_probe("m88ds3103", "m88ds3103b", &dev->i2c_adap[dev->def_i2c_bus], 0x6a, &m88ds3103_pdata); if (!dvb->i2c_client_demod) return -ENODEV; dvb->fe[0] = m88ds3103_pdata.get_dvb_frontend(dvb->i2c_client_demod); i2c_adapter = m88ds3103_pdata.get_i2c_adapter(dvb->i2c_client_demod); /* attach tuner */ ts2020_config.fe = dvb->fe[0]; dvb->i2c_client_tuner = dvb_module_probe("ts2020", "ts2022", i2c_adapter, 0x60, &ts2020_config); if (!dvb->i2c_client_tuner) { dvb_module_release(dvb->i2c_client_demod); return -ENODEV; } /* delegate signal strength measurement to tuner */ dvb->fe[0]->ops.read_signal_strength = dvb->fe[0]->ops.tuner_ops.get_rf_strength; /* attach SEC */ a8293_pdata.dvb_frontend = dvb->fe[0]; dvb->i2c_client_sec = dvb_module_probe("a8293", NULL, &dev->i2c_adap[dev->def_i2c_bus], 0x08, &a8293_pdata); if (!dvb->i2c_client_sec) { dvb_module_release(dvb->i2c_client_tuner); dvb_module_release(dvb->i2c_client_demod); return -ENODEV; } return 0; } static int em28178_dvb_init_pctv_292e(struct em28xx *dev) { struct em28xx_dvb *dvb = dev->dvb; struct i2c_adapter *adapter; struct si2168_config si2168_config = {}; struct si2157_config si2157_config = {}; /* attach demod */ si2168_config.i2c_adapter = &adapter; si2168_config.fe = &dvb->fe[0]; si2168_config.ts_mode = SI2168_TS_PARALLEL; si2168_config.spectral_inversion = true; dvb->i2c_client_demod = dvb_module_probe("si2168", NULL, &dev->i2c_adap[dev->def_i2c_bus], 0x64, &si2168_config); if (!dvb->i2c_client_demod) return -ENODEV; /* attach tuner */ si2157_config.fe = dvb->fe[0]; si2157_config.if_port = 1; #ifdef CONFIG_MEDIA_CONTROLLER_DVB si2157_config.mdev = dev->media_dev; #endif dvb->i2c_client_tuner = dvb_module_probe("si2157", NULL, adapter, 0x60, &si2157_config); if (!dvb->i2c_client_tuner) { dvb_module_release(dvb->i2c_client_demod); return -ENODEV; } dvb->fe[0]->ops.set_lna = em28xx_pctv_292e_set_lna; return 0; } static int em28178_dvb_init_terratec_t2_stick_hd(struct em28xx *dev) { struct em28xx_dvb *dvb = dev->dvb; struct i2c_adapter *adapter; struct si2168_config si2168_config = {}; struct si2157_config si2157_config = {}; /* attach demod */ si2168_config.i2c_adapter = &adapter; si2168_config.fe = &dvb->fe[0]; si2168_config.ts_mode = SI2168_TS_PARALLEL; dvb->i2c_client_demod = dvb_module_probe("si2168", NULL, &dev->i2c_adap[dev->def_i2c_bus], 0x64, &si2168_config); if (!dvb->i2c_client_demod) return -ENODEV; /* attach tuner */ memset(&si2157_config, 0, sizeof(si2157_config)); si2157_config.fe = dvb->fe[0]; si2157_config.if_port = 0; #ifdef CONFIG_MEDIA_CONTROLLER_DVB si2157_config.mdev = dev->media_dev; #endif dvb->i2c_client_tuner = dvb_module_probe("si2157", 
"si2146", adapter, 0x60, &si2157_config); if (!dvb->i2c_client_tuner) { dvb_module_release(dvb->i2c_client_demod); return -ENODEV; } return 0; } static int em28178_dvb_init_plex_px_bcud(struct em28xx *dev) { struct em28xx_dvb *dvb = dev->dvb; struct tc90522_config tc90522_config = {}; struct qm1d1c0042_config qm1d1c0042_config = {}; /* attach demod */ dvb->i2c_client_demod = dvb_module_probe("tc90522", "tc90522sat", &dev->i2c_adap[dev->def_i2c_bus], 0x15, &tc90522_config); if (!dvb->i2c_client_demod) return -ENODEV; /* attach tuner */ qm1d1c0042_config.fe = tc90522_config.fe; qm1d1c0042_config.lpf = 1; dvb->i2c_client_tuner = dvb_module_probe("qm1d1c0042", NULL, tc90522_config.tuner_i2c, 0x61, &qm1d1c0042_config); if (!dvb->i2c_client_tuner) { dvb_module_release(dvb->i2c_client_demod); return -ENODEV; } dvb->fe[0] = tc90522_config.fe; px_bcud_init(dev); return 0; } static int em28174_dvb_init_hauppauge_wintv_dualhd_dvb(struct em28xx *dev) { struct em28xx_dvb *dvb = dev->dvb; struct i2c_adapter *adapter; struct si2168_config si2168_config = {}; struct si2157_config si2157_config = {}; unsigned char addr; /* attach demod */ si2168_config.i2c_adapter = &adapter; si2168_config.fe = &dvb->fe[0]; si2168_config.ts_mode = SI2168_TS_SERIAL; si2168_config.spectral_inversion = true; addr = (dev->ts == PRIMARY_TS) ? 0x64 : 0x67; dvb->i2c_client_demod = dvb_module_probe("si2168", NULL, &dev->i2c_adap[dev->def_i2c_bus], addr, &si2168_config); if (!dvb->i2c_client_demod) return -ENODEV; /* attach tuner */ memset(&si2157_config, 0, sizeof(si2157_config)); si2157_config.fe = dvb->fe[0]; si2157_config.if_port = 1; #ifdef CONFIG_MEDIA_CONTROLLER_DVB si2157_config.mdev = dev->media_dev; #endif addr = (dev->ts == PRIMARY_TS) ? 0x60 : 0x63; dvb->i2c_client_tuner = dvb_module_probe("si2157", NULL, adapter, addr, &si2157_config); if (!dvb->i2c_client_tuner) { dvb_module_release(dvb->i2c_client_demod); return -ENODEV; } return 0; } static int em28174_dvb_init_hauppauge_wintv_dualhd_01595(struct em28xx *dev) { struct em28xx_dvb *dvb = dev->dvb; struct i2c_adapter *adapter; struct lgdt3306a_config lgdt3306a_config = {}; struct si2157_config si2157_config = {}; unsigned char addr; /* attach demod */ lgdt3306a_config = hauppauge_01595_lgdt3306a_config; lgdt3306a_config.fe = &dvb->fe[0]; lgdt3306a_config.i2c_adapter = &adapter; addr = (dev->ts == PRIMARY_TS) ? 0x59 : 0x0e; dvb->i2c_client_demod = dvb_module_probe("lgdt3306a", NULL, &dev->i2c_adap[dev->def_i2c_bus], addr, &lgdt3306a_config); if (!dvb->i2c_client_demod) return -ENODEV; /* attach tuner */ si2157_config.fe = dvb->fe[0]; si2157_config.if_port = 1; si2157_config.inversion = 1; #ifdef CONFIG_MEDIA_CONTROLLER_DVB si2157_config.mdev = dev->media_dev; #endif addr = (dev->ts == PRIMARY_TS) ? 0x60 : 0x62; dvb->i2c_client_tuner = dvb_module_probe("si2157", NULL, adapter, addr, &si2157_config); if (!dvb->i2c_client_tuner) { dvb_module_release(dvb->i2c_client_demod); return -ENODEV; } return 0; } static int em2874_dvb_init_hauppauge_usb_quadhd(struct em28xx *dev) { struct em28xx_dvb *dvb = dev->dvb; struct mxl692_config mxl692_config = {}; unsigned char addr; /* attach demod/tuner combo */ mxl692_config.id = (dev->ts == PRIMARY_TS) ? 0 : 1; mxl692_config.fe = &dvb->fe[0]; addr = (dev->ts == PRIMARY_TS) ? 
0x60 : 0x63; dvb->i2c_client_demod = dvb_module_probe("mxl692", NULL, &dev->i2c_adap[dev->def_i2c_bus], addr, &mxl692_config); if (!dvb->i2c_client_demod) return -ENODEV; return 0; } static int em28xx_dvb_init(struct em28xx *dev) { int result = 0, dvb_alt = 0; struct em28xx_dvb *dvb; struct usb_device *udev; if (dev->is_audio_only) { /* Shouldn't initialize IR for this interface */ return 0; } if (!dev->board.has_dvb) { /* This device does not support the extension */ return 0; } dev_info(&dev->intf->dev, "Binding DVB extension\n"); dvb = kzalloc(sizeof(*dvb), GFP_KERNEL); if (!dvb) return -ENOMEM; dev->dvb = dvb; dvb->fe[0] = NULL; dvb->fe[1] = NULL; /* pre-allocate DVB usb transfer buffers */ if (dev->dvb_xfer_bulk) { result = em28xx_alloc_urbs(dev, EM28XX_DIGITAL_MODE, dev->dvb_xfer_bulk, EM28XX_DVB_NUM_BUFS, 512, EM28XX_DVB_BULK_PACKET_MULTIPLIER); } else { result = em28xx_alloc_urbs(dev, EM28XX_DIGITAL_MODE, dev->dvb_xfer_bulk, EM28XX_DVB_NUM_BUFS, dev->dvb_max_pkt_size_isoc, EM28XX_DVB_NUM_ISOC_PACKETS); } if (result) { dev_err(&dev->intf->dev, "failed to pre-allocate USB transfer buffers for DVB.\n"); kfree(dvb); dev->dvb = NULL; return result; } mutex_lock(&dev->lock); em28xx_set_mode(dev, EM28XX_DIGITAL_MODE); /* init frontend */ switch (dev->model) { case EM2874_BOARD_LEADERSHIP_ISDBT: dvb->fe[0] = dvb_attach(s921_attach, &sharp_isdbt, &dev->i2c_adap[dev->def_i2c_bus]); if (!dvb->fe[0]) { result = -EINVAL; goto out_free; } break; case EM2883_BOARD_HAUPPAUGE_WINTV_HVR_850: case EM2883_BOARD_HAUPPAUGE_WINTV_HVR_950: case EM2880_BOARD_PINNACLE_PCTV_HD_PRO: case EM2880_BOARD_AMD_ATI_TV_WONDER_HD_600: dvb->fe[0] = dvb_attach(lgdt330x_attach, &em2880_lgdt3303_dev, 0x0e, &dev->i2c_adap[dev->def_i2c_bus]); if (em28xx_attach_xc3028(0x61, dev) < 0) { result = -EINVAL; goto out_free; } break; case EM2880_BOARD_KWORLD_DVB_310U: dvb->fe[0] = dvb_attach(zl10353_attach, &em28xx_zl10353_with_xc3028, &dev->i2c_adap[dev->def_i2c_bus]); if (em28xx_attach_xc3028(0x61, dev) < 0) { result = -EINVAL; goto out_free; } break; case EM2880_BOARD_HAUPPAUGE_WINTV_HVR_900: case EM2882_BOARD_TERRATEC_HYBRID_XS: case EM2880_BOARD_EMPIRE_DUAL_TV: case EM2882_BOARD_ZOLID_HYBRID_TV_STICK: dvb->fe[0] = dvb_attach(zl10353_attach, &em28xx_zl10353_xc3028_no_i2c_gate, &dev->i2c_adap[dev->def_i2c_bus]); if (em28xx_attach_xc3028(0x61, dev) < 0) { result = -EINVAL; goto out_free; } break; case EM2880_BOARD_TERRATEC_HYBRID_XS: case EM2880_BOARD_TERRATEC_HYBRID_XS_FR: case EM2881_BOARD_PINNACLE_HYBRID_PRO: case EM2882_BOARD_DIKOM_DK300: case EM2882_BOARD_KWORLD_VS_DVBT: /* * Those boards could have either a zl10353 or a mt352. * If the chip id isn't for zl10353, try mt352. 
*/ dvb->fe[0] = dvb_attach(zl10353_attach, &em28xx_zl10353_xc3028_no_i2c_gate, &dev->i2c_adap[dev->def_i2c_bus]); if (!dvb->fe[0]) dvb->fe[0] = dvb_attach(mt352_attach, &terratec_xs_mt352_cfg, &dev->i2c_adap[dev->def_i2c_bus]); if (em28xx_attach_xc3028(0x61, dev) < 0) { result = -EINVAL; goto out_free; } break; case EM2870_BOARD_TERRATEC_XS_MT2060: dvb->fe[0] = dvb_attach(zl10353_attach, &em28xx_zl10353_no_i2c_gate_dev, &dev->i2c_adap[dev->def_i2c_bus]); if (dvb->fe[0]) { dvb_attach(mt2060_attach, dvb->fe[0], &dev->i2c_adap[dev->def_i2c_bus], &em28xx_mt2060_config, 1220); } break; case EM2870_BOARD_KWORLD_355U: dvb->fe[0] = dvb_attach(zl10353_attach, &em28xx_zl10353_no_i2c_gate_dev, &dev->i2c_adap[dev->def_i2c_bus]); if (dvb->fe[0]) dvb_attach(qt1010_attach, dvb->fe[0], &dev->i2c_adap[dev->def_i2c_bus], &em28xx_qt1010_config); break; case EM2883_BOARD_KWORLD_HYBRID_330U: case EM2882_BOARD_EVGA_INDTUBE: dvb->fe[0] = dvb_attach(s5h1409_attach, &em28xx_s5h1409_with_xc3028, &dev->i2c_adap[dev->def_i2c_bus]); if (em28xx_attach_xc3028(0x61, dev) < 0) { result = -EINVAL; goto out_free; } break; case EM2882_BOARD_KWORLD_ATSC_315U: dvb->fe[0] = dvb_attach(lgdt330x_attach, &em2880_lgdt3303_dev, 0x0e, &dev->i2c_adap[dev->def_i2c_bus]); if (dvb->fe[0]) { if (!dvb_attach(simple_tuner_attach, dvb->fe[0], &dev->i2c_adap[dev->def_i2c_bus], 0x61, TUNER_THOMSON_DTT761X)) { result = -EINVAL; goto out_free; } } break; case EM2880_BOARD_HAUPPAUGE_WINTV_HVR_900_R2: case EM2882_BOARD_PINNACLE_HYBRID_PRO_330E: dvb->fe[0] = dvb_attach(drxd_attach, &em28xx_drxd, NULL, &dev->i2c_adap[dev->def_i2c_bus], &dev->intf->dev); if (em28xx_attach_xc3028(0x61, dev) < 0) { result = -EINVAL; goto out_free; } break; case EM2870_BOARD_REDDO_DVB_C_USB_BOX: /* Philips CU1216L NIM (Philips TDA10023 + Infineon TUA6034) */ dvb->fe[0] = dvb_attach(tda10023_attach, &em28xx_tda10023_config, &dev->i2c_adap[dev->def_i2c_bus], 0x48); if (dvb->fe[0]) { if (!dvb_attach(simple_tuner_attach, dvb->fe[0], &dev->i2c_adap[dev->def_i2c_bus], 0x60, TUNER_PHILIPS_CU1216L)) { result = -EINVAL; goto out_free; } } break; case EM2870_BOARD_KWORLD_A340: dvb->fe[0] = dvb_attach(lgdt3305_attach, &em2870_lgdt3304_dev, &dev->i2c_adap[dev->def_i2c_bus]); if (!dvb->fe[0]) { result = -EINVAL; goto out_free; } if (!dvb_attach(tda18271_attach, dvb->fe[0], 0x60, &dev->i2c_adap[dev->def_i2c_bus], &kworld_a340_config)) { dvb_frontend_detach(dvb->fe[0]); result = -EINVAL; goto out_free; } break; case EM28174_BOARD_PCTV_290E: /* set default GPIO0 for LNA, used if GPIOLIB is undefined */ dvb->lna_gpio = CXD2820R_GPIO_E | CXD2820R_GPIO_O | CXD2820R_GPIO_L; dvb->fe[0] = dvb_attach(cxd2820r_attach, &em28xx_cxd2820r_config, &dev->i2c_adap[dev->def_i2c_bus], &dvb->lna_gpio); if (dvb->fe[0]) { /* FE 0 attach tuner */ if (!dvb_attach(tda18271_attach, dvb->fe[0], 0x60, &dev->i2c_adap[dev->def_i2c_bus], &em28xx_cxd2820r_tda18271_config)) { dvb_frontend_detach(dvb->fe[0]); result = -EINVAL; goto out_free; } #ifdef CONFIG_GPIOLIB /* enable LNA for DVB-T, DVB-T2 and DVB-C */ result = gpio_request_one(dvb->lna_gpio, GPIOF_OUT_INIT_LOW, NULL); if (result) dev_err(&dev->intf->dev, "gpio request failed %d\n", result); else gpio_free(dvb->lna_gpio); result = 0; /* continue even set LNA fails */ #endif dvb->fe[0]->ops.set_lna = em28xx_pctv_290e_set_lna; } break; case EM2884_BOARD_HAUPPAUGE_WINTV_HVR_930C: { struct xc5000_config cfg = {}; hauppauge_hvr930c_init(dev); dvb->fe[0] = dvb_attach(drxk_attach, &hauppauge_930c_drxk, &dev->i2c_adap[dev->def_i2c_bus]); if (!dvb->fe[0]) { result = 
-EINVAL; goto out_free; } /* FIXME: do we need a pll semaphore? */ dvb->fe[0]->sec_priv = dvb; sema_init(&dvb->pll_mutex, 1); dvb->gate_ctrl = dvb->fe[0]->ops.i2c_gate_ctrl; dvb->fe[0]->ops.i2c_gate_ctrl = drxk_gate_ctrl; /* Attach xc5000 */ cfg.i2c_address = 0x61; cfg.if_khz = 4000; if (dvb->fe[0]->ops.i2c_gate_ctrl) dvb->fe[0]->ops.i2c_gate_ctrl(dvb->fe[0], 1); if (!dvb_attach(xc5000_attach, dvb->fe[0], &dev->i2c_adap[dev->def_i2c_bus], &cfg)) { result = -EINVAL; goto out_free; } if (dvb->fe[0]->ops.i2c_gate_ctrl) dvb->fe[0]->ops.i2c_gate_ctrl(dvb->fe[0], 0); break; } case EM2884_BOARD_TERRATEC_H5: terratec_h5_init(dev); dvb->fe[0] = dvb_attach(drxk_attach, &terratec_h5_drxk, &dev->i2c_adap[dev->def_i2c_bus]); if (!dvb->fe[0]) { result = -EINVAL; goto out_free; } /* FIXME: do we need a pll semaphore? */ dvb->fe[0]->sec_priv = dvb; sema_init(&dvb->pll_mutex, 1); dvb->gate_ctrl = dvb->fe[0]->ops.i2c_gate_ctrl; dvb->fe[0]->ops.i2c_gate_ctrl = drxk_gate_ctrl; /* Attach tda18271 to DVB-C frontend */ if (dvb->fe[0]->ops.i2c_gate_ctrl) dvb->fe[0]->ops.i2c_gate_ctrl(dvb->fe[0], 1); if (!dvb_attach(tda18271c2dd_attach, dvb->fe[0], &dev->i2c_adap[dev->def_i2c_bus], 0x60)) { result = -EINVAL; goto out_free; } if (dvb->fe[0]->ops.i2c_gate_ctrl) dvb->fe[0]->ops.i2c_gate_ctrl(dvb->fe[0], 0); break; case EM2884_BOARD_C3TECH_DIGITAL_DUO: dvb->fe[0] = dvb_attach(mb86a20s_attach, &c3tech_duo_mb86a20s_config, &dev->i2c_adap[dev->def_i2c_bus]); if (dvb->fe[0]) dvb_attach(tda18271_attach, dvb->fe[0], 0x60, &dev->i2c_adap[dev->def_i2c_bus], &c3tech_duo_tda18271_config); break; case EM28174_BOARD_PCTV_460E: result = em28174_dvb_init_pctv_460e(dev); if (result) goto out_free; break; case EM2874_BOARD_DELOCK_61959: case EM2874_BOARD_MAXMEDIA_UB425_TC: /* attach demodulator */ dvb->fe[0] = dvb_attach(drxk_attach, &maxmedia_ub425_tc_drxk, &dev->i2c_adap[dev->def_i2c_bus]); if (dvb->fe[0]) { /* disable I2C-gate */ dvb->fe[0]->ops.i2c_gate_ctrl = NULL; /* attach tuner */ if (!dvb_attach(tda18271_attach, dvb->fe[0], 0x60, &dev->i2c_adap[dev->def_i2c_bus], &em28xx_cxd2820r_tda18271_config)) { dvb_frontend_detach(dvb->fe[0]); result = -EINVAL; goto out_free; } } break; case EM2884_BOARD_PCTV_510E: case EM2884_BOARD_PCTV_520E: pctv_520e_init(dev); /* attach demodulator */ dvb->fe[0] = dvb_attach(drxk_attach, &pctv_520e_drxk, &dev->i2c_adap[dev->def_i2c_bus]); if (dvb->fe[0]) { /* attach tuner */ if (!dvb_attach(tda18271_attach, dvb->fe[0], 0x60, &dev->i2c_adap[dev->def_i2c_bus], &em28xx_cxd2820r_tda18271_config)) { dvb_frontend_detach(dvb->fe[0]); result = -EINVAL; goto out_free; } } break; case EM2884_BOARD_ELGATO_EYETV_HYBRID_2008: case EM2884_BOARD_CINERGY_HTC_STICK: case EM2884_BOARD_TERRATEC_H6: terratec_htc_stick_init(dev); /* attach demodulator */ dvb->fe[0] = dvb_attach(drxk_attach, &terratec_htc_stick_drxk, &dev->i2c_adap[dev->def_i2c_bus]); if (!dvb->fe[0]) { result = -EINVAL; goto out_free; } /* Attach the demodulator. */ if (!dvb_attach(tda18271_attach, dvb->fe[0], 0x60, &dev->i2c_adap[dev->def_i2c_bus], &em28xx_cxd2820r_tda18271_config)) { result = -EINVAL; goto out_free; } break; case EM2884_BOARD_TERRATEC_HTC_USB_XS: terratec_htc_usb_xs_init(dev); /* attach demodulator */ dvb->fe[0] = dvb_attach(drxk_attach, &terratec_htc_stick_drxk, &dev->i2c_adap[dev->def_i2c_bus]); if (!dvb->fe[0]) { result = -EINVAL; goto out_free; } /* Attach the demodulator. 
*/ if (!dvb_attach(tda18271_attach, dvb->fe[0], 0x60, &dev->i2c_adap[dev->def_i2c_bus], &em28xx_cxd2820r_tda18271_config)) { result = -EINVAL; goto out_free; } break; case EM2874_BOARD_KWORLD_UB435Q_V2: dvb->fe[0] = dvb_attach(lgdt3305_attach, &em2874_lgdt3305_dev, &dev->i2c_adap[dev->def_i2c_bus]); if (!dvb->fe[0]) { result = -EINVAL; goto out_free; } /* Attach the demodulator. */ if (!dvb_attach(tda18271_attach, dvb->fe[0], 0x60, &dev->i2c_adap[dev->def_i2c_bus], &kworld_ub435q_v2_config)) { result = -EINVAL; goto out_free; } break; case EM2874_BOARD_KWORLD_UB435Q_V3: { struct i2c_adapter *adapter = &dev->i2c_adap[dev->def_i2c_bus]; dvb->fe[0] = dvb_attach(lgdt3305_attach, &em2874_lgdt3305_nogate_dev, &dev->i2c_adap[dev->def_i2c_bus]); if (!dvb->fe[0]) { result = -EINVAL; goto out_free; } /* attach tuner */ kworld_ub435q_v3_config.fe = dvb->fe[0]; dvb->i2c_client_tuner = dvb_module_probe("tda18212", NULL, adapter, 0x60, &kworld_ub435q_v3_config); if (!dvb->i2c_client_tuner) { dvb_frontend_detach(dvb->fe[0]); result = -ENODEV; goto out_free; } break; } case EM2874_BOARD_PCTV_HD_MINI_80E: dvb->fe[0] = dvb_attach(drx39xxj_attach, &dev->i2c_adap[dev->def_i2c_bus]); if (dvb->fe[0]) { dvb->fe[0] = dvb_attach(tda18271_attach, dvb->fe[0], 0x60, &dev->i2c_adap[dev->def_i2c_bus], &pinnacle_80e_dvb_config); if (!dvb->fe[0]) { result = -EINVAL; goto out_free; } } break; case EM28178_BOARD_PCTV_461E: result = em28178_dvb_init_pctv_461e(dev); if (result) goto out_free; break; case EM28178_BOARD_PCTV_461E_V2: result = em28178_dvb_init_pctv_461e_v2(dev); if (result) goto out_free; break; case EM28178_BOARD_PCTV_292E: result = em28178_dvb_init_pctv_292e(dev); if (result) goto out_free; break; case EM28178_BOARD_TERRATEC_T2_STICK_HD: result = em28178_dvb_init_terratec_t2_stick_hd(dev); if (result) goto out_free; break; case EM28178_BOARD_PLEX_PX_BCUD: result = em28178_dvb_init_plex_px_bcud(dev); if (result) goto out_free; break; case EM28174_BOARD_HAUPPAUGE_WINTV_DUALHD_DVB: result = em28174_dvb_init_hauppauge_wintv_dualhd_dvb(dev); if (result) goto out_free; break; case EM28174_BOARD_HAUPPAUGE_WINTV_DUALHD_01595: result = em28174_dvb_init_hauppauge_wintv_dualhd_01595(dev); if (result) goto out_free; break; case EM2874_BOARD_HAUPPAUGE_USB_QUADHD: result = em2874_dvb_init_hauppauge_usb_quadhd(dev); if (result) goto out_free; break; default: dev_err(&dev->intf->dev, "The frontend of your DVB/ATSC card isn't supported yet\n"); break; } if (!dvb->fe[0]) { dev_err(&dev->intf->dev, "frontend initialization failed\n"); result = -EINVAL; goto out_free; } /* define general-purpose callback pointer */ dvb->fe[0]->callback = em28xx_tuner_callback; if (dvb->fe[1]) dvb->fe[1]->callback = em28xx_tuner_callback; /* register everything */ result = em28xx_register_dvb(dvb, THIS_MODULE, dev, &dev->intf->dev); if (result < 0) goto out_free; if (dev->dvb_xfer_bulk) { dvb_alt = 0; } else { /* isoc */ dvb_alt = dev->dvb_alt_isoc; } udev = interface_to_usbdev(dev->intf); usb_set_interface(udev, dev->ifnum, dvb_alt); dev_info(&dev->intf->dev, "DVB extension successfully initialized\n"); kref_get(&dev->ref); ret: em28xx_set_mode(dev, EM28XX_SUSPEND); mutex_unlock(&dev->lock); return result; out_free: em28xx_uninit_usb_xfer(dev, EM28XX_DIGITAL_MODE); kfree(dvb); dev->dvb = NULL; goto ret; } static inline void prevent_sleep(struct dvb_frontend_ops *ops) { ops->set_voltage = NULL; ops->sleep = NULL; ops->tuner_ops.sleep = NULL; } static int em28xx_dvb_fini(struct em28xx *dev) { struct em28xx_dvb *dvb; if (dev->is_audio_only) { /* 
Shouldn't initialize IR for this interface */ return 0; } if (!dev->board.has_dvb) { /* This device does not support the extension */ return 0; } if (!dev->dvb) return 0; dev_info(&dev->intf->dev, "Closing DVB extension\n"); dvb = dev->dvb; em28xx_uninit_usb_xfer(dev, EM28XX_DIGITAL_MODE); if (dev->disconnected) { /* * We cannot tell the device to sleep * once it has been unplugged. */ if (dvb->fe[0]) { prevent_sleep(&dvb->fe[0]->ops); dvb->fe[0]->exit = DVB_FE_DEVICE_REMOVED; } if (dvb->fe[1]) { prevent_sleep(&dvb->fe[1]->ops); dvb->fe[1]->exit = DVB_FE_DEVICE_REMOVED; } } em28xx_unregister_dvb(dvb); /* release I2C module bindings */ dvb_module_release(dvb->i2c_client_sec); dvb_module_release(dvb->i2c_client_tuner); dvb_module_release(dvb->i2c_client_demod); kfree(dvb); dev->dvb = NULL; kref_put(&dev->ref, em28xx_free_device); return 0; } static int em28xx_dvb_suspend(struct em28xx *dev) { int ret = 0; if (dev->is_audio_only) return 0; if (!dev->board.has_dvb) return 0; dev_info(&dev->intf->dev, "Suspending DVB extension\n"); if (dev->dvb) { struct em28xx_dvb *dvb = dev->dvb; if (dvb->fe[0]) { ret = dvb_frontend_suspend(dvb->fe[0]); dev_info(&dev->intf->dev, "fe0 suspend %d\n", ret); } if (dvb->fe[1]) { dvb_frontend_suspend(dvb->fe[1]); dev_info(&dev->intf->dev, "fe1 suspend %d\n", ret); } } return 0; } static int em28xx_dvb_resume(struct em28xx *dev) { int ret = 0; if (dev->is_audio_only) return 0; if (!dev->board.has_dvb) return 0; dev_info(&dev->intf->dev, "Resuming DVB extension\n"); if (dev->dvb) { struct em28xx_dvb *dvb = dev->dvb; if (dvb->fe[0]) { ret = dvb_frontend_resume(dvb->fe[0]); dev_info(&dev->intf->dev, "fe0 resume %d\n", ret); } if (dvb->fe[1]) { ret = dvb_frontend_resume(dvb->fe[1]); dev_info(&dev->intf->dev, "fe1 resume %d\n", ret); } } return 0; } static struct em28xx_ops dvb_ops = { .id = EM28XX_DVB, .name = "Em28xx dvb Extension", .init = em28xx_dvb_init, .fini = em28xx_dvb_fini, .suspend = em28xx_dvb_suspend, .resume = em28xx_dvb_resume, }; static int __init em28xx_dvb_register(void) { return em28xx_register_extension(&dvb_ops); } static void __exit em28xx_dvb_unregister(void) { em28xx_unregister_extension(&dvb_ops); } module_init(em28xx_dvb_register); module_exit(em28xx_dvb_unregister); |
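/*
 * Editorial sketch (not part of the driver): a minimal, self-contained
 * userspace illustration of the unwind pattern used throughout
 * em28xx_dvb_init() above -- every failed attach sets `result` and jumps to
 * out_free, which releases the partial state and rejoins the common exit
 * path. All names below are hypothetical.
 */
#include <stdio.h>
#include <stdlib.h>

struct demo_dvb { int have_frontend; };

static struct demo_dvb *demo_registered;  /* stands in for dev->dvb */

static int demo_attach_frontend(struct demo_dvb *dvb) { dvb->have_frontend = 1; return 0; }
static int demo_attach_tuner(struct demo_dvb *dvb) { return dvb->have_frontend ? 0 : -1; }

static int demo_dvb_init(void)
{
	struct demo_dvb *dvb;
	int result = 0;

	dvb = calloc(1, sizeof(*dvb));
	if (!dvb)
		return -1;

	/* Each attach step bails out through the common unwind label. */
	if (demo_attach_frontend(dvb)) {
		result = -1;
		goto out_free;
	}
	if (demo_attach_tuner(dvb)) {
		result = -1;
		goto out_free;
	}

	demo_registered = dvb;            /* success: keep the state around */
	printf("demo DVB extension initialized\n");
ret:
	return result;                    /* common exit for success and failure */

out_free:
	free(dvb);                        /* failure: drop the partial state ... */
	goto ret;                         /* ... then rejoin the common exit */
}

int main(void)
{
	int err = demo_dvb_init();

	free(demo_registered);
	return err ? 1 : 0;
}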
10 6 6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 | // SPDX-License-Identifier: GPL-2.0-or-later /* Null security operations. * * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <net/af_rxrpc.h> #include "ar-internal.h" static int none_init_connection_security(struct rxrpc_connection *conn, struct rxrpc_key_token *token) { return 0; } /* * Allocate an appropriately sized buffer for the amount of data remaining. */ static struct rxrpc_txbuf *none_alloc_txbuf(struct rxrpc_call *call, size_t remain, gfp_t gfp) { return rxrpc_alloc_data_txbuf(call, umin(remain, RXRPC_JUMBO_DATALEN), 1, gfp); } static int none_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) { txb->pkt_len = txb->len; if (txb->len == RXRPC_JUMBO_DATALEN) txb->jumboable = true; return 0; } static int none_verify_packet(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); sp->flags |= RXRPC_RX_VERIFIED; return 0; } static void none_free_call_crypto(struct rxrpc_call *call) { } static int none_respond_to_challenge(struct rxrpc_connection *conn, struct sk_buff *skb) { return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO, rxrpc_eproto_rxnull_challenge); } static int none_verify_response(struct rxrpc_connection *conn, struct sk_buff *skb) { return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO, rxrpc_eproto_rxnull_response); } static void none_clear(struct rxrpc_connection *conn) { } static int none_init(void) { return 0; } static void none_exit(void) { } /* * RxRPC Kerberos-based security */ const struct rxrpc_security rxrpc_no_security = { .name = "none", .security_index = RXRPC_SECURITY_NONE, .init = none_init, .exit = none_exit, .init_connection_security = none_init_connection_security, .free_call_crypto = none_free_call_crypto, .alloc_txbuf = none_alloc_txbuf, .secure_packet = none_secure_packet, .verify_packet = none_verify_packet, .respond_to_challenge = none_respond_to_challenge, .verify_response = none_verify_response, .clear = none_clear, }; |
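/*
 * Editorial sketch (not from the kernel tree): the "null object" ops-table
 * idea used by rxrpc_no_security above -- every hook exists, most do nothing
 * or simply succeed, so callers never need NULL checks and can use the same
 * call sites for real crypto and for no security. Hypothetical names.
 */
#include <stdio.h>

struct demo_security_ops {
	const char *name;
	int  (*secure_packet)(const char *payload);
	int  (*verify_packet)(const char *payload);
	void (*clear)(void);
};

static int demo_none_secure(const char *payload) { (void)payload; return 0; }
static int demo_none_verify(const char *payload) { (void)payload; return 0; }
static void demo_none_clear(void) { }

static const struct demo_security_ops demo_no_security = {
	.name          = "none",
	.secure_packet = demo_none_secure,
	.verify_packet = demo_none_verify,
	.clear         = demo_none_clear,
};

int main(void)
{
	const struct demo_security_ops *ops = &demo_no_security;

	/* The caller looks the same whether real crypto or the null ops are in use. */
	if (!ops->secure_packet("hello") && !ops->verify_packet("hello"))
		printf("packet passed through %s security\n", ops->name);
	ops->clear();
	return 0;
}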
21 12 21 21 21 21 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 | // SPDX-License-Identifier: GPL-2.0 /* Copyright (C) B.A.T.M.A.N. contributors: * * Simon Wunderlich, Marek Lindner */ #include "hash.h" #include "main.h" #include <linux/gfp.h> #include <linux/lockdep.h> #include <linux/slab.h> /* clears the hash */ static void batadv_hash_init(struct batadv_hashtable *hash) { u32 i; for (i = 0; i < hash->size; i++) { INIT_HLIST_HEAD(&hash->table[i]); spin_lock_init(&hash->list_locks[i]); } atomic_set(&hash->generation, 0); } /** * batadv_hash_destroy() - Free only the hashtable and the hash itself * @hash: hash object to destroy */ void batadv_hash_destroy(struct batadv_hashtable *hash) { kfree(hash->list_locks); kfree(hash->table); kfree(hash); } /** * batadv_hash_new() - Allocates and clears the hashtable * @size: number of hash buckets to allocate * * Return: newly allocated hashtable, NULL on errors */ struct batadv_hashtable *batadv_hash_new(u32 size) { struct batadv_hashtable *hash; hash = kmalloc(sizeof(*hash), GFP_ATOMIC); if (!hash) return NULL; hash->table = kmalloc_array(size, sizeof(*hash->table), GFP_ATOMIC); if (!hash->table) goto free_hash; hash->list_locks = kmalloc_array(size, sizeof(*hash->list_locks), GFP_ATOMIC); if (!hash->list_locks) goto free_table; hash->size = size; batadv_hash_init(hash); return hash; free_table: kfree(hash->table); free_hash: kfree(hash); return NULL; } /** * batadv_hash_set_lock_class() - Set specific lockdep class for hash spinlocks * @hash: hash object to modify * @key: lockdep class key address */ void batadv_hash_set_lock_class(struct batadv_hashtable *hash, struct lock_class_key *key) { u32 i; for (i = 0; i < hash->size; i++) lockdep_set_class(&hash->list_locks[i], key); } |
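/*
 * Editorial sketch (not from the kernel tree): the shape of batadv_hash_new()
 * above -- a bucket array plus a parallel per-bucket lock array, with partial
 * allocations unwound on failure. pthread mutexes stand in for spinlocks and
 * all names are hypothetical.
 */
#include <pthread.h>
#include <stdint.h>
#include <stdlib.h>

struct demo_bucket { struct demo_bucket *next; };

struct demo_hashtable {
	struct demo_bucket **table;      /* one head pointer per bucket */
	pthread_mutex_t *list_locks;     /* one lock per bucket */
	uint32_t size;
};

static struct demo_hashtable *demo_hash_new(uint32_t size)
{
	struct demo_hashtable *hash = malloc(sizeof(*hash));
	uint32_t i;

	if (!hash)
		return NULL;
	hash->table = calloc(size, sizeof(*hash->table));
	if (!hash->table)
		goto free_hash;
	hash->list_locks = malloc(size * sizeof(*hash->list_locks));
	if (!hash->list_locks)
		goto free_table;

	hash->size = size;
	for (i = 0; i < size; i++)
		pthread_mutex_init(&hash->list_locks[i], NULL);
	return hash;

free_table:
	kfree_table: __attribute__((unused));
	free(hash->table);
free_hash:
	free(hash);
	return NULL;
}

static void demo_hash_destroy(struct demo_hashtable *hash)
{
	uint32_t i;

	for (i = 0; i < hash->size; i++)
		pthread_mutex_destroy(&hash->list_locks[i]);
	free(hash->list_locks);
	free(hash->table);
	free(hash);
}

int main(void)
{
	struct demo_hashtable *hash = demo_hash_new(64);

	if (!hash)
		return 1;
	demo_hash_destroy(hash);
	return 0;
}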
25 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_TTY_BUFFER_H #define _LINUX_TTY_BUFFER_H #include <linux/atomic.h> #include <linux/llist.h> #include <linux/mutex.h> #include <linux/workqueue.h> struct tty_buffer { union { struct tty_buffer *next; struct llist_node free; }; unsigned int used; unsigned int size; unsigned int commit; unsigned int lookahead; /* Lazy update on recv, can become less than "read" */ unsigned int read; bool flags; /* Data points here */ u8 data[] __aligned(sizeof(unsigned long)); }; static inline u8 *char_buf_ptr(struct tty_buffer *b, unsigned int ofs) { return b->data + ofs; } static inline u8 *flag_buf_ptr(struct tty_buffer *b, unsigned int ofs) { return char_buf_ptr(b, ofs) + b->size; } struct tty_bufhead { struct tty_buffer *head; /* Queue head */ struct work_struct work; struct mutex lock; atomic_t priority; struct tty_buffer sentinel; struct llist_head free; /* Free queue head */ atomic_t mem_used; /* In-use buffers excluding free list */ int mem_limit; struct tty_buffer *tail; /* Active buffer */ }; /* * When a break, frame error, or parity error happens, these codes are * stuffed into the flags buffer. */ #define TTY_NORMAL 0 #define TTY_BREAK 1 #define TTY_FRAME 2 #define TTY_PARITY 3 #define TTY_OVERRUN 4 #endif |
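/*
 * Editorial sketch (not from the kernel tree): the split-buffer layout implied
 * by char_buf_ptr()/flag_buf_ptr() above -- one flexible array holds `size`
 * data bytes immediately followed by `size` flag bytes. Hypothetical names.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct demo_tty_buffer {
	unsigned int used;
	unsigned int size;
	uint8_t data[];              /* data[0..size-1], then flags[0..size-1] */
};

static uint8_t *demo_char_buf_ptr(struct demo_tty_buffer *b, unsigned int ofs)
{
	return b->data + ofs;
}

static uint8_t *demo_flag_buf_ptr(struct demo_tty_buffer *b, unsigned int ofs)
{
	return demo_char_buf_ptr(b, ofs) + b->size;   /* flags start after the data half */
}

int main(void)
{
	unsigned int size = 8;
	struct demo_tty_buffer *b = calloc(1, sizeof(*b) + 2 * size);

	if (!b)
		return 1;
	b->size = size;
	*demo_char_buf_ptr(b, 0) = 'A';   /* received character */
	*demo_flag_buf_ptr(b, 0) = 0;     /* e.g. the TTY_NORMAL flag */
	printf("char=%c flag=%u\n", *demo_char_buf_ptr(b, 0), *demo_flag_buf_ptr(b, 0));
	free(b);
	return 0;
}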
28 9 37 36 9 6 2 2 9 37 9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 | /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/usb.h> #include <linux/usb/hcd.h> #include "usb.h" static int uas_is_interface(struct usb_host_interface *intf) { return (intf->desc.bInterfaceClass == USB_CLASS_MASS_STORAGE && intf->desc.bInterfaceSubClass == USB_SC_SCSI && intf->desc.bInterfaceProtocol == USB_PR_UAS); } static struct usb_host_interface *uas_find_uas_alt_setting( struct usb_interface *intf) { int i; for (i = 0; i < intf->num_altsetting; i++) { struct usb_host_interface *alt = &intf->altsetting[i]; if (uas_is_interface(alt)) return alt; } return NULL; } static int uas_find_endpoints(struct usb_host_interface *alt, struct usb_host_endpoint *eps[]) { struct usb_host_endpoint *endpoint = alt->endpoint; unsigned i, n_endpoints = alt->desc.bNumEndpoints; for (i = 0; i < n_endpoints; i++) { unsigned char *extra = endpoint[i].extra; int len = endpoint[i].extralen; while (len >= 3) { if (extra[1] == USB_DT_PIPE_USAGE) { unsigned pipe_id = extra[2]; if (pipe_id > 0 && pipe_id < 5) eps[pipe_id - 1] = &endpoint[i]; break; } len -= extra[0]; extra += extra[0]; } } if (!eps[0] || !eps[1] || !eps[2] || !eps[3]) return -ENODEV; return 0; } static int uas_use_uas_driver(struct usb_interface *intf, const struct usb_device_id *id, u64 *flags_ret) { struct usb_host_endpoint *eps[4] = { }; struct usb_device *udev = interface_to_usbdev(intf); struct usb_hcd *hcd = bus_to_hcd(udev->bus); u64 flags = id->driver_info; struct usb_host_interface *alt; int r; alt = uas_find_uas_alt_setting(intf); if (!alt) return 0; r = uas_find_endpoints(alt, eps); if (r < 0) return 0; /* * ASMedia has a number of usb3 to sata bridge chips, at the time of * this writing the following versions exist: * ASM1051 - no uas support version * ASM1051 - with broken (*) uas support * ASM1053 - with working uas support, but problems with large xfers * ASM1153 - with working uas support * * Devices with these chips re-use a number of device-ids over the * entire line, so the device-id is useless to determine if we're * dealing with an ASM1051 (which we want to avoid). * * The ASM1153 can be identified by config.MaxPower == 0, * where as the ASM105x models have config.MaxPower == 36. * * Differentiating between the ASM1053 and ASM1051 is trickier, when * connected over USB-3 we can look at the number of streams supported, * ASM1051 supports 32 streams, where as early ASM1053 versions support * 16 streams, newer ASM1053-s also support 32 streams, but have a * different prod-id. 
* * (*) ASM1051 chips do work with UAS with some disks (with the * US_FL_NO_REPORT_OPCODES quirk), but are broken with other disks */ if (le16_to_cpu(udev->descriptor.idVendor) == 0x174c && (le16_to_cpu(udev->descriptor.idProduct) == 0x5106 || le16_to_cpu(udev->descriptor.idProduct) == 0x55aa)) { if (udev->actconfig->desc.bMaxPower == 0) { /* ASM1153, do nothing */ } else if (udev->speed < USB_SPEED_SUPER) { /* No streams info, assume ASM1051 */ flags |= US_FL_IGNORE_UAS; } else if (usb_ss_max_streams(&eps[1]->ss_ep_comp) == 32) { /* Possibly an ASM1051, disable uas */ flags |= US_FL_IGNORE_UAS; } else { /* ASM1053, these have issues with large transfers */ flags |= US_FL_MAX_SECTORS_240; } } /* All Seagate disk enclosures have broken ATA pass-through support */ if (le16_to_cpu(udev->descriptor.idVendor) == 0x0bc2) flags |= US_FL_NO_ATA_1X; /* * RTL9210-based enclosure from HIKSEMI, MD202 reportedly have issues * with UAS. This isn't distinguishable with just idVendor and * idProduct, use manufacturer and product too. * * Reported-by: Hongling Zeng <zenghongling@kylinos.cn> */ if (le16_to_cpu(udev->descriptor.idVendor) == 0x0bda && le16_to_cpu(udev->descriptor.idProduct) == 0x9210 && (udev->manufacturer && !strcmp(udev->manufacturer, "HIKSEMI")) && (udev->product && !strcmp(udev->product, "MD202"))) flags |= US_FL_IGNORE_UAS; usb_stor_adjust_quirks(udev, &flags); if (flags & US_FL_IGNORE_UAS) { dev_warn(&udev->dev, "UAS is ignored for this device, using usb-storage instead\n"); return 0; } if (udev->bus->sg_tablesize == 0) { dev_warn(&udev->dev, "The driver for the USB controller %s does not support scatter-gather which is\n", hcd->driver->description); dev_warn(&udev->dev, "required by the UAS driver. Please try an other USB controller if you wish to use UAS.\n"); return 0; } if (udev->speed >= USB_SPEED_SUPER && !hcd->can_do_streams) { dev_warn(&udev->dev, "USB controller %s does not support streams, which are required by the UAS driver.\n", hcd_to_bus(hcd)->bus_name); dev_warn(&udev->dev, "Please try an other USB controller if you wish to use UAS.\n"); return 0; } if (flags_ret) *flags_ret = flags; return 1; } |
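/*
 * Editorial sketch (not from the kernel tree): the ASMedia decision table from
 * uas_use_uas_driver() above, reduced to plain integers so the branches are
 * easy to follow. Flag names and values here are hypothetical stand-ins for
 * the US_FL_* quirk bits.
 */
#include <stdbool.h>
#include <stdio.h>

#define DEMO_FL_IGNORE_UAS      (1u << 0)
#define DEMO_FL_MAX_SECTORS_240 (1u << 1)

static unsigned int demo_asmedia_quirks(unsigned int max_power,
					bool super_speed,
					unsigned int max_streams)
{
	unsigned int flags = 0;

	if (max_power == 0) {
		/* ASM1153: trust UAS as-is. */
	} else if (!super_speed) {
		/* No stream info over USB-2, assume the broken ASM1051. */
		flags |= DEMO_FL_IGNORE_UAS;
	} else if (max_streams == 32) {
		/* Possibly an ASM1051: avoid UAS. */
		flags |= DEMO_FL_IGNORE_UAS;
	} else {
		/* ASM1053: usable, but cap transfer size. */
		flags |= DEMO_FL_MAX_SECTORS_240;
	}
	return flags;
}

int main(void)
{
	printf("ASM1153            -> %#x\n", demo_asmedia_quirks(0, true, 16));
	printf("USB-2 bridge       -> %#x\n", demo_asmedia_quirks(36, false, 0));
	printf("32-stream bridge   -> %#x\n", demo_asmedia_quirks(36, true, 32));
	printf("16-stream ASM1053  -> %#x\n", demo_asmedia_quirks(36, true, 16));
	return 0;
}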
1 2 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Stadia controller rumble support. * * Copyright 2023 Google LLC */ #include <linux/hid.h> #include <linux/input.h> #include <linux/slab.h> #include <linux/module.h> #include "hid-ids.h" #define STADIA_FF_REPORT_ID 5 struct stadiaff_device { struct hid_device *hid; struct hid_report *report; spinlock_t lock; bool removed; uint16_t strong_magnitude; uint16_t weak_magnitude; struct work_struct work; }; static void stadiaff_work(struct work_struct *work) { struct stadiaff_device *stadiaff = container_of(work, struct stadiaff_device, work); struct hid_field *rumble_field = stadiaff->report->field[0]; unsigned long flags; spin_lock_irqsave(&stadiaff->lock, flags); rumble_field->value[0] = stadiaff->strong_magnitude; rumble_field->value[1] = stadiaff->weak_magnitude; spin_unlock_irqrestore(&stadiaff->lock, flags); hid_hw_request(stadiaff->hid, stadiaff->report, HID_REQ_SET_REPORT); } static int stadiaff_play(struct input_dev *dev, void *data, struct ff_effect *effect) { struct hid_device *hid = input_get_drvdata(dev); struct stadiaff_device *stadiaff = hid_get_drvdata(hid); unsigned long flags; spin_lock_irqsave(&stadiaff->lock, flags); if (!stadiaff->removed) { stadiaff->strong_magnitude = effect->u.rumble.strong_magnitude; stadiaff->weak_magnitude = effect->u.rumble.weak_magnitude; schedule_work(&stadiaff->work); } spin_unlock_irqrestore(&stadiaff->lock, flags); return 0; } static int stadiaff_init(struct hid_device *hid) { struct stadiaff_device *stadiaff; struct hid_report *report; struct hid_input *hidinput; struct input_dev *dev; int error; if (list_empty(&hid->inputs)) { hid_err(hid, "no inputs found\n"); return -ENODEV; } hidinput = list_entry(hid->inputs.next, struct hid_input, list); dev = hidinput->input; report = hid_validate_values(hid, HID_OUTPUT_REPORT, STADIA_FF_REPORT_ID, 0, 2); if (!report) return -ENODEV; stadiaff = devm_kzalloc(&hid->dev, sizeof(struct stadiaff_device), GFP_KERNEL); if (!stadiaff) return -ENOMEM; hid_set_drvdata(hid, stadiaff); input_set_capability(dev, EV_FF, FF_RUMBLE); error = input_ff_create_memless(dev, NULL, stadiaff_play); if (error) return error; stadiaff->removed = false; stadiaff->hid = hid; stadiaff->report = report; INIT_WORK(&stadiaff->work, stadiaff_work); spin_lock_init(&stadiaff->lock); hid_info(hid, "Force Feedback for Google Stadia controller\n"); return 0; } static int stadia_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret; ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); return ret; } ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT & ~HID_CONNECT_FF); if (ret) { hid_err(hdev, "hw start failed\n"); return ret; } ret = stadiaff_init(hdev); if (ret) { hid_err(hdev, "force feedback init failed\n"); hid_hw_stop(hdev); return ret; } return 0; } static void stadia_remove(struct hid_device *hid) { struct stadiaff_device *stadiaff = hid_get_drvdata(hid); unsigned long flags; spin_lock_irqsave(&stadiaff->lock, flags); 
stadiaff->removed = true; spin_unlock_irqrestore(&stadiaff->lock, flags); cancel_work_sync(&stadiaff->work); hid_hw_stop(hid); } static const struct hid_device_id stadia_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_STADIA) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_STADIA) }, { } }; MODULE_DEVICE_TABLE(hid, stadia_devices); static struct hid_driver stadia_driver = { .name = "stadia", .id_table = stadia_devices, .probe = stadia_probe, .remove = stadia_remove, }; module_hid_driver(stadia_driver); MODULE_DESCRIPTION("Google Stadia controller rumble support."); MODULE_LICENSE("GPL"); |
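/*
 * Editorial sketch (not from the kernel tree): the teardown ordering used by
 * stadia_remove() above -- mark the device removed under the same lock the
 * request path takes, and only then cancel the deferred work, so no new work
 * can be queued afterwards. Hypothetical names; a mutex stands in for the
 * spinlock and the deferred "work" is run inline.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t demo_lock = PTHREAD_MUTEX_INITIALIZER;
static bool demo_removed;
static bool demo_work_pending;
static unsigned int demo_strong, demo_weak;

/* Analogue of stadiaff_play(): record magnitudes and defer the transfer. */
static void demo_play(unsigned int strong, unsigned int weak)
{
	pthread_mutex_lock(&demo_lock);
	if (!demo_removed) {
		demo_strong = strong;
		demo_weak = weak;
		demo_work_pending = true;
	}
	pthread_mutex_unlock(&demo_lock);
}

/* Analogue of the work item: perform the deferred "report" transfer. */
static void demo_run_work(void)
{
	if (demo_work_pending) {
		printf("send report: strong=%u weak=%u\n", demo_strong, demo_weak);
		demo_work_pending = false;
	}
}

/* Analogue of stadia_remove(): flag removal first, then cancel the work. */
static void demo_remove(void)
{
	pthread_mutex_lock(&demo_lock);
	demo_removed = true;              /* the request path now refuses new work */
	pthread_mutex_unlock(&demo_lock);
	demo_work_pending = false;        /* like cancel_work_sync(): nothing left to run */
}

int main(void)
{
	demo_play(0x8000, 0x4000);
	demo_run_work();                  /* report goes out */
	demo_remove();
	demo_play(0xffff, 0xffff);        /* ignored: device already removed */
	demo_run_work();                  /* nothing pending */
	return 0;
}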
3 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 | // SPDX-License-Identifier: GPL-2.0-only /* * (C) 2007 Patrick McHardy <kaber@trash.net> */ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/gen_stats.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_rateest.h> #include <net/netfilter/xt_rateest.h> static bool xt_rateest_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_rateest_match_info *info = par->matchinfo; struct gnet_stats_rate_est64 sample = {0}; u_int32_t bps1, bps2, pps1, pps2; bool ret = true; gen_estimator_read(&info->est1->rate_est, &sample); if (info->flags & XT_RATEEST_MATCH_DELTA) { bps1 = info->bps1 >= sample.bps ? info->bps1 - sample.bps : 0; pps1 = info->pps1 >= sample.pps ? info->pps1 - sample.pps : 0; } else { bps1 = sample.bps; pps1 = sample.pps; } if (info->flags & XT_RATEEST_MATCH_ABS) { bps2 = info->bps2; pps2 = info->pps2; } else { gen_estimator_read(&info->est2->rate_est, &sample); if (info->flags & XT_RATEEST_MATCH_DELTA) { bps2 = info->bps2 >= sample.bps ? info->bps2 - sample.bps : 0; pps2 = info->pps2 >= sample.pps ? info->pps2 - sample.pps : 0; } else { bps2 = sample.bps; pps2 = sample.pps; } } switch (info->mode) { case XT_RATEEST_MATCH_LT: if (info->flags & XT_RATEEST_MATCH_BPS) ret &= bps1 < bps2; if (info->flags & XT_RATEEST_MATCH_PPS) ret &= pps1 < pps2; break; case XT_RATEEST_MATCH_GT: if (info->flags & XT_RATEEST_MATCH_BPS) ret &= bps1 > bps2; if (info->flags & XT_RATEEST_MATCH_PPS) ret &= pps1 > pps2; break; case XT_RATEEST_MATCH_EQ: if (info->flags & XT_RATEEST_MATCH_BPS) ret &= bps1 == bps2; if (info->flags & XT_RATEEST_MATCH_PPS) ret &= pps1 == pps2; break; } ret ^= info->flags & XT_RATEEST_MATCH_INVERT ? 
true : false; return ret; } static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par) { struct xt_rateest_match_info *info = par->matchinfo; struct xt_rateest *est1, *est2; int ret = -EINVAL; if (hweight32(info->flags & (XT_RATEEST_MATCH_ABS | XT_RATEEST_MATCH_REL)) != 1) goto err1; if (!(info->flags & (XT_RATEEST_MATCH_BPS | XT_RATEEST_MATCH_PPS))) goto err1; switch (info->mode) { case XT_RATEEST_MATCH_EQ: case XT_RATEEST_MATCH_LT: case XT_RATEEST_MATCH_GT: break; default: goto err1; } ret = -ENOENT; est1 = xt_rateest_lookup(par->net, info->name1); if (!est1) goto err1; est2 = NULL; if (info->flags & XT_RATEEST_MATCH_REL) { est2 = xt_rateest_lookup(par->net, info->name2); if (!est2) goto err2; } info->est1 = est1; info->est2 = est2; return 0; err2: xt_rateest_put(par->net, est1); err1: return ret; } static void xt_rateest_mt_destroy(const struct xt_mtdtor_param *par) { struct xt_rateest_match_info *info = par->matchinfo; xt_rateest_put(par->net, info->est1); if (info->est2) xt_rateest_put(par->net, info->est2); } static struct xt_match xt_rateest_mt_reg __read_mostly = { .name = "rateest", .revision = 0, .family = NFPROTO_UNSPEC, .match = xt_rateest_mt, .checkentry = xt_rateest_mt_checkentry, .destroy = xt_rateest_mt_destroy, .matchsize = sizeof(struct xt_rateest_match_info), .usersize = offsetof(struct xt_rateest_match_info, est1), .me = THIS_MODULE, }; static int __init xt_rateest_mt_init(void) { return xt_register_match(&xt_rateest_mt_reg); } static void __exit xt_rateest_mt_fini(void) { xt_unregister_match(&xt_rateest_mt_reg); } MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("xtables rate estimator match"); MODULE_ALIAS("ipt_rateest"); MODULE_ALIAS("ip6t_rateest"); module_init(xt_rateest_mt_init); module_exit(xt_rateest_mt_fini); |
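/*
 * Editorial sketch (not from the kernel tree): the comparison core of
 * xt_rateest_mt() above, on plain numbers. Mode and flag names are
 * hypothetical stand-ins for the XT_RATEEST_MATCH_* constants.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum demo_mode { DEMO_LT, DEMO_GT, DEMO_EQ };

#define DEMO_MATCH_BPS    (1u << 0)
#define DEMO_MATCH_PPS    (1u << 1)
#define DEMO_MATCH_INVERT (1u << 2)

static bool demo_rateest_match(enum demo_mode mode, unsigned int flags,
			       uint32_t bps1, uint32_t bps2,
			       uint32_t pps1, uint32_t pps2)
{
	bool ret = true;

	switch (mode) {
	case DEMO_LT:
		if (flags & DEMO_MATCH_BPS) ret &= bps1 < bps2;
		if (flags & DEMO_MATCH_PPS) ret &= pps1 < pps2;
		break;
	case DEMO_GT:
		if (flags & DEMO_MATCH_BPS) ret &= bps1 > bps2;
		if (flags & DEMO_MATCH_PPS) ret &= pps1 > pps2;
		break;
	case DEMO_EQ:
		if (flags & DEMO_MATCH_BPS) ret &= bps1 == bps2;
		if (flags & DEMO_MATCH_PPS) ret &= pps1 == pps2;
		break;
	}
	/* INVERT flips the final verdict, exactly like the ^= above. */
	if (flags & DEMO_MATCH_INVERT)
		ret = !ret;
	return ret;
}

int main(void)
{
	/* "bps below threshold?" -- 1 Mbit/s measured vs 2 Mbit/s threshold. */
	printf("%d\n", demo_rateest_match(DEMO_LT, DEMO_MATCH_BPS,
					  125000, 250000, 0, 0));
	/* Same check, inverted. */
	printf("%d\n", demo_rateest_match(DEMO_LT, DEMO_MATCH_BPS | DEMO_MATCH_INVERT,
					  125000, 250000, 0, 0));
	return 0;
}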
// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
*/ #include "netlink.h" #include "device.h" #include "peer.h" #include "socket.h" #include "queueing.h" #include "messages.h" #include <uapi/linux/wireguard.h> #include <linux/if.h> #include <net/genetlink.h> #include <net/sock.h> #include <crypto/utils.h> static struct genl_family genl_family; static const struct nla_policy device_policy[WGDEVICE_A_MAX + 1] = { [WGDEVICE_A_IFINDEX] = { .type = NLA_U32 }, [WGDEVICE_A_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 }, [WGDEVICE_A_PRIVATE_KEY] = NLA_POLICY_EXACT_LEN(NOISE_PUBLIC_KEY_LEN), [WGDEVICE_A_PUBLIC_KEY] = NLA_POLICY_EXACT_LEN(NOISE_PUBLIC_KEY_LEN), [WGDEVICE_A_FLAGS] = { .type = NLA_U32 }, [WGDEVICE_A_LISTEN_PORT] = { .type = NLA_U16 }, [WGDEVICE_A_FWMARK] = { .type = NLA_U32 }, [WGDEVICE_A_PEERS] = { .type = NLA_NESTED } }; static const struct nla_policy peer_policy[WGPEER_A_MAX + 1] = { [WGPEER_A_PUBLIC_KEY] = NLA_POLICY_EXACT_LEN(NOISE_PUBLIC_KEY_LEN), [WGPEER_A_PRESHARED_KEY] = NLA_POLICY_EXACT_LEN(NOISE_SYMMETRIC_KEY_LEN), [WGPEER_A_FLAGS] = { .type = NLA_U32 }, [WGPEER_A_ENDPOINT] = NLA_POLICY_MIN_LEN(sizeof(struct sockaddr)), [WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL] = { .type = NLA_U16 }, [WGPEER_A_LAST_HANDSHAKE_TIME] = NLA_POLICY_EXACT_LEN(sizeof(struct __kernel_timespec)), [WGPEER_A_RX_BYTES] = { .type = NLA_U64 }, [WGPEER_A_TX_BYTES] = { .type = NLA_U64 }, [WGPEER_A_ALLOWEDIPS] = { .type = NLA_NESTED }, [WGPEER_A_PROTOCOL_VERSION] = { .type = NLA_U32 } }; static const struct nla_policy allowedip_policy[WGALLOWEDIP_A_MAX + 1] = { [WGALLOWEDIP_A_FAMILY] = { .type = NLA_U16 }, [WGALLOWEDIP_A_IPADDR] = NLA_POLICY_MIN_LEN(sizeof(struct in_addr)), [WGALLOWEDIP_A_CIDR_MASK] = { .type = NLA_U8 } }; static struct wg_device *lookup_interface(struct nlattr **attrs, struct sk_buff *skb) { struct net_device *dev = NULL; if (!attrs[WGDEVICE_A_IFINDEX] == !attrs[WGDEVICE_A_IFNAME]) return ERR_PTR(-EBADR); if (attrs[WGDEVICE_A_IFINDEX]) dev = dev_get_by_index(sock_net(skb->sk), nla_get_u32(attrs[WGDEVICE_A_IFINDEX])); else if (attrs[WGDEVICE_A_IFNAME]) dev = dev_get_by_name(sock_net(skb->sk), nla_data(attrs[WGDEVICE_A_IFNAME])); if (!dev) return ERR_PTR(-ENODEV); if (!dev->rtnl_link_ops || !dev->rtnl_link_ops->kind || strcmp(dev->rtnl_link_ops->kind, KBUILD_MODNAME)) { dev_put(dev); return ERR_PTR(-EOPNOTSUPP); } return netdev_priv(dev); } static int get_allowedips(struct sk_buff *skb, const u8 *ip, u8 cidr, int family) { struct nlattr *allowedip_nest; allowedip_nest = nla_nest_start(skb, 0); if (!allowedip_nest) return -EMSGSIZE; if (nla_put_u8(skb, WGALLOWEDIP_A_CIDR_MASK, cidr) || nla_put_u16(skb, WGALLOWEDIP_A_FAMILY, family) || nla_put(skb, WGALLOWEDIP_A_IPADDR, family == AF_INET6 ? 
sizeof(struct in6_addr) : sizeof(struct in_addr), ip)) { nla_nest_cancel(skb, allowedip_nest); return -EMSGSIZE; } nla_nest_end(skb, allowedip_nest); return 0; } struct dump_ctx { struct wg_device *wg; struct wg_peer *next_peer; u64 allowedips_seq; struct allowedips_node *next_allowedip; }; #define DUMP_CTX(cb) ((struct dump_ctx *)(cb)->args) static int get_peer(struct wg_peer *peer, struct sk_buff *skb, struct dump_ctx *ctx) { struct nlattr *allowedips_nest, *peer_nest = nla_nest_start(skb, 0); struct allowedips_node *allowedips_node = ctx->next_allowedip; bool fail; if (!peer_nest) return -EMSGSIZE; down_read(&peer->handshake.lock); fail = nla_put(skb, WGPEER_A_PUBLIC_KEY, NOISE_PUBLIC_KEY_LEN, peer->handshake.remote_static); up_read(&peer->handshake.lock); if (fail) goto err; if (!allowedips_node) { const struct __kernel_timespec last_handshake = { .tv_sec = peer->walltime_last_handshake.tv_sec, .tv_nsec = peer->walltime_last_handshake.tv_nsec }; down_read(&peer->handshake.lock); fail = nla_put(skb, WGPEER_A_PRESHARED_KEY, NOISE_SYMMETRIC_KEY_LEN, peer->handshake.preshared_key); up_read(&peer->handshake.lock); if (fail) goto err; if (nla_put(skb, WGPEER_A_LAST_HANDSHAKE_TIME, sizeof(last_handshake), &last_handshake) || nla_put_u16(skb, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, peer->persistent_keepalive_interval) || nla_put_u64_64bit(skb, WGPEER_A_TX_BYTES, peer->tx_bytes, WGPEER_A_UNSPEC) || nla_put_u64_64bit(skb, WGPEER_A_RX_BYTES, peer->rx_bytes, WGPEER_A_UNSPEC) || nla_put_u32(skb, WGPEER_A_PROTOCOL_VERSION, 1)) goto err; read_lock_bh(&peer->endpoint_lock); if (peer->endpoint.addr.sa_family == AF_INET) fail = nla_put(skb, WGPEER_A_ENDPOINT, sizeof(peer->endpoint.addr4), &peer->endpoint.addr4); else if (peer->endpoint.addr.sa_family == AF_INET6) fail = nla_put(skb, WGPEER_A_ENDPOINT, sizeof(peer->endpoint.addr6), &peer->endpoint.addr6); read_unlock_bh(&peer->endpoint_lock); if (fail) goto err; allowedips_node = list_first_entry_or_null(&peer->allowedips_list, struct allowedips_node, peer_list); } if (!allowedips_node) goto no_allowedips; if (!ctx->allowedips_seq) ctx->allowedips_seq = ctx->wg->peer_allowedips.seq; else if (ctx->allowedips_seq != ctx->wg->peer_allowedips.seq) goto no_allowedips; allowedips_nest = nla_nest_start(skb, WGPEER_A_ALLOWEDIPS); if (!allowedips_nest) goto err; list_for_each_entry_from(allowedips_node, &peer->allowedips_list, peer_list) { u8 cidr, ip[16] __aligned(__alignof(u64)); int family; family = wg_allowedips_read_node(allowedips_node, ip, &cidr); if (get_allowedips(skb, ip, cidr, family)) { nla_nest_end(skb, allowedips_nest); nla_nest_end(skb, peer_nest); ctx->next_allowedip = allowedips_node; return -EMSGSIZE; } } nla_nest_end(skb, allowedips_nest); no_allowedips: nla_nest_end(skb, peer_nest); ctx->next_allowedip = NULL; ctx->allowedips_seq = 0; return 0; err: nla_nest_cancel(skb, peer_nest); return -EMSGSIZE; } static int wg_get_device_start(struct netlink_callback *cb) { struct wg_device *wg; wg = lookup_interface(genl_info_dump(cb)->attrs, cb->skb); if (IS_ERR(wg)) return PTR_ERR(wg); DUMP_CTX(cb)->wg = wg; return 0; } static int wg_get_device_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct wg_peer *peer, *next_peer_cursor; struct dump_ctx *ctx = DUMP_CTX(cb); struct wg_device *wg = ctx->wg; struct nlattr *peers_nest; int ret = -EMSGSIZE; bool done = true; void *hdr; rtnl_lock(); mutex_lock(&wg->device_update_lock); cb->seq = wg->device_update_gen; next_peer_cursor = ctx->next_peer; hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, 
cb->nlh->nlmsg_seq, &genl_family, NLM_F_MULTI, WG_CMD_GET_DEVICE); if (!hdr) goto out; genl_dump_check_consistent(cb, hdr); if (!ctx->next_peer) { if (nla_put_u16(skb, WGDEVICE_A_LISTEN_PORT, wg->incoming_port) || nla_put_u32(skb, WGDEVICE_A_FWMARK, wg->fwmark) || nla_put_u32(skb, WGDEVICE_A_IFINDEX, wg->dev->ifindex) || nla_put_string(skb, WGDEVICE_A_IFNAME, wg->dev->name)) goto out; down_read(&wg->static_identity.lock); if (wg->static_identity.has_identity) { if (nla_put(skb, WGDEVICE_A_PRIVATE_KEY, NOISE_PUBLIC_KEY_LEN, wg->static_identity.static_private) || nla_put(skb, WGDEVICE_A_PUBLIC_KEY, NOISE_PUBLIC_KEY_LEN, wg->static_identity.static_public)) { up_read(&wg->static_identity.lock); goto out; } } up_read(&wg->static_identity.lock); } peers_nest = nla_nest_start(skb, WGDEVICE_A_PEERS); if (!peers_nest) goto out; ret = 0; lockdep_assert_held(&wg->device_update_lock); /* If the last cursor was removed in peer_remove or peer_remove_all, then * we just treat this the same as there being no more peers left. The * reason is that seq_nr should indicate to userspace that this isn't a * coherent dump anyway, so they'll try again. */ if (list_empty(&wg->peer_list) || (ctx->next_peer && ctx->next_peer->is_dead)) { nla_nest_cancel(skb, peers_nest); goto out; } peer = list_prepare_entry(ctx->next_peer, &wg->peer_list, peer_list); list_for_each_entry_continue(peer, &wg->peer_list, peer_list) { if (get_peer(peer, skb, ctx)) { done = false; break; } next_peer_cursor = peer; } nla_nest_end(skb, peers_nest); out: if (!ret && !done && next_peer_cursor) wg_peer_get(next_peer_cursor); wg_peer_put(ctx->next_peer); mutex_unlock(&wg->device_update_lock); rtnl_unlock(); if (ret) { genlmsg_cancel(skb, hdr); return ret; } genlmsg_end(skb, hdr); if (done) { ctx->next_peer = NULL; return 0; } ctx->next_peer = next_peer_cursor; return skb->len; /* At this point, we can't really deal ourselves with safely zeroing out * the private key material after usage. This will need an additional API * in the kernel for marking skbs as zero_on_free. 
*/ } static int wg_get_device_done(struct netlink_callback *cb) { struct dump_ctx *ctx = DUMP_CTX(cb); if (ctx->wg) dev_put(ctx->wg->dev); wg_peer_put(ctx->next_peer); return 0; } static int set_port(struct wg_device *wg, u16 port) { struct wg_peer *peer; if (wg->incoming_port == port) return 0; list_for_each_entry(peer, &wg->peer_list, peer_list) wg_socket_clear_peer_endpoint_src(peer); if (!netif_running(wg->dev)) { wg->incoming_port = port; return 0; } return wg_socket_init(wg, port); } static int set_allowedip(struct wg_peer *peer, struct nlattr **attrs) { int ret = -EINVAL; u16 family; u8 cidr; if (!attrs[WGALLOWEDIP_A_FAMILY] || !attrs[WGALLOWEDIP_A_IPADDR] || !attrs[WGALLOWEDIP_A_CIDR_MASK]) return ret; family = nla_get_u16(attrs[WGALLOWEDIP_A_FAMILY]); cidr = nla_get_u8(attrs[WGALLOWEDIP_A_CIDR_MASK]); if (family == AF_INET && cidr <= 32 && nla_len(attrs[WGALLOWEDIP_A_IPADDR]) == sizeof(struct in_addr)) ret = wg_allowedips_insert_v4( &peer->device->peer_allowedips, nla_data(attrs[WGALLOWEDIP_A_IPADDR]), cidr, peer, &peer->device->device_update_lock); else if (family == AF_INET6 && cidr <= 128 && nla_len(attrs[WGALLOWEDIP_A_IPADDR]) == sizeof(struct in6_addr)) ret = wg_allowedips_insert_v6( &peer->device->peer_allowedips, nla_data(attrs[WGALLOWEDIP_A_IPADDR]), cidr, peer, &peer->device->device_update_lock); return ret; } static int set_peer(struct wg_device *wg, struct nlattr **attrs) { u8 *public_key = NULL, *preshared_key = NULL; struct wg_peer *peer = NULL; u32 flags = 0; int ret; ret = -EINVAL; if (attrs[WGPEER_A_PUBLIC_KEY] && nla_len(attrs[WGPEER_A_PUBLIC_KEY]) == NOISE_PUBLIC_KEY_LEN) public_key = nla_data(attrs[WGPEER_A_PUBLIC_KEY]); else goto out; if (attrs[WGPEER_A_PRESHARED_KEY] && nla_len(attrs[WGPEER_A_PRESHARED_KEY]) == NOISE_SYMMETRIC_KEY_LEN) preshared_key = nla_data(attrs[WGPEER_A_PRESHARED_KEY]); if (attrs[WGPEER_A_FLAGS]) flags = nla_get_u32(attrs[WGPEER_A_FLAGS]); ret = -EOPNOTSUPP; if (flags & ~__WGPEER_F_ALL) goto out; ret = -EPFNOSUPPORT; if (attrs[WGPEER_A_PROTOCOL_VERSION]) { if (nla_get_u32(attrs[WGPEER_A_PROTOCOL_VERSION]) != 1) goto out; } peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable, nla_data(attrs[WGPEER_A_PUBLIC_KEY])); ret = 0; if (!peer) { /* Peer doesn't exist yet. Add a new one. */ if (flags & (WGPEER_F_REMOVE_ME | WGPEER_F_UPDATE_ONLY)) goto out; /* The peer is new, so there aren't allowed IPs to remove. */ flags &= ~WGPEER_F_REPLACE_ALLOWEDIPS; down_read(&wg->static_identity.lock); if (wg->static_identity.has_identity && !memcmp(nla_data(attrs[WGPEER_A_PUBLIC_KEY]), wg->static_identity.static_public, NOISE_PUBLIC_KEY_LEN)) { /* We silently ignore peers that have the same public * key as the device. The reason we do it silently is * that we'd like for people to be able to reuse the * same set of API calls across peers. */ up_read(&wg->static_identity.lock); ret = 0; goto out; } up_read(&wg->static_identity.lock); peer = wg_peer_create(wg, public_key, preshared_key); if (IS_ERR(peer)) { ret = PTR_ERR(peer); peer = NULL; goto out; } /* Take additional reference, as though we've just been * looked up. 
*/ wg_peer_get(peer); } if (flags & WGPEER_F_REMOVE_ME) { wg_peer_remove(peer); goto out; } if (preshared_key) { down_write(&peer->handshake.lock); memcpy(&peer->handshake.preshared_key, preshared_key, NOISE_SYMMETRIC_KEY_LEN); up_write(&peer->handshake.lock); } if (attrs[WGPEER_A_ENDPOINT]) { struct sockaddr *addr = nla_data(attrs[WGPEER_A_ENDPOINT]); size_t len = nla_len(attrs[WGPEER_A_ENDPOINT]); struct endpoint endpoint = { { { 0 } } }; if (len == sizeof(struct sockaddr_in) && addr->sa_family == AF_INET) { endpoint.addr4 = *(struct sockaddr_in *)addr; wg_socket_set_peer_endpoint(peer, &endpoint); } else if (len == sizeof(struct sockaddr_in6) && addr->sa_family == AF_INET6) { endpoint.addr6 = *(struct sockaddr_in6 *)addr; wg_socket_set_peer_endpoint(peer, &endpoint); } } if (flags & WGPEER_F_REPLACE_ALLOWEDIPS) wg_allowedips_remove_by_peer(&wg->peer_allowedips, peer, &wg->device_update_lock); if (attrs[WGPEER_A_ALLOWEDIPS]) { struct nlattr *attr, *allowedip[WGALLOWEDIP_A_MAX + 1]; int rem; nla_for_each_nested(attr, attrs[WGPEER_A_ALLOWEDIPS], rem) { ret = nla_parse_nested(allowedip, WGALLOWEDIP_A_MAX, attr, allowedip_policy, NULL); if (ret < 0) goto out; ret = set_allowedip(peer, allowedip); if (ret < 0) goto out; } } if (attrs[WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL]) { const u16 persistent_keepalive_interval = nla_get_u16( attrs[WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL]); const bool send_keepalive = !peer->persistent_keepalive_interval && persistent_keepalive_interval && netif_running(wg->dev); peer->persistent_keepalive_interval = persistent_keepalive_interval; if (send_keepalive) wg_packet_send_keepalive(peer); } if (netif_running(wg->dev)) wg_packet_send_staged_packets(peer); out: wg_peer_put(peer); if (attrs[WGPEER_A_PRESHARED_KEY]) memzero_explicit(nla_data(attrs[WGPEER_A_PRESHARED_KEY]), nla_len(attrs[WGPEER_A_PRESHARED_KEY])); return ret; } static int wg_set_device(struct sk_buff *skb, struct genl_info *info) { struct wg_device *wg = lookup_interface(info->attrs, skb); u32 flags = 0; int ret; if (IS_ERR(wg)) { ret = PTR_ERR(wg); goto out_nodev; } rtnl_lock(); mutex_lock(&wg->device_update_lock); if (info->attrs[WGDEVICE_A_FLAGS]) flags = nla_get_u32(info->attrs[WGDEVICE_A_FLAGS]); ret = -EOPNOTSUPP; if (flags & ~__WGDEVICE_F_ALL) goto out; if (info->attrs[WGDEVICE_A_LISTEN_PORT] || info->attrs[WGDEVICE_A_FWMARK]) { struct net *net; rcu_read_lock(); net = rcu_dereference(wg->creating_net); ret = !net || !ns_capable(net->user_ns, CAP_NET_ADMIN) ? -EPERM : 0; rcu_read_unlock(); if (ret) goto out; } ++wg->device_update_gen; if (info->attrs[WGDEVICE_A_FWMARK]) { struct wg_peer *peer; wg->fwmark = nla_get_u32(info->attrs[WGDEVICE_A_FWMARK]); list_for_each_entry(peer, &wg->peer_list, peer_list) wg_socket_clear_peer_endpoint_src(peer); } if (info->attrs[WGDEVICE_A_LISTEN_PORT]) { ret = set_port(wg, nla_get_u16(info->attrs[WGDEVICE_A_LISTEN_PORT])); if (ret) goto out; } if (flags & WGDEVICE_F_REPLACE_PEERS) wg_peer_remove_all(wg); if (info->attrs[WGDEVICE_A_PRIVATE_KEY] && nla_len(info->attrs[WGDEVICE_A_PRIVATE_KEY]) == NOISE_PUBLIC_KEY_LEN) { u8 *private_key = nla_data(info->attrs[WGDEVICE_A_PRIVATE_KEY]); u8 public_key[NOISE_PUBLIC_KEY_LEN]; struct wg_peer *peer, *temp; bool send_staged_packets; if (!crypto_memneq(wg->static_identity.static_private, private_key, NOISE_PUBLIC_KEY_LEN)) goto skip_set_private_key; /* We remove before setting, to prevent race, which means doing * two 25519-genpub ops. 
*/ if (curve25519_generate_public(public_key, private_key)) { peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable, public_key); if (peer) { wg_peer_put(peer); wg_peer_remove(peer); } } down_write(&wg->static_identity.lock); send_staged_packets = !wg->static_identity.has_identity && netif_running(wg->dev); wg_noise_set_static_identity_private_key(&wg->static_identity, private_key); send_staged_packets = send_staged_packets && wg->static_identity.has_identity; wg_cookie_checker_precompute_device_keys(&wg->cookie_checker); list_for_each_entry_safe(peer, temp, &wg->peer_list, peer_list) { wg_noise_precompute_static_static(peer); wg_noise_expire_current_peer_keypairs(peer); if (send_staged_packets) wg_packet_send_staged_packets(peer); } up_write(&wg->static_identity.lock); } skip_set_private_key: if (info->attrs[WGDEVICE_A_PEERS]) { struct nlattr *attr, *peer[WGPEER_A_MAX + 1]; int rem; nla_for_each_nested(attr, info->attrs[WGDEVICE_A_PEERS], rem) { ret = nla_parse_nested(peer, WGPEER_A_MAX, attr, peer_policy, NULL); if (ret < 0) goto out; ret = set_peer(wg, peer); if (ret < 0) goto out; } } ret = 0; out: mutex_unlock(&wg->device_update_lock); rtnl_unlock(); dev_put(wg->dev); out_nodev: if (info->attrs[WGDEVICE_A_PRIVATE_KEY]) memzero_explicit(nla_data(info->attrs[WGDEVICE_A_PRIVATE_KEY]), nla_len(info->attrs[WGDEVICE_A_PRIVATE_KEY])); return ret; } static const struct genl_ops genl_ops[] = { { .cmd = WG_CMD_GET_DEVICE, .start = wg_get_device_start, .dumpit = wg_get_device_dump, .done = wg_get_device_done, .flags = GENL_UNS_ADMIN_PERM }, { .cmd = WG_CMD_SET_DEVICE, .doit = wg_set_device, .flags = GENL_UNS_ADMIN_PERM } }; static struct genl_family genl_family __ro_after_init = { .ops = genl_ops, .n_ops = ARRAY_SIZE(genl_ops), .resv_start_op = WG_CMD_SET_DEVICE + 1, .name = WG_GENL_NAME, .version = WG_GENL_VERSION, .maxattr = WGDEVICE_A_MAX, .module = THIS_MODULE, .policy = device_policy, .netnsok = true }; int __init wg_genetlink_init(void) { return genl_register_family(&genl_family); } void __exit wg_genetlink_uninit(void) { genl_unregister_family(&genl_family); } |
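/*
 * Editorial sketch (not from the kernel tree): the consistency check that
 * set_allowedip() above applies before inserting an allowed IP -- the address
 * family, prefix length and address payload size must all agree. Hypothetical
 * names; returns 0 on success and -EINVAL otherwise, like the original.
 */
#include <errno.h>
#include <netinet/in.h>
#include <stdio.h>
#include <sys/socket.h>

static int demo_check_allowedip(int family, unsigned int cidr, size_t addr_len)
{
	if (family == AF_INET && cidr <= 32 && addr_len == sizeof(struct in_addr))
		return 0;
	if (family == AF_INET6 && cidr <= 128 && addr_len == sizeof(struct in6_addr))
		return 0;
	return -EINVAL;
}

int main(void)
{
	printf("v4 /24, 4 bytes  -> %d\n", demo_check_allowedip(AF_INET, 24, 4));
	printf("v4 /40, 4 bytes  -> %d\n", demo_check_allowedip(AF_INET, 40, 4));   /* bad cidr */
	printf("v6 /64, 16 bytes -> %d\n", demo_check_allowedip(AF_INET6, 64, 16));
	printf("v6 /64, 4 bytes  -> %d\n", demo_check_allowedip(AF_INET6, 64, 4));  /* bad length */
	return 0;
}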
// SPDX-License-Identifier: GPL-2.0-or-later /* * cgroups support for the BFQ I/O scheduler. */ #include <linux/module.h> #include <linux/slab.h> #include <linux/blkdev.h> #include <linux/cgroup.h> #include <linux/ktime.h> #include <linux/rbtree.h> #include <linux/ioprio.h> #include <linux/sbitmap.h> #include <linux/delay.h> #include "elevator.h" #include "bfq-iosched.h" #ifdef CONFIG_BFQ_CGROUP_DEBUG static int bfq_stat_init(struct bfq_stat *stat, gfp_t gfp) { int ret; ret = percpu_counter_init(&stat->cpu_cnt, 0, gfp); if (ret) return ret; atomic64_set(&stat->aux_cnt, 0); return 0; } static void bfq_stat_exit(struct bfq_stat *stat) { percpu_counter_destroy(&stat->cpu_cnt); } /** * bfq_stat_add - add a value to a bfq_stat * @stat: target bfq_stat * @val: value to add * * Add @val to @stat. The caller must ensure that IRQ on the same CPU * don't re-enter this function for the same counter.
*/ static inline void bfq_stat_add(struct bfq_stat *stat, uint64_t val) { percpu_counter_add_batch(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH); } /** * bfq_stat_read - read the current value of a bfq_stat * @stat: bfq_stat to read */ static inline uint64_t bfq_stat_read(struct bfq_stat *stat) { return percpu_counter_sum_positive(&stat->cpu_cnt); } /** * bfq_stat_reset - reset a bfq_stat * @stat: bfq_stat to reset */ static inline void bfq_stat_reset(struct bfq_stat *stat) { percpu_counter_set(&stat->cpu_cnt, 0); atomic64_set(&stat->aux_cnt, 0); } /** * bfq_stat_add_aux - add a bfq_stat into another's aux count * @to: the destination bfq_stat * @from: the source * * Add @from's count including the aux one to @to's aux count. */ static inline void bfq_stat_add_aux(struct bfq_stat *to, struct bfq_stat *from) { atomic64_add(bfq_stat_read(from) + atomic64_read(&from->aux_cnt), &to->aux_cnt); } /** * blkg_prfill_stat - prfill callback for bfq_stat * @sf: seq_file to print to * @pd: policy private data of interest * @off: offset to the bfq_stat in @pd * * prfill callback for printing a bfq_stat. */ static u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off) { return __blkg_prfill_u64(sf, pd, bfq_stat_read((void *)pd + off)); } /* bfqg stats flags */ enum bfqg_stats_flags { BFQG_stats_waiting = 0, BFQG_stats_idling, BFQG_stats_empty, }; #define BFQG_FLAG_FNS(name) \ static void bfqg_stats_mark_##name(struct bfqg_stats *stats) \ { \ stats->flags |= (1 << BFQG_stats_##name); \ } \ static void bfqg_stats_clear_##name(struct bfqg_stats *stats) \ { \ stats->flags &= ~(1 << BFQG_stats_##name); \ } \ static int bfqg_stats_##name(struct bfqg_stats *stats) \ { \ return (stats->flags & (1 << BFQG_stats_##name)) != 0; \ } \ BFQG_FLAG_FNS(waiting) BFQG_FLAG_FNS(idling) BFQG_FLAG_FNS(empty) #undef BFQG_FLAG_FNS /* This should be called with the scheduler lock held. */ static void bfqg_stats_update_group_wait_time(struct bfqg_stats *stats) { u64 now; if (!bfqg_stats_waiting(stats)) return; now = blk_time_get_ns(); if (now > stats->start_group_wait_time) bfq_stat_add(&stats->group_wait_time, now - stats->start_group_wait_time); bfqg_stats_clear_waiting(stats); } /* This should be called with the scheduler lock held. */ static void bfqg_stats_set_start_group_wait_time(struct bfq_group *bfqg, struct bfq_group *curr_bfqg) { struct bfqg_stats *stats = &bfqg->stats; if (bfqg_stats_waiting(stats)) return; if (bfqg == curr_bfqg) return; stats->start_group_wait_time = blk_time_get_ns(); bfqg_stats_mark_waiting(stats); } /* This should be called with the scheduler lock held. */ static void bfqg_stats_end_empty_time(struct bfqg_stats *stats) { u64 now; if (!bfqg_stats_empty(stats)) return; now = blk_time_get_ns(); if (now > stats->start_empty_time) bfq_stat_add(&stats->empty_time, now - stats->start_empty_time); bfqg_stats_clear_empty(stats); } void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { bfq_stat_add(&bfqg->stats.dequeue, 1); } void bfqg_stats_set_start_empty_time(struct bfq_group *bfqg) { struct bfqg_stats *stats = &bfqg->stats; if (blkg_rwstat_total(&stats->queued)) return; /* * group is already marked empty. This can happen if bfqq got new * request in parent group and moved to this group while being added * to service tree. Just ignore the event and move on. 
*/ if (bfqg_stats_empty(stats)) return; stats->start_empty_time = blk_time_get_ns(); bfqg_stats_mark_empty(stats); } void bfqg_stats_update_idle_time(struct bfq_group *bfqg) { struct bfqg_stats *stats = &bfqg->stats; if (bfqg_stats_idling(stats)) { u64 now = blk_time_get_ns(); if (now > stats->start_idle_time) bfq_stat_add(&stats->idle_time, now - stats->start_idle_time); bfqg_stats_clear_idling(stats); } } void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg) { struct bfqg_stats *stats = &bfqg->stats; stats->start_idle_time = blk_time_get_ns(); bfqg_stats_mark_idling(stats); } void bfqg_stats_update_avg_queue_size(struct bfq_group *bfqg) { struct bfqg_stats *stats = &bfqg->stats; bfq_stat_add(&stats->avg_queue_size_sum, blkg_rwstat_total(&stats->queued)); bfq_stat_add(&stats->avg_queue_size_samples, 1); bfqg_stats_update_group_wait_time(stats); } void bfqg_stats_update_io_add(struct bfq_group *bfqg, struct bfq_queue *bfqq, blk_opf_t opf) { blkg_rwstat_add(&bfqg->stats.queued, opf, 1); bfqg_stats_end_empty_time(&bfqg->stats); if (!(bfqq == bfqg->bfqd->in_service_queue)) bfqg_stats_set_start_group_wait_time(bfqg, bfqq_group(bfqq)); } void bfqg_stats_update_io_remove(struct bfq_group *bfqg, blk_opf_t opf) { blkg_rwstat_add(&bfqg->stats.queued, opf, -1); } void bfqg_stats_update_io_merged(struct bfq_group *bfqg, blk_opf_t opf) { blkg_rwstat_add(&bfqg->stats.merged, opf, 1); } void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns, u64 io_start_time_ns, blk_opf_t opf) { struct bfqg_stats *stats = &bfqg->stats; u64 now = blk_time_get_ns(); if (now > io_start_time_ns) blkg_rwstat_add(&stats->service_time, opf, now - io_start_time_ns); if (io_start_time_ns > start_time_ns) blkg_rwstat_add(&stats->wait_time, opf, io_start_time_ns - start_time_ns); } #else /* CONFIG_BFQ_CGROUP_DEBUG */ void bfqg_stats_update_io_remove(struct bfq_group *bfqg, blk_opf_t opf) { } void bfqg_stats_update_io_merged(struct bfq_group *bfqg, blk_opf_t opf) { } void bfqg_stats_update_completion(struct bfq_group *bfqg, u64 start_time_ns, u64 io_start_time_ns, blk_opf_t opf) { } void bfqg_stats_update_dequeue(struct bfq_group *bfqg) { } void bfqg_stats_set_start_idle_time(struct bfq_group *bfqg) { } #endif /* CONFIG_BFQ_CGROUP_DEBUG */ #ifdef CONFIG_BFQ_GROUP_IOSCHED /* * blk-cgroup policy-related handlers * The following functions help in converting between blk-cgroup * internal structures and BFQ-specific structures. */ static struct bfq_group *pd_to_bfqg(struct blkg_policy_data *pd) { return pd ? container_of(pd, struct bfq_group, pd) : NULL; } struct blkcg_gq *bfqg_to_blkg(struct bfq_group *bfqg) { return pd_to_blkg(&bfqg->pd); } static struct bfq_group *blkg_to_bfqg(struct blkcg_gq *blkg) { return pd_to_bfqg(blkg_to_pd(blkg, &blkcg_policy_bfq)); } /* * bfq_group handlers * The following functions help in navigating the bfq_group hierarchy * by allowing to find the parent of a bfq_group or the bfq_group * associated to a bfq_queue. */ static struct bfq_group *bfqg_parent(struct bfq_group *bfqg) { struct blkcg_gq *pblkg = bfqg_to_blkg(bfqg)->parent; return pblkg ? blkg_to_bfqg(pblkg) : NULL; } struct bfq_group *bfqq_group(struct bfq_queue *bfqq) { struct bfq_entity *group_entity = bfqq->entity.parent; return group_entity ? container_of(group_entity, struct bfq_group, entity) : bfqq->bfqd->root_group; } /* * The following two functions handle get and put of a bfq_group by * wrapping the related blk-cgroup hooks. 
*/ static void bfqg_get(struct bfq_group *bfqg) { refcount_inc(&bfqg->ref); } static void bfqg_put(struct bfq_group *bfqg) { if (refcount_dec_and_test(&bfqg->ref)) kfree(bfqg); } static void bfqg_and_blkg_get(struct bfq_group *bfqg) { /* see comments in bfq_bic_update_cgroup for why refcounting bfqg */ bfqg_get(bfqg); blkg_get(bfqg_to_blkg(bfqg)); } void bfqg_and_blkg_put(struct bfq_group *bfqg) { blkg_put(bfqg_to_blkg(bfqg)); bfqg_put(bfqg); } void bfqg_stats_update_legacy_io(struct request_queue *q, struct request *rq) { struct bfq_group *bfqg = blkg_to_bfqg(rq->bio->bi_blkg); if (!bfqg) return; blkg_rwstat_add(&bfqg->stats.bytes, rq->cmd_flags, blk_rq_bytes(rq)); blkg_rwstat_add(&bfqg->stats.ios, rq->cmd_flags, 1); } /* @stats = 0 */ static void bfqg_stats_reset(struct bfqg_stats *stats) { #ifdef CONFIG_BFQ_CGROUP_DEBUG /* queued stats shouldn't be cleared */ blkg_rwstat_reset(&stats->merged); blkg_rwstat_reset(&stats->service_time); blkg_rwstat_reset(&stats->wait_time); bfq_stat_reset(&stats->time); bfq_stat_reset(&stats->avg_queue_size_sum); bfq_stat_reset(&stats->avg_queue_size_samples); bfq_stat_reset(&stats->dequeue); bfq_stat_reset(&stats->group_wait_time); bfq_stat_reset(&stats->idle_time); bfq_stat_reset(&stats->empty_time); #endif } /* @to += @from */ static void bfqg_stats_add_aux(struct bfqg_stats *to, struct bfqg_stats *from) { if (!to || !from) return; #ifdef CONFIG_BFQ_CGROUP_DEBUG /* queued stats shouldn't be cleared */ blkg_rwstat_add_aux(&to->merged, &from->merged); blkg_rwstat_add_aux(&to->service_time, &from->service_time); blkg_rwstat_add_aux(&to->wait_time, &from->wait_time); bfq_stat_add_aux(&from->time, &from->time); bfq_stat_add_aux(&to->avg_queue_size_sum, &from->avg_queue_size_sum); bfq_stat_add_aux(&to->avg_queue_size_samples, &from->avg_queue_size_samples); bfq_stat_add_aux(&to->dequeue, &from->dequeue); bfq_stat_add_aux(&to->group_wait_time, &from->group_wait_time); bfq_stat_add_aux(&to->idle_time, &from->idle_time); bfq_stat_add_aux(&to->empty_time, &from->empty_time); #endif } /* * Transfer @bfqg's stats to its parent's aux counts so that the ancestors' * recursive stats can still account for the amount used by this bfqg after * it's gone. */ static void bfqg_stats_xfer_dead(struct bfq_group *bfqg) { struct bfq_group *parent; if (!bfqg) /* root_group */ return; parent = bfqg_parent(bfqg); lockdep_assert_held(&bfqg_to_blkg(bfqg)->q->queue_lock); if (unlikely(!parent)) return; bfqg_stats_add_aux(&parent->stats, &bfqg->stats); bfqg_stats_reset(&bfqg->stats); } void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg) { struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); entity->weight = entity->new_weight; entity->orig_weight = entity->new_weight; if (bfqq) { bfqq->ioprio = bfqq->new_ioprio; bfqq->ioprio_class = bfqq->new_ioprio_class; /* * Make sure that bfqg and its associated blkg do not * disappear before entity. 
*/ bfqg_and_blkg_get(bfqg); } entity->parent = bfqg->my_entity; /* NULL for root group */ entity->sched_data = &bfqg->sched_data; } static void bfqg_stats_exit(struct bfqg_stats *stats) { blkg_rwstat_exit(&stats->bytes); blkg_rwstat_exit(&stats->ios); #ifdef CONFIG_BFQ_CGROUP_DEBUG blkg_rwstat_exit(&stats->merged); blkg_rwstat_exit(&stats->service_time); blkg_rwstat_exit(&stats->wait_time); blkg_rwstat_exit(&stats->queued); bfq_stat_exit(&stats->time); bfq_stat_exit(&stats->avg_queue_size_sum); bfq_stat_exit(&stats->avg_queue_size_samples); bfq_stat_exit(&stats->dequeue); bfq_stat_exit(&stats->group_wait_time); bfq_stat_exit(&stats->idle_time); bfq_stat_exit(&stats->empty_time); #endif } static int bfqg_stats_init(struct bfqg_stats *stats, gfp_t gfp) { if (blkg_rwstat_init(&stats->bytes, gfp) || blkg_rwstat_init(&stats->ios, gfp)) goto error; #ifdef CONFIG_BFQ_CGROUP_DEBUG if (blkg_rwstat_init(&stats->merged, gfp) || blkg_rwstat_init(&stats->service_time, gfp) || blkg_rwstat_init(&stats->wait_time, gfp) || blkg_rwstat_init(&stats->queued, gfp) || bfq_stat_init(&stats->time, gfp) || bfq_stat_init(&stats->avg_queue_size_sum, gfp) || bfq_stat_init(&stats->avg_queue_size_samples, gfp) || bfq_stat_init(&stats->dequeue, gfp) || bfq_stat_init(&stats->group_wait_time, gfp) || bfq_stat_init(&stats->idle_time, gfp) || bfq_stat_init(&stats->empty_time, gfp)) goto error; #endif return 0; error: bfqg_stats_exit(stats); return -ENOMEM; } static struct bfq_group_data *cpd_to_bfqgd(struct blkcg_policy_data *cpd) { return cpd ? container_of(cpd, struct bfq_group_data, pd) : NULL; } static struct bfq_group_data *blkcg_to_bfqgd(struct blkcg *blkcg) { return cpd_to_bfqgd(blkcg_to_cpd(blkcg, &blkcg_policy_bfq)); } static struct blkcg_policy_data *bfq_cpd_alloc(gfp_t gfp) { struct bfq_group_data *bgd; bgd = kzalloc(sizeof(*bgd), gfp); if (!bgd) return NULL; bgd->weight = CGROUP_WEIGHT_DFL; return &bgd->pd; } static void bfq_cpd_free(struct blkcg_policy_data *cpd) { kfree(cpd_to_bfqgd(cpd)); } static struct blkg_policy_data *bfq_pd_alloc(struct gendisk *disk, struct blkcg *blkcg, gfp_t gfp) { struct bfq_group *bfqg; bfqg = kzalloc_node(sizeof(*bfqg), gfp, disk->node_id); if (!bfqg) return NULL; if (bfqg_stats_init(&bfqg->stats, gfp)) { kfree(bfqg); return NULL; } /* see comments in bfq_bic_update_cgroup for why refcounting */ refcount_set(&bfqg->ref, 1); return &bfqg->pd; } static void bfq_pd_init(struct blkg_policy_data *pd) { struct blkcg_gq *blkg = pd_to_blkg(pd); struct bfq_group *bfqg = blkg_to_bfqg(blkg); struct bfq_data *bfqd = blkg->q->elevator->elevator_data; struct bfq_entity *entity = &bfqg->entity; struct bfq_group_data *d = blkcg_to_bfqgd(blkg->blkcg); entity->orig_weight = entity->weight = entity->new_weight = d->weight; entity->my_sched_data = &bfqg->sched_data; entity->last_bfqq_created = NULL; bfqg->my_entity = entity; /* * the root_group's will be set to NULL * in bfq_init_queue() */ bfqg->bfqd = bfqd; bfqg->active_entities = 0; bfqg->num_queues_with_pending_reqs = 0; bfqg->rq_pos_tree = RB_ROOT; } static void bfq_pd_free(struct blkg_policy_data *pd) { struct bfq_group *bfqg = pd_to_bfqg(pd); bfqg_stats_exit(&bfqg->stats); bfqg_put(bfqg); } static void bfq_pd_reset_stats(struct blkg_policy_data *pd) { struct bfq_group *bfqg = pd_to_bfqg(pd); bfqg_stats_reset(&bfqg->stats); } static void bfq_group_set_parent(struct bfq_group *bfqg, struct bfq_group *parent) { struct bfq_entity *entity; entity = &bfqg->entity; entity->parent = parent->my_entity; entity->sched_data = &parent->sched_data; } static 
void bfq_link_bfqg(struct bfq_data *bfqd, struct bfq_group *bfqg) { struct bfq_group *parent; struct bfq_entity *entity; /* * Update chain of bfq_groups as we might be handling a leaf group * which, along with some of its relatives, has not been hooked yet * to the private hierarchy of BFQ. */ entity = &bfqg->entity; for_each_entity(entity) { struct bfq_group *curr_bfqg = container_of(entity, struct bfq_group, entity); if (curr_bfqg != bfqd->root_group) { parent = bfqg_parent(curr_bfqg); if (!parent) parent = bfqd->root_group; bfq_group_set_parent(curr_bfqg, parent); } } } struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio) { struct blkcg_gq *blkg = bio->bi_blkg; struct bfq_group *bfqg; while (blkg) { if (!blkg->online) { blkg = blkg->parent; continue; } bfqg = blkg_to_bfqg(blkg); if (bfqg->pd.online) { bio_associate_blkg_from_css(bio, &blkg->blkcg->css); return bfqg; } blkg = blkg->parent; } bio_associate_blkg_from_css(bio, &bfqg_to_blkg(bfqd->root_group)->blkcg->css); return bfqd->root_group; } /** * bfq_bfqq_move - migrate @bfqq to @bfqg. * @bfqd: queue descriptor. * @bfqq: the queue to move. * @bfqg: the group to move to. * * Move @bfqq to @bfqg, deactivating it from its old group and reactivating * it on the new one. Avoid putting the entity on the old group idle tree. * * Must be called under the scheduler lock, to make sure that the blkg * owning @bfqg does not disappear (see comments in * bfq_bic_update_cgroup on guaranteeing the consistency of blkg * objects). */ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, struct bfq_group *bfqg) { struct bfq_entity *entity = &bfqq->entity; struct bfq_group *old_parent = bfqq_group(bfqq); bool has_pending_reqs = false; /* * No point to move bfqq to the same group, which can happen when * root group is offlined */ if (old_parent == bfqg) return; /* * oom_bfqq is not allowed to move, oom_bfqq will hold ref to root_group * until elevator exit. */ if (bfqq == &bfqd->oom_bfqq) return; /* * Get extra reference to prevent bfqq from being freed in * next possible expire or deactivate. */ bfqq->ref++; if (entity->in_groups_with_pending_reqs) { has_pending_reqs = true; bfq_del_bfqq_in_groups_with_pending_reqs(bfqq); } /* If bfqq is empty, then bfq_bfqq_expire also invokes * bfq_del_bfqq_busy, thereby removing bfqq and its entity * from data structures related to current group. Otherwise we * need to remove bfqq explicitly with bfq_deactivate_bfqq, as * we do below. 
*/ if (bfqq == bfqd->in_service_queue) bfq_bfqq_expire(bfqd, bfqd->in_service_queue, false, BFQQE_PREEMPTED); if (bfq_bfqq_busy(bfqq)) bfq_deactivate_bfqq(bfqd, bfqq, false, false); else if (entity->on_st_or_in_serv) bfq_put_idle_entity(bfq_entity_service_tree(entity), entity); bfqg_and_blkg_put(old_parent); bfq_reassign_last_bfqq(bfqq, NULL); entity->parent = bfqg->my_entity; entity->sched_data = &bfqg->sched_data; /* pin down bfqg and its associated blkg */ bfqg_and_blkg_get(bfqg); if (has_pending_reqs) bfq_add_bfqq_in_groups_with_pending_reqs(bfqq); if (bfq_bfqq_busy(bfqq)) { if (unlikely(!bfqd->nonrot_with_queueing)) bfq_pos_tree_add_move(bfqd, bfqq); bfq_activate_bfqq(bfqd, bfqq); } if (!bfqd->in_service_queue && !bfqd->tot_rq_in_driver) bfq_schedule_dispatch(bfqd); /* release extra ref taken above, bfqq may happen to be freed now */ bfq_put_queue(bfqq); } static void bfq_sync_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *sync_bfqq, struct bfq_io_cq *bic, struct bfq_group *bfqg, unsigned int act_idx) { struct bfq_queue *bfqq; if (!sync_bfqq->new_bfqq && !bfq_bfqq_coop(sync_bfqq)) { /* We are the only user of this bfqq, just move it */ if (sync_bfqq->entity.sched_data != &bfqg->sched_data) bfq_bfqq_move(bfqd, sync_bfqq, bfqg); return; } /* * The queue was merged to a different queue. Check * that the merge chain still belongs to the same * cgroup. */ for (bfqq = sync_bfqq; bfqq; bfqq = bfqq->new_bfqq) if (bfqq->entity.sched_data != &bfqg->sched_data) break; if (bfqq) { /* * Some queue changed cgroup so the merge is not valid * anymore. We cannot easily just cancel the merge (by * clearing new_bfqq) as there may be other processes * using this queue and holding refs to all queues * below sync_bfqq->new_bfqq. Similarly if the merge * already happened, we need to detach from bfqq now * so that we cannot merge bio to a request from the * old cgroup. */ bfq_put_cooperator(sync_bfqq); bic_set_bfqq(bic, NULL, true, act_idx); bfq_release_process_ref(bfqd, sync_bfqq); } } /** * __bfq_bic_change_cgroup - move @bic to @bfqg. * @bfqd: the queue descriptor. * @bic: the bic to move. * @bfqg: the group to move to. * * Move bic to blkcg, assuming that bfqd->lock is held; which makes * sure that the reference to cgroup is valid across the call (see * comments in bfq_bic_update_cgroup on this issue) */ static void __bfq_bic_change_cgroup(struct bfq_data *bfqd, struct bfq_io_cq *bic, struct bfq_group *bfqg) { unsigned int act_idx; for (act_idx = 0; act_idx < bfqd->num_actuators; act_idx++) { struct bfq_queue *async_bfqq = bic_to_bfqq(bic, false, act_idx); struct bfq_queue *sync_bfqq = bic_to_bfqq(bic, true, act_idx); if (async_bfqq && async_bfqq->entity.sched_data != &bfqg->sched_data) { bic_set_bfqq(bic, NULL, false, act_idx); bfq_release_process_ref(bfqd, async_bfqq); } if (sync_bfqq) bfq_sync_bfqq_move(bfqd, sync_bfqq, bic, bfqg, act_idx); } } void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) { struct bfq_data *bfqd = bic_to_bfqd(bic); struct bfq_group *bfqg = bfq_bio_bfqg(bfqd, bio); uint64_t serial_nr; serial_nr = bfqg_to_blkg(bfqg)->blkcg->css.serial_nr; /* * Check whether blkcg has changed. The condition may trigger * spuriously on a newly created cic but there's no harm. */ if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr)) return; /* * New cgroup for this process. Make sure it is linked to bfq internal * cgroup hierarchy. 
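 * (bfq_link_bfqg() only hooks the group, and its still-unlinked
 * ancestors, into BFQ's private hierarchy; the queues owned by this bic
 * are then actually moved by __bfq_bic_change_cgroup() right below.)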
*/ bfq_link_bfqg(bfqd, bfqg); __bfq_bic_change_cgroup(bfqd, bic, bfqg); bic->blkcg_serial_nr = serial_nr; } /** * bfq_flush_idle_tree - deactivate any entity on the idle tree of @st. * @st: the service tree being flushed. */ static void bfq_flush_idle_tree(struct bfq_service_tree *st) { struct bfq_entity *entity = st->first_idle; for (; entity ; entity = st->first_idle) __bfq_deactivate_entity(entity, false); } /** * bfq_reparent_leaf_entity - move leaf entity to the root_group. * @bfqd: the device data structure with the root group. * @entity: the entity to move, if entity is a leaf; or the parent entity * of an active leaf entity to move, if entity is not a leaf. * @ioprio_class: I/O priority class to reparent. */ static void bfq_reparent_leaf_entity(struct bfq_data *bfqd, struct bfq_entity *entity, int ioprio_class) { struct bfq_queue *bfqq; struct bfq_entity *child_entity = entity; while (child_entity->my_sched_data) { /* leaf not reached yet */ struct bfq_sched_data *child_sd = child_entity->my_sched_data; struct bfq_service_tree *child_st = child_sd->service_tree + ioprio_class; struct rb_root *child_active = &child_st->active; child_entity = bfq_entity_of(rb_first(child_active)); if (!child_entity) child_entity = child_sd->in_service_entity; } bfqq = bfq_entity_to_bfqq(child_entity); bfq_bfqq_move(bfqd, bfqq, bfqd->root_group); } /** * bfq_reparent_active_queues - move to the root group all active queues. * @bfqd: the device data structure with the root group. * @bfqg: the group to move from. * @st: the service tree to start the search from. * @ioprio_class: I/O priority class to reparent. */ static void bfq_reparent_active_queues(struct bfq_data *bfqd, struct bfq_group *bfqg, struct bfq_service_tree *st, int ioprio_class) { struct rb_root *active = &st->active; struct bfq_entity *entity; while ((entity = bfq_entity_of(rb_first(active)))) bfq_reparent_leaf_entity(bfqd, entity, ioprio_class); if (bfqg->sched_data.in_service_entity) bfq_reparent_leaf_entity(bfqd, bfqg->sched_data.in_service_entity, ioprio_class); } /** * bfq_pd_offline - deactivate the entity associated with @pd, * and reparent its children entities. * @pd: descriptor of the policy going offline. * * blkio already grabs the queue_lock for us, so no need to use * RCU-based magic */ static void bfq_pd_offline(struct blkg_policy_data *pd) { struct bfq_service_tree *st; struct bfq_group *bfqg = pd_to_bfqg(pd); struct bfq_data *bfqd = bfqg->bfqd; struct bfq_entity *entity = bfqg->my_entity; unsigned long flags; int i; spin_lock_irqsave(&bfqd->lock, flags); if (!entity) /* root group */ goto put_async_queues; /* * Empty all service_trees belonging to this group before * deactivating the group itself. */ for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) { st = bfqg->sched_data.service_tree + i; /* * It may happen that some queues are still active * (busy) upon group destruction (if the corresponding * processes have been forced to terminate). We move * all the leaf entities corresponding to these queues * to the root_group. * Also, it may happen that the group has an entity * in service, which is disconnected from the active * tree: it must be moved, too. * There is no need to put the sync queues, as the * scheduler has taken no reference. */ bfq_reparent_active_queues(bfqd, bfqg, st, i); /* * The idle tree may still contain bfq_queues * belonging to exited task because they never * migrated to a different cgroup from the one being * destroyed now. 
In addition, even * bfq_reparent_active_queues() may happen to add some * entities to the idle tree. It happens if, in some * of the calls to bfq_bfqq_move() performed by * bfq_reparent_active_queues(), the queue to move is * empty and gets expired. */ bfq_flush_idle_tree(st); } __bfq_deactivate_entity(entity, false); put_async_queues: bfq_put_async_queues(bfqd, bfqg); spin_unlock_irqrestore(&bfqd->lock, flags); /* * @blkg is going offline and will be ignored by * blkg_[rw]stat_recursive_sum(). Transfer stats to the parent so * that they don't get lost. If IOs complete after this point, the * stats for them will be lost. Oh well... */ bfqg_stats_xfer_dead(bfqg); } void bfq_end_wr_async(struct bfq_data *bfqd) { struct blkcg_gq *blkg; list_for_each_entry(blkg, &bfqd->queue->blkg_list, q_node) { struct bfq_group *bfqg = blkg_to_bfqg(blkg); bfq_end_wr_async_queues(bfqd, bfqg); } bfq_end_wr_async_queues(bfqd, bfqd->root_group); } static int bfq_io_show_weight_legacy(struct seq_file *sf, void *v) { struct blkcg *blkcg = css_to_blkcg(seq_css(sf)); struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg); unsigned int val = 0; if (bfqgd) val = bfqgd->weight; seq_printf(sf, "%u\n", val); return 0; } static u64 bfqg_prfill_weight_device(struct seq_file *sf, struct blkg_policy_data *pd, int off) { struct bfq_group *bfqg = pd_to_bfqg(pd); if (!bfqg->entity.dev_weight) return 0; return __blkg_prfill_u64(sf, pd, bfqg->entity.dev_weight); } static int bfq_io_show_weight(struct seq_file *sf, void *v) { struct blkcg *blkcg = css_to_blkcg(seq_css(sf)); struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg); seq_printf(sf, "default %u\n", bfqgd->weight); blkcg_print_blkgs(sf, blkcg, bfqg_prfill_weight_device, &blkcg_policy_bfq, 0, false); return 0; } static void bfq_group_set_weight(struct bfq_group *bfqg, u64 weight, u64 dev_weight) { weight = dev_weight ?: weight; bfqg->entity.dev_weight = dev_weight; /* * Setting the prio_changed flag of the entity * to 1 with new_weight == weight would re-set * the value of the weight to its ioprio mapping. * Set the flag only if necessary. */ if ((unsigned short)weight != bfqg->entity.new_weight) { bfqg->entity.new_weight = (unsigned short)weight; /* * Make sure that the above new value has been * stored in bfqg->entity.new_weight before * setting the prio_changed flag. In fact, * this flag may be read asynchronously (in * critical sections protected by a different * lock than that held here), and finding this * flag set may cause the execution of the code * for updating parameters whose value may * depend also on bfqg->entity.new_weight (in * __bfq_entity_update_weight_prio). * This barrier makes sure that the new value * of bfqg->entity.new_weight is correctly * seen in that code. 
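 * (The reader side in __bfq_entity_update_weight_prio() is expected to
 * pair this with a read barrier before it looks at the new weight.)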
*/ smp_wmb(); bfqg->entity.prio_changed = 1; } } static int bfq_io_set_weight_legacy(struct cgroup_subsys_state *css, struct cftype *cftype, u64 val) { struct blkcg *blkcg = css_to_blkcg(css); struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg); struct blkcg_gq *blkg; int ret = -ERANGE; if (val < BFQ_MIN_WEIGHT || val > BFQ_MAX_WEIGHT) return ret; ret = 0; spin_lock_irq(&blkcg->lock); bfqgd->weight = (unsigned short)val; hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) { struct bfq_group *bfqg = blkg_to_bfqg(blkg); if (bfqg) bfq_group_set_weight(bfqg, val, 0); } spin_unlock_irq(&blkcg->lock); return ret; } static ssize_t bfq_io_set_device_weight(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { int ret; struct blkg_conf_ctx ctx; struct blkcg *blkcg = css_to_blkcg(of_css(of)); struct bfq_group *bfqg; u64 v; blkg_conf_init(&ctx, buf); ret = blkg_conf_prep(blkcg, &blkcg_policy_bfq, &ctx); if (ret) goto out; if (sscanf(ctx.body, "%llu", &v) == 1) { /* require "default" on dfl */ ret = -ERANGE; if (!v) goto out; } else if (!strcmp(strim(ctx.body), "default")) { v = 0; } else { ret = -EINVAL; goto out; } bfqg = blkg_to_bfqg(ctx.blkg); ret = -ERANGE; if (!v || (v >= BFQ_MIN_WEIGHT && v <= BFQ_MAX_WEIGHT)) { bfq_group_set_weight(bfqg, bfqg->entity.weight, v); ret = 0; } out: blkg_conf_exit(&ctx); return ret ?: nbytes; } static ssize_t bfq_io_set_weight(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { char *endp; int ret; u64 v; buf = strim(buf); /* "WEIGHT" or "default WEIGHT" sets the default weight */ v = simple_strtoull(buf, &endp, 0); if (*endp == '\0' || sscanf(buf, "default %llu", &v) == 1) { ret = bfq_io_set_weight_legacy(of_css(of), NULL, v); return ret ?: nbytes; } return bfq_io_set_device_weight(of, buf, nbytes, off); } static int bfqg_print_rwstat(struct seq_file *sf, void *v) { blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat, &blkcg_policy_bfq, seq_cft(sf)->private, true); return 0; } static u64 bfqg_prfill_rwstat_recursive(struct seq_file *sf, struct blkg_policy_data *pd, int off) { struct blkg_rwstat_sample sum; blkg_rwstat_recursive_sum(pd_to_blkg(pd), &blkcg_policy_bfq, off, &sum); return __blkg_prfill_rwstat(sf, pd, &sum); } static int bfqg_print_rwstat_recursive(struct seq_file *sf, void *v) { blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), bfqg_prfill_rwstat_recursive, &blkcg_policy_bfq, seq_cft(sf)->private, true); return 0; } #ifdef CONFIG_BFQ_CGROUP_DEBUG static int bfqg_print_stat(struct seq_file *sf, void *v) { blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat, &blkcg_policy_bfq, seq_cft(sf)->private, false); return 0; } static u64 bfqg_prfill_stat_recursive(struct seq_file *sf, struct blkg_policy_data *pd, int off) { struct blkcg_gq *blkg = pd_to_blkg(pd); struct blkcg_gq *pos_blkg; struct cgroup_subsys_state *pos_css; u64 sum = 0; lockdep_assert_held(&blkg->q->queue_lock); rcu_read_lock(); blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) { struct bfq_stat *stat; if (!pos_blkg->online) continue; stat = (void *)blkg_to_pd(pos_blkg, &blkcg_policy_bfq) + off; sum += bfq_stat_read(stat) + atomic64_read(&stat->aux_cnt); } rcu_read_unlock(); return __blkg_prfill_u64(sf, pd, sum); } static int bfqg_print_stat_recursive(struct seq_file *sf, void *v) { blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), bfqg_prfill_stat_recursive, &blkcg_policy_bfq, seq_cft(sf)->private, false); return 0; } static u64 bfqg_prfill_sectors(struct seq_file *sf, struct blkg_policy_data *pd, int off) { struct 
bfq_group *bfqg = blkg_to_bfqg(pd->blkg); u64 sum = blkg_rwstat_total(&bfqg->stats.bytes); return __blkg_prfill_u64(sf, pd, sum >> 9); } static int bfqg_print_stat_sectors(struct seq_file *sf, void *v) { blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), bfqg_prfill_sectors, &blkcg_policy_bfq, 0, false); return 0; } static u64 bfqg_prfill_sectors_recursive(struct seq_file *sf, struct blkg_policy_data *pd, int off) { struct blkg_rwstat_sample tmp; blkg_rwstat_recursive_sum(pd->blkg, &blkcg_policy_bfq, offsetof(struct bfq_group, stats.bytes), &tmp); return __blkg_prfill_u64(sf, pd, (tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE]) >> 9); } static int bfqg_print_stat_sectors_recursive(struct seq_file *sf, void *v) { blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), bfqg_prfill_sectors_recursive, &blkcg_policy_bfq, 0, false); return 0; } static u64 bfqg_prfill_avg_queue_size(struct seq_file *sf, struct blkg_policy_data *pd, int off) { struct bfq_group *bfqg = pd_to_bfqg(pd); u64 samples = bfq_stat_read(&bfqg->stats.avg_queue_size_samples); u64 v = 0; if (samples) { v = bfq_stat_read(&bfqg->stats.avg_queue_size_sum); v = div64_u64(v, samples); } __blkg_prfill_u64(sf, pd, v); return 0; } /* print avg_queue_size */ static int bfqg_print_avg_queue_size(struct seq_file *sf, void *v) { blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), bfqg_prfill_avg_queue_size, &blkcg_policy_bfq, 0, false); return 0; } #endif /* CONFIG_BFQ_CGROUP_DEBUG */ struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node) { int ret; ret = blkcg_activate_policy(bfqd->queue->disk, &blkcg_policy_bfq); if (ret) return NULL; return blkg_to_bfqg(bfqd->queue->root_blkg); } struct blkcg_policy blkcg_policy_bfq = { .dfl_cftypes = bfq_blkg_files, .legacy_cftypes = bfq_blkcg_legacy_files, .cpd_alloc_fn = bfq_cpd_alloc, .cpd_free_fn = bfq_cpd_free, .pd_alloc_fn = bfq_pd_alloc, .pd_init_fn = bfq_pd_init, .pd_offline_fn = bfq_pd_offline, .pd_free_fn = bfq_pd_free, .pd_reset_stats_fn = bfq_pd_reset_stats, }; struct cftype bfq_blkcg_legacy_files[] = { { .name = "bfq.weight", .flags = CFTYPE_NOT_ON_ROOT, .seq_show = bfq_io_show_weight_legacy, .write_u64 = bfq_io_set_weight_legacy, }, { .name = "bfq.weight_device", .flags = CFTYPE_NOT_ON_ROOT, .seq_show = bfq_io_show_weight, .write = bfq_io_set_weight, }, /* statistics, covers only the tasks in the bfqg */ { .name = "bfq.io_service_bytes", .private = offsetof(struct bfq_group, stats.bytes), .seq_show = bfqg_print_rwstat, }, { .name = "bfq.io_serviced", .private = offsetof(struct bfq_group, stats.ios), .seq_show = bfqg_print_rwstat, }, #ifdef CONFIG_BFQ_CGROUP_DEBUG { .name = "bfq.time", .private = offsetof(struct bfq_group, stats.time), .seq_show = bfqg_print_stat, }, { .name = "bfq.sectors", .seq_show = bfqg_print_stat_sectors, }, { .name = "bfq.io_service_time", .private = offsetof(struct bfq_group, stats.service_time), .seq_show = bfqg_print_rwstat, }, { .name = "bfq.io_wait_time", .private = offsetof(struct bfq_group, stats.wait_time), .seq_show = bfqg_print_rwstat, }, { .name = "bfq.io_merged", .private = offsetof(struct bfq_group, stats.merged), .seq_show = bfqg_print_rwstat, }, { .name = "bfq.io_queued", .private = offsetof(struct bfq_group, stats.queued), .seq_show = bfqg_print_rwstat, }, #endif /* CONFIG_BFQ_CGROUP_DEBUG */ /* the same statistics which cover the bfqg and its descendants */ { .name = "bfq.io_service_bytes_recursive", .private = offsetof(struct bfq_group, stats.bytes), .seq_show = bfqg_print_rwstat_recursive, }, { .name = 
"bfq.io_serviced_recursive", .private = offsetof(struct bfq_group, stats.ios), .seq_show = bfqg_print_rwstat_recursive, }, #ifdef CONFIG_BFQ_CGROUP_DEBUG { .name = "bfq.time_recursive", .private = offsetof(struct bfq_group, stats.time), .seq_show = bfqg_print_stat_recursive, }, { .name = "bfq.sectors_recursive", .seq_show = bfqg_print_stat_sectors_recursive, }, { .name = "bfq.io_service_time_recursive", .private = offsetof(struct bfq_group, stats.service_time), .seq_show = bfqg_print_rwstat_recursive, }, { .name = "bfq.io_wait_time_recursive", .private = offsetof(struct bfq_group, stats.wait_time), .seq_show = bfqg_print_rwstat_recursive, }, { .name = "bfq.io_merged_recursive", .private = offsetof(struct bfq_group, stats.merged), .seq_show = bfqg_print_rwstat_recursive, }, { .name = "bfq.io_queued_recursive", .private = offsetof(struct bfq_group, stats.queued), .seq_show = bfqg_print_rwstat_recursive, }, { .name = "bfq.avg_queue_size", .seq_show = bfqg_print_avg_queue_size, }, { .name = "bfq.group_wait_time", .private = offsetof(struct bfq_group, stats.group_wait_time), .seq_show = bfqg_print_stat, }, { .name = "bfq.idle_time", .private = offsetof(struct bfq_group, stats.idle_time), .seq_show = bfqg_print_stat, }, { .name = "bfq.empty_time", .private = offsetof(struct bfq_group, stats.empty_time), .seq_show = bfqg_print_stat, }, { .name = "bfq.dequeue", .private = offsetof(struct bfq_group, stats.dequeue), .seq_show = bfqg_print_stat, }, #endif /* CONFIG_BFQ_CGROUP_DEBUG */ { } /* terminate */ }; struct cftype bfq_blkg_files[] = { { .name = "bfq.weight", .flags = CFTYPE_NOT_ON_ROOT, .seq_show = bfq_io_show_weight, .write = bfq_io_set_weight, }, {} /* terminate */ }; #else /* CONFIG_BFQ_GROUP_IOSCHED */ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, struct bfq_group *bfqg) {} void bfq_init_entity(struct bfq_entity *entity, struct bfq_group *bfqg) { struct bfq_queue *bfqq = bfq_entity_to_bfqq(entity); entity->weight = entity->new_weight; entity->orig_weight = entity->new_weight; if (bfqq) { bfqq->ioprio = bfqq->new_ioprio; bfqq->ioprio_class = bfqq->new_ioprio_class; } entity->sched_data = &bfqg->sched_data; } void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) {} void bfq_end_wr_async(struct bfq_data *bfqd) { bfq_end_wr_async_queues(bfqd, bfqd->root_group); } struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio) { return bfqd->root_group; } struct bfq_group *bfqq_group(struct bfq_queue *bfqq) { return bfqq->bfqd->root_group; } void bfqg_and_blkg_put(struct bfq_group *bfqg) {} struct bfq_group *bfq_create_group_hierarchy(struct bfq_data *bfqd, int node) { struct bfq_group *bfqg; int i; bfqg = kmalloc_node(sizeof(*bfqg), GFP_KERNEL | __GFP_ZERO, node); if (!bfqg) return NULL; for (i = 0; i < BFQ_IOPRIO_CLASSES; i++) bfqg->sched_data.service_tree[i] = BFQ_SERVICE_TREE_INIT; return bfqg; } #endif /* CONFIG_BFQ_GROUP_IOSCHED */ |
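/*
 * Illustrative sketch, not part of the original file: the blkcg_policy
 * descriptor and cftype tables above only take effect once the policy is
 * registered with the blk-cgroup core.  Assuming the standard
 * blkcg_policy_register()/blkcg_policy_unregister() API, an I/O-scheduler
 * module would typically do something like the following from its module
 * init/exit paths (the function names used here are hypothetical):
 */
#ifdef CONFIG_BFQ_GROUP_IOSCHED
static int __init bfq_policy_example_init(void)
{
	/* Lets blk-cgroup call bfq_pd_alloc(), bfq_pd_init(), etc. */
	return blkcg_policy_register(&blkcg_policy_bfq);
}

static void __exit bfq_policy_example_exit(void)
{
	blkcg_policy_unregister(&blkcg_policy_bfq);
}
#endif /* CONFIG_BFQ_GROUP_IOSCHED */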
// SPDX-License-Identifier: GPL-2.0 /* * ACPI helpers for GPIO API * * Copyright (C) 2012, Intel Corporation * Authors: Mathias Nyman <mathias.nyman@linux.intel.com> * Mika Westerberg <mika.westerberg@linux.intel.com> */ #include <linux/acpi.h> #include <linux/dmi.h> #include <linux/errno.h> #include <linux/export.h> #include <linux/interrupt.h> #include <linux/irq.h> #include <linux/mutex.h> #include <linux/pinctrl/pinctrl.h> #include <linux/gpio/consumer.h> #include <linux/gpio/driver.h> #include <linux/gpio/machine.h> #include "gpiolib.h" #include "gpiolib-acpi.h" static int run_edge_events_on_boot = -1; module_param(run_edge_events_on_boot, int, 0444); MODULE_PARM_DESC(run_edge_events_on_boot, "Run edge _AEI event-handlers at boot: 0=no, 1=yes, -1=auto"); static char *ignore_wake; module_param(ignore_wake, charp, 0444); MODULE_PARM_DESC(ignore_wake, "controller@pin combos on which to ignore the ACPI wake flag " "ignore_wake=controller@pin[,controller@pin[,...]]"); static char *ignore_interrupt; module_param(ignore_interrupt, charp, 0444); MODULE_PARM_DESC(ignore_interrupt, "controller@pin combos on which to ignore interrupt " "ignore_interrupt=controller@pin[,controller@pin[,...]]"); struct acpi_gpiolib_dmi_quirk { bool no_edge_events_on_boot; char *ignore_wake; char *ignore_interrupt; }; /** * struct acpi_gpio_event - ACPI GPIO event handler data * * @node: list-entry of the events list of the struct acpi_gpio_chip * @handle: handle of ACPI method to execute when the IRQ triggers * @handler: handler function to pass to request_irq() when requesting the IRQ * @pin: GPIO pin number on the struct gpio_chip * @irq: Linux IRQ number for the event, for request_irq() / free_irq() * @irqflags: flags to pass to request_irq() when requesting the IRQ * @irq_is_wake: If the ACPI flags indicate the IRQ is a wakeup source * @irq_requested:True if request_irq() has been done * @desc: struct gpio_desc for the GPIO pin for this event */ struct acpi_gpio_event { struct list_head node; acpi_handle handle; irq_handler_t handler; unsigned int pin; unsigned int irq; unsigned long irqflags; bool irq_is_wake; bool irq_requested; struct gpio_desc *desc; }; struct acpi_gpio_connection { struct list_head node; unsigned int pin; struct gpio_desc *desc; }; struct acpi_gpio_chip { /* * ACPICA requires that the first field of the context parameter * passed to acpi_install_address_space_handler() is large enough * to hold struct acpi_connection_info.
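 * That way the same pointer can be used both as a
 * struct acpi_connection_info by ACPICA and as the enclosing
 * struct acpi_gpio_chip inside the GPIO operation-region handler (see
 * acpi_gpio_adr_space_handler(), which treats its region_context
 * argument exactly this way).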
*/ struct acpi_connection_info conn_info; struct list_head conns; struct mutex conn_lock; struct gpio_chip *chip; struct list_head events; struct list_head deferred_req_irqs_list_entry; }; /** * struct acpi_gpio_info - ACPI GPIO specific information * @adev: reference to ACPI device which consumes GPIO resource * @flags: GPIO initialization flags * @gpioint: if %true this GPIO is of type GpioInt otherwise type is GpioIo * @pin_config: pin bias as provided by ACPI * @polarity: interrupt polarity as provided by ACPI * @triggering: triggering type as provided by ACPI * @wake_capable: wake capability as provided by ACPI * @debounce: debounce timeout as provided by ACPI * @quirks: Linux specific quirks as provided by struct acpi_gpio_mapping */ struct acpi_gpio_info { struct acpi_device *adev; enum gpiod_flags flags; bool gpioint; int pin_config; int polarity; int triggering; bool wake_capable; unsigned int debounce; unsigned int quirks; }; /* * For GPIO chips which call acpi_gpiochip_request_interrupts() before late_init * (so builtin drivers) we register the ACPI GpioInt IRQ handlers from a * late_initcall_sync() handler, so that other builtin drivers can register their * OpRegions before the event handlers can run. This list contains GPIO chips * for which the acpi_gpiochip_request_irqs() call has been deferred. */ static DEFINE_MUTEX(acpi_gpio_deferred_req_irqs_lock); static LIST_HEAD(acpi_gpio_deferred_req_irqs_list); static bool acpi_gpio_deferred_req_irqs_done; static int acpi_gpiochip_find(struct gpio_chip *gc, const void *data) { /* First check the actual GPIO device */ if (device_match_acpi_handle(&gc->gpiodev->dev, data)) return true; /* * When the ACPI device is artificially split to the banks of GPIOs, * where each of them is represented by a separate GPIO device, * the firmware node of the physical device may not be shared among * the banks as they may require different values for the same property, * e.g., number of GPIOs in a certain bank. In such case the ACPI handle * of a GPIO device is NULL and can not be used. Hence we have to check * the parent device to be sure that there is no match before bailing * out. */ if (gc->parent) return device_match_acpi_handle(gc->parent, data); return false; } /** * acpi_get_gpiod() - Translate ACPI GPIO pin to GPIO descriptor usable with GPIO API * @path: ACPI GPIO controller full path name, (e.g. "\\_SB.GPO1") * @pin: ACPI GPIO pin number (0-based, controller-relative) * * Returns: * GPIO descriptor to use with Linux generic GPIO API. * If the GPIO cannot be translated or there is an error an ERR_PTR is * returned. * * Specifically returns %-EPROBE_DEFER if the referenced GPIO * controller does not have GPIO chip registered at the moment. This is to * support probe deferral. */ static struct gpio_desc *acpi_get_gpiod(char *path, unsigned int pin) { acpi_handle handle; acpi_status status; status = acpi_get_handle(NULL, path, &handle); if (ACPI_FAILURE(status)) return ERR_PTR(-ENODEV); struct gpio_device *gdev __free(gpio_device_put) = gpio_device_find(handle, acpi_gpiochip_find); if (!gdev) return ERR_PTR(-EPROBE_DEFER); /* * FIXME: keep track of the reference to the GPIO device somehow * instead of putting it here. 
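 * As it stands, the reference obtained from gpio_device_find() is
 * dropped again when this function returns (through the
 * __free(gpio_device_put) cleanup above), so the returned descriptor
 * does not by itself pin the GPIO device.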
*/ return gpio_device_get_desc(gdev, pin); } static irqreturn_t acpi_gpio_irq_handler(int irq, void *data) { struct acpi_gpio_event *event = data; acpi_evaluate_object(event->handle, NULL, NULL, NULL); return IRQ_HANDLED; } static irqreturn_t acpi_gpio_irq_handler_evt(int irq, void *data) { struct acpi_gpio_event *event = data; acpi_execute_simple_method(event->handle, NULL, event->pin); return IRQ_HANDLED; } static void acpi_gpio_chip_dh(acpi_handle handle, void *data) { /* The address of this function is used as a key. */ } bool acpi_gpio_get_irq_resource(struct acpi_resource *ares, struct acpi_resource_gpio **agpio) { struct acpi_resource_gpio *gpio; if (ares->type != ACPI_RESOURCE_TYPE_GPIO) return false; gpio = &ares->data.gpio; if (gpio->connection_type != ACPI_RESOURCE_GPIO_TYPE_INT) return false; *agpio = gpio; return true; } EXPORT_SYMBOL_GPL(acpi_gpio_get_irq_resource); /** * acpi_gpio_get_io_resource - Fetch details of an ACPI resource if it is a GPIO * I/O resource or return False if not. * @ares: Pointer to the ACPI resource to fetch * @agpio: Pointer to a &struct acpi_resource_gpio to store the output pointer * * Returns: * %true if GpioIo resource is found, %false otherwise. */ bool acpi_gpio_get_io_resource(struct acpi_resource *ares, struct acpi_resource_gpio **agpio) { struct acpi_resource_gpio *gpio; if (ares->type != ACPI_RESOURCE_TYPE_GPIO) return false; gpio = &ares->data.gpio; if (gpio->connection_type != ACPI_RESOURCE_GPIO_TYPE_IO) return false; *agpio = gpio; return true; } EXPORT_SYMBOL_GPL(acpi_gpio_get_io_resource); static void acpi_gpiochip_request_irq(struct acpi_gpio_chip *acpi_gpio, struct acpi_gpio_event *event) { struct device *parent = acpi_gpio->chip->parent; int ret, value; ret = request_threaded_irq(event->irq, NULL, event->handler, event->irqflags | IRQF_ONESHOT, "ACPI:Event", event); if (ret) { dev_err(parent, "Failed to setup interrupt handler for %d\n", event->irq); return; } if (event->irq_is_wake) enable_irq_wake(event->irq); event->irq_requested = true; /* Make sure we trigger the initial state of edge-triggered IRQs */ if (run_edge_events_on_boot && (event->irqflags & (IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING))) { value = gpiod_get_raw_value_cansleep(event->desc); if (((event->irqflags & IRQF_TRIGGER_RISING) && value == 1) || ((event->irqflags & IRQF_TRIGGER_FALLING) && value == 0)) event->handler(event->irq, event); } } static void acpi_gpiochip_request_irqs(struct acpi_gpio_chip *acpi_gpio) { struct acpi_gpio_event *event; list_for_each_entry(event, &acpi_gpio->events, node) acpi_gpiochip_request_irq(acpi_gpio, event); } static enum gpiod_flags acpi_gpio_to_gpiod_flags(const struct acpi_resource_gpio *agpio, int polarity) { /* GpioInt() implies input configuration */ if (agpio->connection_type == ACPI_RESOURCE_GPIO_TYPE_INT) return GPIOD_IN; switch (agpio->io_restriction) { case ACPI_IO_RESTRICT_INPUT: return GPIOD_IN; case ACPI_IO_RESTRICT_OUTPUT: /* * ACPI GPIO resources don't contain an initial value for the * GPIO. Therefore we deduce that value from the pull field * and the polarity instead. If the pin is pulled up we assume * default to be high, if it is pulled down we assume default * to be low, otherwise we leave pin untouched. For active low * polarity values will be switched. See also * Documentation/firmware-guide/acpi/gpio-properties.rst. */ switch (agpio->pin_config) { case ACPI_PIN_CONFIG_PULLUP: return polarity == GPIO_ACTIVE_LOW ? 
GPIOD_OUT_LOW : GPIOD_OUT_HIGH; case ACPI_PIN_CONFIG_PULLDOWN: return polarity == GPIO_ACTIVE_LOW ? GPIOD_OUT_HIGH : GPIOD_OUT_LOW; default: break; } break; default: break; } /* * Assume that the BIOS has configured the direction and pull * accordingly. */ return GPIOD_ASIS; } static struct gpio_desc *acpi_request_own_gpiod(struct gpio_chip *chip, struct acpi_resource_gpio *agpio, unsigned int index, const char *label) { int polarity = GPIO_ACTIVE_HIGH; enum gpiod_flags flags = acpi_gpio_to_gpiod_flags(agpio, polarity); unsigned int pin = agpio->pin_table[index]; struct gpio_desc *desc; int ret; desc = gpiochip_request_own_desc(chip, pin, label, polarity, flags); if (IS_ERR(desc)) return desc; /* ACPI uses hundredths of milliseconds units */ ret = gpio_set_debounce_timeout(desc, agpio->debounce_timeout * 10); if (ret) dev_warn(chip->parent, "Failed to set debounce-timeout for pin 0x%04X, err %d\n", pin, ret); return desc; } static bool acpi_gpio_in_ignore_list(const char *ignore_list, const char *controller_in, unsigned int pin_in) { const char *controller, *pin_str; unsigned int pin; char *endp; int len; controller = ignore_list; while (controller) { pin_str = strchr(controller, '@'); if (!pin_str) goto err; len = pin_str - controller; if (len == strlen(controller_in) && strncmp(controller, controller_in, len) == 0) { pin = simple_strtoul(pin_str + 1, &endp, 10); if (*endp != 0 && *endp != ',') goto err; if (pin == pin_in) return true; } controller = strchr(controller, ','); if (controller) controller++; } return false; err: pr_err_once("Error: Invalid value for gpiolib_acpi.ignore_...: %s\n", ignore_list); return false; } static bool acpi_gpio_irq_is_wake(struct device *parent, const struct acpi_resource_gpio *agpio) { unsigned int pin = agpio->pin_table[0]; if (agpio->wake_capable != ACPI_WAKE_CAPABLE) return false; if (acpi_gpio_in_ignore_list(ignore_wake, dev_name(parent), pin)) { dev_info(parent, "Ignoring wakeup on pin %u\n", pin); return false; } return true; } /* Always returns AE_OK so that we keep looping over the resources */ static acpi_status acpi_gpiochip_alloc_event(struct acpi_resource *ares, void *context) { struct acpi_gpio_chip *acpi_gpio = context; struct gpio_chip *chip = acpi_gpio->chip; struct acpi_resource_gpio *agpio; acpi_handle handle, evt_handle; struct acpi_gpio_event *event; irq_handler_t handler = NULL; struct gpio_desc *desc; unsigned int pin; int ret, irq; if (!acpi_gpio_get_irq_resource(ares, &agpio)) return AE_OK; handle = ACPI_HANDLE(chip->parent); pin = agpio->pin_table[0]; if (pin <= 255) { char ev_name[8]; sprintf(ev_name, "_%c%02X", agpio->triggering == ACPI_EDGE_SENSITIVE ? 
'E' : 'L', pin); if (ACPI_SUCCESS(acpi_get_handle(handle, ev_name, &evt_handle))) handler = acpi_gpio_irq_handler; } if (!handler) { if (ACPI_SUCCESS(acpi_get_handle(handle, "_EVT", &evt_handle))) handler = acpi_gpio_irq_handler_evt; } if (!handler) return AE_OK; if (acpi_gpio_in_ignore_list(ignore_interrupt, dev_name(chip->parent), pin)) { dev_info(chip->parent, "Ignoring interrupt on pin %u\n", pin); return AE_OK; } desc = acpi_request_own_gpiod(chip, agpio, 0, "ACPI:Event"); if (IS_ERR(desc)) { dev_err(chip->parent, "Failed to request GPIO for pin 0x%04X, err %ld\n", pin, PTR_ERR(desc)); return AE_OK; } ret = gpiochip_lock_as_irq(chip, pin); if (ret) { dev_err(chip->parent, "Failed to lock GPIO pin 0x%04X as interrupt, err %d\n", pin, ret); goto fail_free_desc; } irq = gpiod_to_irq(desc); if (irq < 0) { dev_err(chip->parent, "Failed to translate GPIO pin 0x%04X to IRQ, err %d\n", pin, irq); goto fail_unlock_irq; } event = kzalloc(sizeof(*event), GFP_KERNEL); if (!event) goto fail_unlock_irq; event->irqflags = IRQF_ONESHOT; if (agpio->triggering == ACPI_LEVEL_SENSITIVE) { if (agpio->polarity == ACPI_ACTIVE_HIGH) event->irqflags |= IRQF_TRIGGER_HIGH; else event->irqflags |= IRQF_TRIGGER_LOW; } else { switch (agpio->polarity) { case ACPI_ACTIVE_HIGH: event->irqflags |= IRQF_TRIGGER_RISING; break; case ACPI_ACTIVE_LOW: event->irqflags |= IRQF_TRIGGER_FALLING; break; default: event->irqflags |= IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING; break; } } event->handle = evt_handle; event->handler = handler; event->irq = irq; event->irq_is_wake = acpi_gpio_irq_is_wake(chip->parent, agpio); event->pin = pin; event->desc = desc; list_add_tail(&event->node, &acpi_gpio->events); return AE_OK; fail_unlock_irq: gpiochip_unlock_as_irq(chip, pin); fail_free_desc: gpiochip_free_own_desc(desc); return AE_OK; } /** * acpi_gpiochip_request_interrupts() - Register isr for gpio chip ACPI events * @chip: GPIO chip * * ACPI5 platforms can use GPIO signaled ACPI events. These GPIO interrupts are * handled by ACPI event methods which need to be called from the GPIO * chip's interrupt handler. acpi_gpiochip_request_interrupts() finds out which * GPIO pins have ACPI event methods and assigns interrupt handlers that calls * the ACPI event methods for those pins. */ void acpi_gpiochip_request_interrupts(struct gpio_chip *chip) { struct acpi_gpio_chip *acpi_gpio; acpi_handle handle; acpi_status status; bool defer; if (!chip->parent || !chip->to_irq) return; handle = ACPI_HANDLE(chip->parent); if (!handle) return; status = acpi_get_data(handle, acpi_gpio_chip_dh, (void **)&acpi_gpio); if (ACPI_FAILURE(status)) return; if (acpi_quirk_skip_gpio_event_handlers()) return; acpi_walk_resources(handle, METHOD_NAME__AEI, acpi_gpiochip_alloc_event, acpi_gpio); mutex_lock(&acpi_gpio_deferred_req_irqs_lock); defer = !acpi_gpio_deferred_req_irqs_done; if (defer) list_add(&acpi_gpio->deferred_req_irqs_list_entry, &acpi_gpio_deferred_req_irqs_list); mutex_unlock(&acpi_gpio_deferred_req_irqs_lock); if (defer) return; acpi_gpiochip_request_irqs(acpi_gpio); } EXPORT_SYMBOL_GPL(acpi_gpiochip_request_interrupts); /** * acpi_gpiochip_free_interrupts() - Free GPIO ACPI event interrupts. * @chip: GPIO chip * * Free interrupts associated with GPIO ACPI event method for the given * GPIO chip. 
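 * Besides freeing the IRQs, this also removes the chip from the list of
 * deferred IRQ requests, so it is safe to call even if the event IRQs
 * were never actually requested.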
*/ void acpi_gpiochip_free_interrupts(struct gpio_chip *chip) { struct acpi_gpio_chip *acpi_gpio; struct acpi_gpio_event *event, *ep; acpi_handle handle; acpi_status status; if (!chip->parent || !chip->to_irq) return; handle = ACPI_HANDLE(chip->parent); if (!handle) return; status = acpi_get_data(handle, acpi_gpio_chip_dh, (void **)&acpi_gpio); if (ACPI_FAILURE(status)) return; mutex_lock(&acpi_gpio_deferred_req_irqs_lock); if (!list_empty(&acpi_gpio->deferred_req_irqs_list_entry)) list_del_init(&acpi_gpio->deferred_req_irqs_list_entry); mutex_unlock(&acpi_gpio_deferred_req_irqs_lock); list_for_each_entry_safe_reverse(event, ep, &acpi_gpio->events, node) { if (event->irq_requested) { if (event->irq_is_wake) disable_irq_wake(event->irq); free_irq(event->irq, event); } gpiochip_unlock_as_irq(chip, event->pin); gpiochip_free_own_desc(event->desc); list_del(&event->node); kfree(event); } } EXPORT_SYMBOL_GPL(acpi_gpiochip_free_interrupts); int acpi_dev_add_driver_gpios(struct acpi_device *adev, const struct acpi_gpio_mapping *gpios) { if (adev && gpios) { adev->driver_gpios = gpios; return 0; } return -EINVAL; } EXPORT_SYMBOL_GPL(acpi_dev_add_driver_gpios); void acpi_dev_remove_driver_gpios(struct acpi_device *adev) { if (adev) adev->driver_gpios = NULL; } EXPORT_SYMBOL_GPL(acpi_dev_remove_driver_gpios); static void acpi_dev_release_driver_gpios(void *adev) { acpi_dev_remove_driver_gpios(adev); } int devm_acpi_dev_add_driver_gpios(struct device *dev, const struct acpi_gpio_mapping *gpios) { struct acpi_device *adev = ACPI_COMPANION(dev); int ret; ret = acpi_dev_add_driver_gpios(adev, gpios); if (ret) return ret; return devm_add_action_or_reset(dev, acpi_dev_release_driver_gpios, adev); } EXPORT_SYMBOL_GPL(devm_acpi_dev_add_driver_gpios); static bool acpi_get_driver_gpio_data(struct acpi_device *adev, const char *name, int index, struct fwnode_reference_args *args, unsigned int *quirks) { const struct acpi_gpio_mapping *gm; if (!adev || !adev->driver_gpios) return false; for (gm = adev->driver_gpios; gm->name; gm++) if (!strcmp(name, gm->name) && gm->data && index < gm->size) { const struct acpi_gpio_params *par = gm->data + index; args->fwnode = acpi_fwnode_handle(adev); args->args[0] = par->crs_entry_index; args->args[1] = par->line_index; args->args[2] = par->active_low; args->nargs = 3; *quirks = gm->quirks; return true; } return false; } static int __acpi_gpio_update_gpiod_flags(enum gpiod_flags *flags, enum gpiod_flags update) { const enum gpiod_flags mask = GPIOD_FLAGS_BIT_DIR_SET | GPIOD_FLAGS_BIT_DIR_OUT | GPIOD_FLAGS_BIT_DIR_VAL; int ret = 0; /* * Check if the BIOS has IoRestriction with explicitly set direction * and update @flags accordingly. Otherwise use whatever caller asked * for. */ if (update & GPIOD_FLAGS_BIT_DIR_SET) { enum gpiod_flags diff = *flags ^ update; /* * Check if caller supplied incompatible GPIO initialization * flags. * * Return %-EINVAL to notify that firmware has different * settings and we are going to use them. 
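 * For example, if the caller asked for GPIOD_OUT_HIGH while the GpioIo
 * resource is IoRestrictionInputOnly, @update carries the input
 * direction: the directions differ, so -EINVAL is returned and @flags
 * is switched to the firmware-provided input configuration.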
*/ if (((*flags & GPIOD_FLAGS_BIT_DIR_SET) && (diff & GPIOD_FLAGS_BIT_DIR_OUT)) || ((*flags & GPIOD_FLAGS_BIT_DIR_OUT) && (diff & GPIOD_FLAGS_BIT_DIR_VAL))) ret = -EINVAL; *flags = (*flags & ~mask) | (update & mask); } return ret; } static int acpi_gpio_update_gpiod_flags(enum gpiod_flags *flags, struct acpi_gpio_info *info) { struct device *dev = &info->adev->dev; enum gpiod_flags old = *flags; int ret; ret = __acpi_gpio_update_gpiod_flags(&old, info->flags); if (info->quirks & ACPI_GPIO_QUIRK_NO_IO_RESTRICTION) { if (ret) dev_warn(dev, FW_BUG "GPIO not in correct mode, fixing\n"); } else { if (ret) dev_dbg(dev, "Override GPIO initialization flags\n"); *flags = old; } return ret; } static int acpi_gpio_update_gpiod_lookup_flags(unsigned long *lookupflags, struct acpi_gpio_info *info) { switch (info->pin_config) { case ACPI_PIN_CONFIG_PULLUP: *lookupflags |= GPIO_PULL_UP; break; case ACPI_PIN_CONFIG_PULLDOWN: *lookupflags |= GPIO_PULL_DOWN; break; case ACPI_PIN_CONFIG_NOPULL: *lookupflags |= GPIO_PULL_DISABLE; break; default: break; } if (info->polarity == GPIO_ACTIVE_LOW) *lookupflags |= GPIO_ACTIVE_LOW; return 0; } struct acpi_gpio_lookup { struct acpi_gpio_info info; int index; u16 pin_index; bool active_low; struct gpio_desc *desc; int n; }; static int acpi_populate_gpio_lookup(struct acpi_resource *ares, void *data) { struct acpi_gpio_lookup *lookup = data; if (ares->type != ACPI_RESOURCE_TYPE_GPIO) return 1; if (!lookup->desc) { const struct acpi_resource_gpio *agpio = &ares->data.gpio; bool gpioint = agpio->connection_type == ACPI_RESOURCE_GPIO_TYPE_INT; struct gpio_desc *desc; u16 pin_index; if (lookup->info.quirks & ACPI_GPIO_QUIRK_ONLY_GPIOIO && gpioint) lookup->index++; if (lookup->n++ != lookup->index) return 1; pin_index = lookup->pin_index; if (pin_index >= agpio->pin_table_length) return 1; if (lookup->info.quirks & ACPI_GPIO_QUIRK_ABSOLUTE_NUMBER) desc = gpio_to_desc(agpio->pin_table[pin_index]); else desc = acpi_get_gpiod(agpio->resource_source.string_ptr, agpio->pin_table[pin_index]); lookup->desc = desc; lookup->info.pin_config = agpio->pin_config; lookup->info.debounce = agpio->debounce_timeout; lookup->info.gpioint = gpioint; lookup->info.wake_capable = acpi_gpio_irq_is_wake(&lookup->info.adev->dev, agpio); /* * Polarity and triggering are only specified for GpioInt * resource. 
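 * For GpioIo resources the polarity comes from the active_low cell of
 * the GPIO mapping instead (handled by the else branch below).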
* Note: we expect here: * - ACPI_ACTIVE_LOW == GPIO_ACTIVE_LOW * - ACPI_ACTIVE_HIGH == GPIO_ACTIVE_HIGH */ if (lookup->info.gpioint) { lookup->info.polarity = agpio->polarity; lookup->info.triggering = agpio->triggering; } else { lookup->info.polarity = lookup->active_low; } lookup->info.flags = acpi_gpio_to_gpiod_flags(agpio, lookup->info.polarity); } return 1; } static int acpi_gpio_resource_lookup(struct acpi_gpio_lookup *lookup, struct acpi_gpio_info *info) { struct acpi_device *adev = lookup->info.adev; struct list_head res_list; int ret; INIT_LIST_HEAD(&res_list); ret = acpi_dev_get_resources(adev, &res_list, acpi_populate_gpio_lookup, lookup); if (ret < 0) return ret; acpi_dev_free_resource_list(&res_list); if (!lookup->desc) return -ENOENT; if (info) *info = lookup->info; return 0; } static int acpi_gpio_property_lookup(struct fwnode_handle *fwnode, const char *propname, int index, struct acpi_gpio_lookup *lookup) { struct fwnode_reference_args args; unsigned int quirks = 0; int ret; memset(&args, 0, sizeof(args)); ret = __acpi_node_get_property_reference(fwnode, propname, index, 3, &args); if (ret) { struct acpi_device *adev; adev = to_acpi_device_node(fwnode); if (!acpi_get_driver_gpio_data(adev, propname, index, &args, &quirks)) return ret; } /* * The property was found and resolved, so need to lookup the GPIO based * on returned args. */ if (!to_acpi_device_node(args.fwnode)) return -EINVAL; if (args.nargs != 3) return -EPROTO; lookup->index = args.args[0]; lookup->pin_index = args.args[1]; lookup->active_low = !!args.args[2]; lookup->info.adev = to_acpi_device_node(args.fwnode); lookup->info.quirks = quirks; return 0; } /** * acpi_get_gpiod_by_index() - get a GPIO descriptor from device resources * @adev: pointer to a ACPI device to get GPIO from * @propname: Property name of the GPIO (optional) * @index: index of GpioIo/GpioInt resource (starting from %0) * @info: info pointer to fill in (optional) * * Function goes through ACPI resources for @adev and based on @index looks * up a GpioIo/GpioInt resource, translates it to the Linux GPIO descriptor, * and returns it. @index matches GpioIo/GpioInt resources only so if there * are total %3 GPIO resources, the index goes from %0 to %2. * * If @propname is specified the GPIO is looked using device property. In * that case @index is used to select the GPIO entry in the property value * (in case of multiple). * * Returns: * GPIO descriptor to use with Linux generic GPIO API. * If the GPIO cannot be translated or there is an error an ERR_PTR is * returned. * * Note: if the GPIO resource has multiple entries in the pin list, this * function only returns the first. */ static struct gpio_desc *acpi_get_gpiod_by_index(struct acpi_device *adev, const char *propname, int index, struct acpi_gpio_info *info) { struct acpi_gpio_lookup lookup; int ret; memset(&lookup, 0, sizeof(lookup)); lookup.index = index; if (propname) { dev_dbg(&adev->dev, "GPIO: looking up %s\n", propname); ret = acpi_gpio_property_lookup(acpi_fwnode_handle(adev), propname, index, &lookup); if (ret) return ERR_PTR(ret); dev_dbg(&adev->dev, "GPIO: _DSD returned %s %d %u %u\n", dev_name(&lookup.info.adev->dev), lookup.index, lookup.pin_index, lookup.active_low); } else { dev_dbg(&adev->dev, "GPIO: looking up %d in _CRS\n", index); lookup.info.adev = adev; } ret = acpi_gpio_resource_lookup(&lookup, info); return ret ? 
ERR_PTR(ret) : lookup.desc; } /** * acpi_get_gpiod_from_data() - get a GPIO descriptor from ACPI data node * @fwnode: pointer to an ACPI firmware node to get the GPIO information from * @propname: Property name of the GPIO * @index: index of GpioIo/GpioInt resource (starting from %0) * @info: info pointer to fill in (optional) * * This function uses the property-based GPIO lookup to get to the GPIO * resource with the relevant information from a data-only ACPI firmware node * and uses that to obtain the GPIO descriptor to return. * * Returns: * GPIO descriptor to use with Linux generic GPIO API. * If the GPIO cannot be translated or there is an error an ERR_PTR is * returned. */ static struct gpio_desc *acpi_get_gpiod_from_data(struct fwnode_handle *fwnode, const char *propname, int index, struct acpi_gpio_info *info) { struct acpi_gpio_lookup lookup; int ret; if (!is_acpi_data_node(fwnode)) return ERR_PTR(-ENODEV); if (!propname) return ERR_PTR(-EINVAL); memset(&lookup, 0, sizeof(lookup)); lookup.index = index; ret = acpi_gpio_property_lookup(fwnode, propname, index, &lookup); if (ret) return ERR_PTR(ret); ret = acpi_gpio_resource_lookup(&lookup, info); return ret ? ERR_PTR(ret) : lookup.desc; } static bool acpi_can_fallback_to_crs(struct acpi_device *adev, const char *con_id) { /* If there is no ACPI device, there is no _CRS to fall back to */ if (!adev) return false; /* Never allow fallback if the device has properties */ if (acpi_dev_has_props(adev) || adev->driver_gpios) return false; return con_id == NULL; } static struct gpio_desc * __acpi_find_gpio(struct fwnode_handle *fwnode, const char *con_id, unsigned int idx, bool can_fallback, struct acpi_gpio_info *info) { struct acpi_device *adev = to_acpi_device_node(fwnode); struct gpio_desc *desc; char propname[32]; /* Try first from _DSD */ for_each_gpio_property_name(propname, con_id) { if (adev) desc = acpi_get_gpiod_by_index(adev, propname, idx, info); else desc = acpi_get_gpiod_from_data(fwnode, propname, idx, info); if (PTR_ERR(desc) == -EPROBE_DEFER) return desc; if (!IS_ERR(desc)) return desc; } /* Then from plain _CRS GPIOs */ if (can_fallback) return acpi_get_gpiod_by_index(adev, NULL, idx, info); return ERR_PTR(-ENOENT); } struct gpio_desc *acpi_find_gpio(struct fwnode_handle *fwnode, const char *con_id, unsigned int idx, enum gpiod_flags *dflags, unsigned long *lookupflags) { struct acpi_device *adev = to_acpi_device_node(fwnode); bool can_fallback = acpi_can_fallback_to_crs(adev, con_id); struct acpi_gpio_info info; struct gpio_desc *desc; desc = __acpi_find_gpio(fwnode, con_id, idx, can_fallback, &info); if (IS_ERR(desc)) return desc; if (info.gpioint && (*dflags == GPIOD_OUT_LOW || *dflags == GPIOD_OUT_HIGH)) { dev_dbg(&adev->dev, "refusing GpioInt() entry when doing GPIOD_OUT_* lookup\n"); return ERR_PTR(-ENOENT); } acpi_gpio_update_gpiod_flags(dflags, &info); acpi_gpio_update_gpiod_lookup_flags(lookupflags, &info); return desc; } /** * acpi_dev_gpio_irq_wake_get_by() - Find GpioInt and translate it to Linux IRQ number * @adev: pointer to a ACPI device to get IRQ from * @con_id: optional name of GpioInt resource * @index: index of GpioInt resource (starting from %0) * @wake_capable: Set to true if the IRQ is wake capable * * If the device has one or more GpioInt resources, this function can be * used to translate from the GPIO offset in the resource to the Linux IRQ * number. * * The function is idempotent, though each time it runs it will configure GPIO * pin direction according to the flags in GpioInt resource. 
* * The function takes optional @con_id parameter. If the resource has * a @con_id in a property, then only those will be taken into account. * * The GPIO is considered wake capable if the GpioInt resource specifies * SharedAndWake or ExclusiveAndWake. * * Returns: * Linux IRQ number (> 0) on success, negative errno on failure. */ int acpi_dev_gpio_irq_wake_get_by(struct acpi_device *adev, const char *con_id, int index, bool *wake_capable) { struct fwnode_handle *fwnode = acpi_fwnode_handle(adev); int idx, i; unsigned int irq_flags; int ret; for (i = 0, idx = 0; idx <= index; i++) { struct acpi_gpio_info info; struct gpio_desc *desc; /* Ignore -EPROBE_DEFER, it only matters if idx matches */ desc = __acpi_find_gpio(fwnode, con_id, i, true, &info); if (IS_ERR(desc) && PTR_ERR(desc) != -EPROBE_DEFER) return PTR_ERR(desc); if (info.gpioint && idx++ == index) { unsigned long lflags = GPIO_LOOKUP_FLAGS_DEFAULT; enum gpiod_flags dflags = GPIOD_ASIS; char label[32]; int irq; if (IS_ERR(desc)) return PTR_ERR(desc); irq = gpiod_to_irq(desc); if (irq < 0) return irq; acpi_gpio_update_gpiod_flags(&dflags, &info); acpi_gpio_update_gpiod_lookup_flags(&lflags, &info); snprintf(label, sizeof(label), "%pfwP GpioInt(%d)", fwnode, index); ret = gpiod_set_consumer_name(desc, con_id ?: label); if (ret) return ret; ret = gpiod_configure_flags(desc, label, lflags, dflags); if (ret < 0) return ret; /* ACPI uses hundredths of milliseconds units */ ret = gpio_set_debounce_timeout(desc, info.debounce * 10); if (ret) return ret; irq_flags = acpi_dev_get_irq_type(info.triggering, info.polarity); /* * If the IRQ is not already in use then set type * if specified and different than the current one. */ if (can_request_irq(irq, irq_flags)) { if (irq_flags != IRQ_TYPE_NONE && irq_flags != irq_get_trigger_type(irq)) irq_set_irq_type(irq, irq_flags); } else { dev_dbg(&adev->dev, "IRQ %d already in use\n", irq); } /* avoid suspend issues with GPIOs when systems are using S3 */ if (wake_capable && acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) *wake_capable = info.wake_capable; return irq; } } return -ENOENT; } EXPORT_SYMBOL_GPL(acpi_dev_gpio_irq_wake_get_by); static acpi_status acpi_gpio_adr_space_handler(u32 function, acpi_physical_address address, u32 bits, u64 *value, void *handler_context, void *region_context) { struct acpi_gpio_chip *achip = region_context; struct gpio_chip *chip = achip->chip; struct acpi_resource_gpio *agpio; struct acpi_resource *ares; u16 pin_index = address; acpi_status status; int length; int i; status = acpi_buffer_to_resource(achip->conn_info.connection, achip->conn_info.length, &ares); if (ACPI_FAILURE(status)) return status; if (WARN_ON(ares->type != ACPI_RESOURCE_TYPE_GPIO)) { ACPI_FREE(ares); return AE_BAD_PARAMETER; } agpio = &ares->data.gpio; if (WARN_ON(agpio->io_restriction == ACPI_IO_RESTRICT_INPUT && function == ACPI_WRITE)) { ACPI_FREE(ares); return AE_BAD_PARAMETER; } length = min_t(u16, agpio->pin_table_length, pin_index + bits); for (i = pin_index; i < length; ++i) { unsigned int pin = agpio->pin_table[i]; struct acpi_gpio_connection *conn; struct gpio_desc *desc; bool found; mutex_lock(&achip->conn_lock); found = false; list_for_each_entry(conn, &achip->conns, node) { if (conn->pin == pin) { found = true; desc = conn->desc; break; } } /* * The same GPIO can be shared between operation region and * event but only if the access here is ACPI_READ. In that * case we "borrow" the event GPIO instead. 
*/ if (!found && agpio->shareable == ACPI_SHARED && function == ACPI_READ) { struct acpi_gpio_event *event; list_for_each_entry(event, &achip->events, node) { if (event->pin == pin) { desc = event->desc; found = true; break; } } } if (!found) { desc = acpi_request_own_gpiod(chip, agpio, i, "ACPI:OpRegion"); if (IS_ERR(desc)) { mutex_unlock(&achip->conn_lock); status = AE_ERROR; goto out; } conn = kzalloc(sizeof(*conn), GFP_KERNEL); if (!conn) { gpiochip_free_own_desc(desc); mutex_unlock(&achip->conn_lock); status = AE_NO_MEMORY; goto out; } conn->pin = pin; conn->desc = desc; list_add_tail(&conn->node, &achip->conns); } mutex_unlock(&achip->conn_lock); if (function == ACPI_WRITE) gpiod_set_raw_value_cansleep(desc, !!(*value & BIT(i))); else *value |= (u64)gpiod_get_raw_value_cansleep(desc) << i; } out: ACPI_FREE(ares); return status; } static void acpi_gpiochip_request_regions(struct acpi_gpio_chip *achip) { struct gpio_chip *chip = achip->chip; acpi_handle handle = ACPI_HANDLE(chip->parent); acpi_status status; INIT_LIST_HEAD(&achip->conns); mutex_init(&achip->conn_lock); status = acpi_install_address_space_handler(handle, ACPI_ADR_SPACE_GPIO, acpi_gpio_adr_space_handler, NULL, achip); if (ACPI_FAILURE(status)) dev_err(chip->parent, "Failed to install GPIO OpRegion handler\n"); } static void acpi_gpiochip_free_regions(struct acpi_gpio_chip *achip) { struct gpio_chip *chip = achip->chip; acpi_handle handle = ACPI_HANDLE(chip->parent); struct acpi_gpio_connection *conn, *tmp; acpi_status status; status = acpi_remove_address_space_handler(handle, ACPI_ADR_SPACE_GPIO, acpi_gpio_adr_space_handler); if (ACPI_FAILURE(status)) { dev_err(chip->parent, "Failed to remove GPIO OpRegion handler\n"); return; } list_for_each_entry_safe_reverse(conn, tmp, &achip->conns, node) { gpiochip_free_own_desc(conn->desc); list_del(&conn->node); kfree(conn); } } static struct gpio_desc * acpi_gpiochip_parse_own_gpio(struct acpi_gpio_chip *achip, struct fwnode_handle *fwnode, const char **name, unsigned long *lflags, enum gpiod_flags *dflags) { struct gpio_chip *chip = achip->chip; struct gpio_desc *desc; u32 gpios[2]; int ret; *lflags = GPIO_LOOKUP_FLAGS_DEFAULT; *dflags = GPIOD_ASIS; *name = NULL; ret = fwnode_property_read_u32_array(fwnode, "gpios", gpios, ARRAY_SIZE(gpios)); if (ret < 0) return ERR_PTR(ret); desc = gpiochip_get_desc(chip, gpios[0]); if (IS_ERR(desc)) return desc; if (gpios[1]) *lflags |= GPIO_ACTIVE_LOW; if (fwnode_property_present(fwnode, "input")) *dflags |= GPIOD_IN; else if (fwnode_property_present(fwnode, "output-low")) *dflags |= GPIOD_OUT_LOW; else if (fwnode_property_present(fwnode, "output-high")) *dflags |= GPIOD_OUT_HIGH; else return ERR_PTR(-EINVAL); fwnode_property_read_string(fwnode, "line-name", name); return desc; } static void acpi_gpiochip_scan_gpios(struct acpi_gpio_chip *achip) { struct gpio_chip *chip = achip->chip; device_for_each_child_node_scoped(chip->parent, fwnode) { unsigned long lflags; enum gpiod_flags dflags; struct gpio_desc *desc; const char *name; int ret; if (!fwnode_property_present(fwnode, "gpio-hog")) continue; desc = acpi_gpiochip_parse_own_gpio(achip, fwnode, &name, &lflags, &dflags); if (IS_ERR(desc)) continue; ret = gpiod_hog(desc, name, lflags, dflags); if (ret) { dev_err(chip->parent, "Failed to hog GPIO\n"); return; } } } void acpi_gpiochip_add(struct gpio_chip *chip) { struct acpi_gpio_chip *acpi_gpio; struct acpi_device *adev; acpi_status status; if (!chip || !chip->parent) return; adev = ACPI_COMPANION(chip->parent); if (!adev) return; acpi_gpio = 
kzalloc(sizeof(*acpi_gpio), GFP_KERNEL); if (!acpi_gpio) { dev_err(chip->parent, "Failed to allocate memory for ACPI GPIO chip\n"); return; } acpi_gpio->chip = chip; INIT_LIST_HEAD(&acpi_gpio->events); INIT_LIST_HEAD(&acpi_gpio->deferred_req_irqs_list_entry); status = acpi_attach_data(adev->handle, acpi_gpio_chip_dh, acpi_gpio); if (ACPI_FAILURE(status)) { dev_err(chip->parent, "Failed to attach ACPI GPIO chip\n"); kfree(acpi_gpio); return; } acpi_gpiochip_request_regions(acpi_gpio); acpi_gpiochip_scan_gpios(acpi_gpio); acpi_dev_clear_dependencies(adev); } void acpi_gpiochip_remove(struct gpio_chip *chip) { struct acpi_gpio_chip *acpi_gpio; acpi_handle handle; acpi_status status; if (!chip || !chip->parent) return; handle = ACPI_HANDLE(chip->parent); if (!handle) return; status = acpi_get_data(handle, acpi_gpio_chip_dh, (void **)&acpi_gpio); if (ACPI_FAILURE(status)) { dev_warn(chip->parent, "Failed to retrieve ACPI GPIO chip\n"); return; } acpi_gpiochip_free_regions(acpi_gpio); acpi_detach_data(handle, acpi_gpio_chip_dh); kfree(acpi_gpio); } static int acpi_gpio_package_count(const union acpi_object *obj) { const union acpi_object *element = obj->package.elements; const union acpi_object *end = element + obj->package.count; unsigned int count = 0; while (element < end) { switch (element->type) { case ACPI_TYPE_LOCAL_REFERENCE: element += 3; fallthrough; case ACPI_TYPE_INTEGER: element++; count++; break; default: return -EPROTO; } } return count; } static int acpi_find_gpio_count(struct acpi_resource *ares, void *data) { unsigned int *count = data; if (ares->type == ACPI_RESOURCE_TYPE_GPIO) *count += ares->data.gpio.pin_table_length; return 1; } /** * acpi_gpio_count - count the GPIOs associated with a firmware node / function * @fwnode: firmware node of the GPIO consumer * @con_id: function within the GPIO consumer * * Returns: * The number of GPIOs associated with a firmware node / function or %-ENOENT, * if no GPIO has been assigned to the requested function. */ int acpi_gpio_count(const struct fwnode_handle *fwnode, const char *con_id) { struct acpi_device *adev = to_acpi_device_node(fwnode); const union acpi_object *obj; const struct acpi_gpio_mapping *gm; int count = -ENOENT; int ret; char propname[32]; /* Try first from _DSD */ for_each_gpio_property_name(propname, con_id) { ret = acpi_dev_get_property(adev, propname, ACPI_TYPE_ANY, &obj); if (ret == 0) { if (obj->type == ACPI_TYPE_LOCAL_REFERENCE) count = 1; else if (obj->type == ACPI_TYPE_PACKAGE) count = acpi_gpio_package_count(obj); } else if (adev->driver_gpios) { for (gm = adev->driver_gpios; gm->name; gm++) if (strcmp(propname, gm->name) == 0) { count = gm->size; break; } } if (count > 0) break; } /* Then from plain _CRS GPIOs */ if (count < 0) { struct list_head resource_list; unsigned int crs_count = 0; if (!acpi_can_fallback_to_crs(adev, con_id)) return count; INIT_LIST_HEAD(&resource_list); acpi_dev_get_resources(adev, &resource_list, acpi_find_gpio_count, &crs_count); acpi_dev_free_resource_list(&resource_list); if (crs_count > 0) count = crs_count; } return count ? 
count : -ENOENT; } /* Run deferred acpi_gpiochip_request_irqs() */ static int __init acpi_gpio_handle_deferred_request_irqs(void) { struct acpi_gpio_chip *acpi_gpio, *tmp; mutex_lock(&acpi_gpio_deferred_req_irqs_lock); list_for_each_entry_safe(acpi_gpio, tmp, &acpi_gpio_deferred_req_irqs_list, deferred_req_irqs_list_entry) acpi_gpiochip_request_irqs(acpi_gpio); acpi_gpio_deferred_req_irqs_done = true; mutex_unlock(&acpi_gpio_deferred_req_irqs_lock); return 0; } /* We must use _sync so that this runs after the first deferred_probe run */ late_initcall_sync(acpi_gpio_handle_deferred_request_irqs); static const struct dmi_system_id gpiolib_acpi_quirks[] __initconst = { { /* * The Minix Neo Z83-4 has a micro-USB-B id-pin handler for * a non existing micro-USB-B connector which puts the HDMI * DDC pins in GPIO mode, breaking HDMI support. */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "MINIX"), DMI_MATCH(DMI_PRODUCT_NAME, "Z83-4"), }, .driver_data = &(struct acpi_gpiolib_dmi_quirk) { .no_edge_events_on_boot = true, }, }, { /* * The Terra Pad 1061 has a micro-USB-B id-pin handler, which * instead of controlling the actual micro-USB-B turns the 5V * boost for its USB-A connector off. The actual micro-USB-B * connector is wired for charging only. */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Wortmann_AG"), DMI_MATCH(DMI_PRODUCT_NAME, "TERRA_PAD_1061"), }, .driver_data = &(struct acpi_gpiolib_dmi_quirk) { .no_edge_events_on_boot = true, }, }, { /* * The Dell Venue 10 Pro 5055, with Bay Trail SoC + TI PMIC uses an * external embedded-controller connected via I2C + an ACPI GPIO * event handler on INT33FFC:02 pin 12, causing spurious wakeups. */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "Venue 10 Pro 5055"), }, .driver_data = &(struct acpi_gpiolib_dmi_quirk) { .ignore_wake = "INT33FC:02@12", }, }, { /* * HP X2 10 models with Cherry Trail SoC + TI PMIC use an * external embedded-controller connected via I2C + an ACPI GPIO * event handler on INT33FF:01 pin 0, causing spurious wakeups. * When suspending by closing the LID, the power to the USB * keyboard is turned off, causing INT0002 ACPI events to * trigger once the XHCI controller notices the keyboard is * gone. So INT0002 events cause spurious wakeups too. Ignoring * EC wakes breaks wakeup when opening the lid, the user needs * to press the power-button to wakeup the system. The * alternative is suspend simply not working, which is worse. */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "HP"), DMI_MATCH(DMI_PRODUCT_NAME, "HP x2 Detachable 10-p0XX"), }, .driver_data = &(struct acpi_gpiolib_dmi_quirk) { .ignore_wake = "INT33FF:01@0,INT0002:00@2", }, }, { /* * HP X2 10 models with Bay Trail SoC + AXP288 PMIC use an * external embedded-controller connected via I2C + an ACPI GPIO * event handler on INT33FC:02 pin 28, causing spurious wakeups. */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion x2 Detachable"), DMI_MATCH(DMI_BOARD_NAME, "815D"), }, .driver_data = &(struct acpi_gpiolib_dmi_quirk) { .ignore_wake = "INT33FC:02@28", }, }, { /* * HP X2 10 models with Cherry Trail SoC + AXP288 PMIC use an * external embedded-controller connected via I2C + an ACPI GPIO * event handler on INT33FF:01 pin 0, causing spurious wakeups. 
*/ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "HP"), DMI_MATCH(DMI_PRODUCT_NAME, "HP Pavilion x2 Detachable"), DMI_MATCH(DMI_BOARD_NAME, "813E"), }, .driver_data = &(struct acpi_gpiolib_dmi_quirk) { .ignore_wake = "INT33FF:01@0", }, }, { /* * Interrupt storm caused from edge triggered floating pin * Found in BIOS UX325UAZ.300 * https://bugzilla.kernel.org/show_bug.cgi?id=216208 */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX325UAZ_UM325UAZ"), }, .driver_data = &(struct acpi_gpiolib_dmi_quirk) { .ignore_interrupt = "AMDI0030:00@18", }, }, { /* * Spurious wakeups from TP_ATTN# pin * Found in BIOS 1.7.8 * https://gitlab.freedesktop.org/drm/amd/-/issues/1722#note_1720627 */ .matches = { DMI_MATCH(DMI_BOARD_NAME, "NL5xNU"), }, .driver_data = &(struct acpi_gpiolib_dmi_quirk) { .ignore_wake = "ELAN0415:00@9", }, }, { /* * Spurious wakeups from TP_ATTN# pin * Found in BIOS 1.7.8 * https://gitlab.freedesktop.org/drm/amd/-/issues/1722#note_1720627 */ .matches = { DMI_MATCH(DMI_BOARD_NAME, "NL5xRU"), }, .driver_data = &(struct acpi_gpiolib_dmi_quirk) { .ignore_wake = "ELAN0415:00@9", }, }, { /* * Spurious wakeups from TP_ATTN# pin * Found in BIOS 1.7.7 */ .matches = { DMI_MATCH(DMI_BOARD_NAME, "NH5xAx"), }, .driver_data = &(struct acpi_gpiolib_dmi_quirk) { .ignore_wake = "SYNA1202:00@16", }, }, { /* * On the Peaq C1010 2-in-1 INT33FC:00 pin 3 is connected to * a "dolby" button. At the ACPI level an _AEI event-handler * is connected which sets an ACPI variable to 1 on both * edges. This variable can be polled + cleared to 0 using * WMI. But since the variable is set on both edges the WMI * interface is pretty useless even when polling. * So instead the x86-android-tablets code instantiates * a gpio-keys platform device for it. * Ignore the _AEI handler for the pin, so that it is not busy. */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "PEAQ"), DMI_MATCH(DMI_PRODUCT_NAME, "PEAQ PMM C1010 MD99187"), }, .driver_data = &(struct acpi_gpiolib_dmi_quirk) { .ignore_interrupt = "INT33FC:00@3", }, }, { /* * Spurious wakeups from TP_ATTN# pin * Found in BIOS 0.35 * https://gitlab.freedesktop.org/drm/amd/-/issues/3073 */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "GPD"), DMI_MATCH(DMI_PRODUCT_NAME, "G1619-04"), }, .driver_data = &(struct acpi_gpiolib_dmi_quirk) { .ignore_wake = "PNP0C50:00@8", }, }, { /* * Spurious wakeups from GPIO 11 * Found in BIOS 1.04 * https://gitlab.freedesktop.org/drm/amd/-/issues/3954 */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Acer"), DMI_MATCH(DMI_PRODUCT_FAMILY, "Acer Nitro V 14"), }, .driver_data = &(struct acpi_gpiolib_dmi_quirk) { .ignore_interrupt = "AMDI0030:00@11", }, }, {} /* Terminating entry */ }; static int __init acpi_gpio_setup_params(void) { const struct acpi_gpiolib_dmi_quirk *quirk = NULL; const struct dmi_system_id *id; id = dmi_first_match(gpiolib_acpi_quirks); if (id) quirk = id->driver_data; if (run_edge_events_on_boot < 0) { if (quirk && quirk->no_edge_events_on_boot) run_edge_events_on_boot = 0; else run_edge_events_on_boot = 1; } if (ignore_wake == NULL && quirk && quirk->ignore_wake) ignore_wake = quirk->ignore_wake; if (ignore_interrupt == NULL && quirk && quirk->ignore_interrupt) ignore_interrupt = quirk->ignore_interrupt; return 0; } /* Directly after dmi_setup() which runs as core_initcall() */ postcore_initcall(acpi_gpio_setup_params); |
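/*
 * Illustrative, hedged sketch -- not part of the gpiolib-acpi code above. It shows how
 * a consumer driver typically reaches the lookup path implemented there: a
 * driver-supplied acpi_gpio_mapping table is registered (this is what
 * acpi_get_driver_gpio_data() consults via adev->driver_gpios), and the
 * subsequent gpiod request resolves through acpi_find_gpio() /
 * acpi_get_gpiod_by_index(). The "reset" name, the indices and the demo
 * device are assumptions for illustration only.
 */
#include <linux/acpi.h>
#include <linux/gpio/consumer.h>
#include <linux/platform_device.h>

/* _CRS GpioIo entry 0, line 0, active-high (hypothetical board layout) */
static const struct acpi_gpio_params demo_reset_gpio = { 0, 0, false };

static const struct acpi_gpio_mapping demo_acpi_gpios[] = {
	{ "reset-gpios", &demo_reset_gpio, 1 },
	{ }
};

static int demo_probe(struct platform_device *pdev)
{
	struct gpio_desc *reset;
	int ret;

	/* Registers the table consulted by acpi_get_driver_gpio_data() */
	ret = devm_acpi_dev_add_driver_gpios(&pdev->dev, demo_acpi_gpios);
	if (ret)
		return ret;

	/* For ACPI-enumerated devices this request ends up in acpi_find_gpio() */
	reset = devm_gpiod_get(&pdev->dev, "reset", GPIOD_OUT_LOW);
	if (IS_ERR(reset))
		return PTR_ERR(reset);

	gpiod_set_value_cansleep(reset, 1);
	return 0;
}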
// SPDX-License-Identifier: GPL-2.0 /* * Generic USB GNSS receiver driver * * Copyright (C) 2021 Johan Hovold <johan@kernel.org> */ #include <linux/errno.h> #include <linux/gnss.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/usb.h> #define GNSS_USB_READ_BUF_LEN 512 #define GNSS_USB_WRITE_TIMEOUT 1000 static const struct usb_device_id gnss_usb_id_table[] = { { USB_DEVICE(0x1199, 0xb000) }, /* Sierra Wireless XM1210 */ { } }; MODULE_DEVICE_TABLE(usb, gnss_usb_id_table); struct gnss_usb { struct usb_device *udev; struct usb_interface *intf; struct gnss_device *gdev; struct urb *read_urb; unsigned int write_pipe; }; static void gnss_usb_rx_complete(struct urb *urb) { struct gnss_usb *gusb = urb->context; struct gnss_device *gdev = gusb->gdev; int status = urb->status; int len; int ret; switch (status) { case 0: break; case -ENOENT: case -ECONNRESET: case -ESHUTDOWN: dev_dbg(&gdev->dev, "urb stopped: %d\n", status); return; case -EPIPE: dev_err(&gdev->dev, "urb stopped: %d\n", status); return; default: dev_dbg(&gdev->dev, "nonzero urb status: %d\n", status); goto resubmit; } len = urb->actual_length; if (len == 0) goto resubmit; ret = gnss_insert_raw(gdev, urb->transfer_buffer, len); if (ret < len) dev_dbg(&gdev->dev, "dropped %d bytes\n", len - ret); resubmit: ret = usb_submit_urb(urb, GFP_ATOMIC); if (ret && ret != -EPERM && ret != -ENODEV) dev_err(&gdev->dev, "failed to resubmit urb: %d\n", ret); } static int gnss_usb_open(struct gnss_device *gdev) { struct gnss_usb *gusb = gnss_get_drvdata(gdev); int ret; ret = usb_submit_urb(gusb->read_urb, GFP_KERNEL); if (ret) { if (ret != -EPERM && ret != -ENODEV) dev_err(&gdev->dev, "failed to submit urb: %d\n", ret); return ret; } return 0; } static void gnss_usb_close(struct gnss_device *gdev) { struct gnss_usb *gusb = gnss_get_drvdata(gdev); usb_kill_urb(gusb->read_urb); } static int gnss_usb_write_raw(struct gnss_device *gdev, const unsigned char *buf, size_t count) { struct gnss_usb *gusb = gnss_get_drvdata(gdev); void *tbuf; int ret; tbuf = kmemdup(buf, count, GFP_KERNEL); if (!tbuf) return -ENOMEM; ret = usb_bulk_msg(gusb->udev, gusb->write_pipe, tbuf, count, NULL, GNSS_USB_WRITE_TIMEOUT); kfree(tbuf); if (ret) return ret; return count; } static const struct gnss_operations gnss_usb_gnss_ops = { .open = gnss_usb_open, .close = gnss_usb_close, .write_raw = gnss_usb_write_raw, }; static int gnss_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(intf); struct usb_endpoint_descriptor *in, *out; struct gnss_device *gdev; struct gnss_usb *gusb; struct urb *urb; size_t buf_len; void *buf; int ret; ret = usb_find_common_endpoints(intf->cur_altsetting, &in, &out, NULL,
NULL); if (ret) return ret; gusb = kzalloc(sizeof(*gusb), GFP_KERNEL); if (!gusb) return -ENOMEM; gdev = gnss_allocate_device(&intf->dev); if (!gdev) { ret = -ENOMEM; goto err_free_gusb; } gdev->ops = &gnss_usb_gnss_ops; gdev->type = GNSS_TYPE_NMEA; gnss_set_drvdata(gdev, gusb); urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) { ret = -ENOMEM; goto err_put_gdev; } buf_len = max(usb_endpoint_maxp(in), GNSS_USB_READ_BUF_LEN); buf = kzalloc(buf_len, GFP_KERNEL); if (!buf) { ret = -ENOMEM; goto err_free_urb; } usb_fill_bulk_urb(urb, udev, usb_rcvbulkpipe(udev, usb_endpoint_num(in)), buf, buf_len, gnss_usb_rx_complete, gusb); gusb->intf = intf; gusb->udev = udev; gusb->gdev = gdev; gusb->read_urb = urb; gusb->write_pipe = usb_sndbulkpipe(udev, usb_endpoint_num(out)); ret = gnss_register_device(gdev); if (ret) goto err_free_buf; usb_set_intfdata(intf, gusb); return 0; err_free_buf: kfree(buf); err_free_urb: usb_free_urb(urb); err_put_gdev: gnss_put_device(gdev); err_free_gusb: kfree(gusb); return ret; } static void gnss_usb_disconnect(struct usb_interface *intf) { struct gnss_usb *gusb = usb_get_intfdata(intf); gnss_deregister_device(gusb->gdev); kfree(gusb->read_urb->transfer_buffer); usb_free_urb(gusb->read_urb); gnss_put_device(gusb->gdev); kfree(gusb); } static struct usb_driver gnss_usb_driver = { .name = "gnss-usb", .probe = gnss_usb_probe, .disconnect = gnss_usb_disconnect, .id_table = gnss_usb_id_table, }; module_usb_driver(gnss_usb_driver); MODULE_AUTHOR("Johan Hovold <johan@kernel.org>"); MODULE_DESCRIPTION("Generic USB GNSS receiver driver"); MODULE_LICENSE("GPL v2"); |
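/*
 * Hedged userspace sketch -- not part of the driver above. The GNSS core
 * registers a character device for each receiver; reading it returns the raw
 * NMEA bytes that gnss_usb_rx_complete() pushes in via gnss_insert_raw().
 * The node name /dev/gnss0 assumes this is the first receiver in the system.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[512];
	ssize_t n;
	int fd = open("/dev/gnss0", O_RDONLY);

	if (fd < 0) {
		perror("open /dev/gnss0");
		return 1;
	}
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);	/* raw NMEA sentences */
	close(fd);
	return 0;
}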
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_MEMREMAP_H_ #define _LINUX_MEMREMAP_H_ #include <linux/mmzone.h> #include <linux/range.h> #include <linux/ioport.h> #include <linux/percpu-refcount.h> struct resource; struct device; /** * struct vmem_altmap - pre-allocated storage for vmemmap_populate * @base_pfn: base of the entire dev_pagemap mapping * @reserve: pages mapped, but reserved for driver use (relative to @base) * @free: free pages set aside in the mapping for memmap storage * @align: pages reserved to meet allocation alignments * @alloc: track pages consumed, private to vmemmap_populate() */ struct vmem_altmap { unsigned long base_pfn; const unsigned long end_pfn; const unsigned long reserve; unsigned long free; unsigned long align; unsigned long alloc; bool inaccessible; }; /* * Specialize ZONE_DEVICE memory into multiple types, each of which has a * different usage. * * MEMORY_DEVICE_PRIVATE: * Device memory that is not directly addressable by the CPU: CPU can neither * read nor write private memory. In this case, we do still have struct pages * backing the device memory. Doing so simplifies the implementation, but it is * important to remember that there are certain points at which the struct page * must be treated as an opaque object, rather than a "normal" struct page. * * A more complete discussion of unaddressable memory may be found in * include/linux/hmm.h and Documentation/mm/hmm.rst. * * MEMORY_DEVICE_COHERENT: * Device memory that is cache coherent from device and CPU point of view. This * is used on platforms that have an advanced system bus (like CAPI or CXL). A * driver can hotplug the device memory using ZONE_DEVICE and with that memory * type. Any page of a process can be migrated to such memory. However no one * should be allowed to pin such memory so that it can always be evicted. * * MEMORY_DEVICE_FS_DAX: * Host memory that has similar access semantics as System RAM i.e. DMA * coherent and supports page pinning. In support of coordinating page * pinning vs other operations MEMORY_DEVICE_FS_DAX arranges for a * wakeup event whenever a page is unpinned and becomes idle. This * wakeup is used to coordinate physical address space management (ex: * fs truncate/hole punch) vs pinned pages (ex: device dma). * * MEMORY_DEVICE_GENERIC: * Host memory that has similar access semantics as System RAM i.e. DMA * coherent and supports page pinning. This is for example used by DAX devices * that expose memory using a character device. * * MEMORY_DEVICE_PCI_P2PDMA: * Device memory residing in a PCI BAR intended for use with Peer-to-Peer * transactions.
*/ enum memory_type { /* 0 is reserved to catch uninitialized type fields */ MEMORY_DEVICE_PRIVATE = 1, MEMORY_DEVICE_COHERENT, MEMORY_DEVICE_FS_DAX, MEMORY_DEVICE_GENERIC, MEMORY_DEVICE_PCI_P2PDMA, }; struct dev_pagemap_ops { /* * Called once the page refcount reaches 0. The reference count will be * reset to one by the core code after the method is called to prepare * for handing out the page again. */ void (*page_free)(struct page *page); /* * Used for private (un-addressable) device memory only. Must migrate * the page back to a CPU accessible page. */ vm_fault_t (*migrate_to_ram)(struct vm_fault *vmf); /* * Handle a memory failure that happens on a range of pfns. Notify the * processes that are using these pfns, and try to recover the data on * them if necessary. The mf_flags is finally passed to the recovery * function through the whole notify routine. * * When this is not implemented, or it returns -EOPNOTSUPP, the caller * will fall back to a common handler called mf_generic_kill_procs(). */ int (*memory_failure)(struct dev_pagemap *pgmap, unsigned long pfn, unsigned long nr_pages, int mf_flags); }; #define PGMAP_ALTMAP_VALID (1 << 0) /** * struct dev_pagemap - metadata for ZONE_DEVICE mappings * @altmap: pre-allocated/reserved memory for vmemmap allocations * @ref: reference count that pins the devm_memremap_pages() mapping * @done: completion for @ref * @type: memory type: see MEMORY_* above in memremap.h * @flags: PGMAP_* flags to specify detailed behavior * @vmemmap_shift: structural definition of how the vmemmap page metadata * is populated, specifically the metadata page order. * A zero value (default) uses base pages as the vmemmap metadata * representation. A bigger value will set up compound struct pages * of the requested order value. * @ops: method table * @owner: an opaque pointer identifying the entity that manages this * instance. Used by various helpers to make sure that no * foreign ZONE_DEVICE memory is accessed.
* @nr_range: number of ranges to be mapped * @range: range to be mapped when nr_range == 1 * @ranges: array of ranges to be mapped when nr_range > 1 */ struct dev_pagemap { struct vmem_altmap altmap; struct percpu_ref ref; struct completion done; enum memory_type type; unsigned int flags; unsigned long vmemmap_shift; const struct dev_pagemap_ops *ops; void *owner; int nr_range; union { struct range range; DECLARE_FLEX_ARRAY(struct range, ranges); }; }; static inline bool pgmap_has_memory_failure(struct dev_pagemap *pgmap) { return pgmap->ops && pgmap->ops->memory_failure; } static inline struct vmem_altmap *pgmap_altmap(struct dev_pagemap *pgmap) { if (pgmap->flags & PGMAP_ALTMAP_VALID) return &pgmap->altmap; return NULL; } static inline unsigned long pgmap_vmemmap_nr(struct dev_pagemap *pgmap) { return 1 << pgmap->vmemmap_shift; } static inline bool is_device_private_page(const struct page *page) { return IS_ENABLED(CONFIG_DEVICE_PRIVATE) && is_zone_device_page(page) && page->pgmap->type == MEMORY_DEVICE_PRIVATE; } static inline bool folio_is_device_private(const struct folio *folio) { return is_device_private_page(&folio->page); } static inline bool is_pci_p2pdma_page(const struct page *page) { return IS_ENABLED(CONFIG_PCI_P2PDMA) && is_zone_device_page(page) && page->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA; } static inline bool is_device_coherent_page(const struct page *page) { return is_zone_device_page(page) && page->pgmap->type == MEMORY_DEVICE_COHERENT; } static inline bool folio_is_device_coherent(const struct folio *folio) { return is_device_coherent_page(&folio->page); } #ifdef CONFIG_ZONE_DEVICE void zone_device_page_init(struct page *page); void *memremap_pages(struct dev_pagemap *pgmap, int nid); void memunmap_pages(struct dev_pagemap *pgmap); void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap); void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap); struct dev_pagemap *get_dev_pagemap(unsigned long pfn, struct dev_pagemap *pgmap); bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn); unsigned long memremap_compat_align(void); #else static inline void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) { /* * Fail attempts to call devm_memremap_pages() without * ZONE_DEVICE support enabled, this requires callers to fall * back to plain devm_memremap() based on config */ WARN_ON_ONCE(1); return ERR_PTR(-ENXIO); } static inline void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap) { } static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn, struct dev_pagemap *pgmap) { return NULL; } static inline bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn) { return false; } /* when memremap_pages() is disabled all archs can remap a single page */ static inline unsigned long memremap_compat_align(void) { return PAGE_SIZE; } #endif /* CONFIG_ZONE_DEVICE */ static inline void put_dev_pagemap(struct dev_pagemap *pgmap) { if (pgmap) percpu_ref_put(&pgmap->ref); } #endif /* _LINUX_MEMREMAP_H_ */ |
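/*
 * Hedged sketch -- not part of the header above. A minimal driver-side use
 * of devm_memremap_pages(): fill in a dev_pagemap for one physical range and
 * let the devres machinery unwind it on detach. The device/resource and the
 * MEMORY_DEVICE_GENERIC choice are assumptions for illustration; see
 * drivers/dax and drivers/nvdimm for real users.
 */
#include <linux/device.h>
#include <linux/err.h>
#include <linux/ioport.h>
#include <linux/memremap.h>

static void *demo_map_device_memory(struct device *dev, struct resource *res)
{
	struct dev_pagemap *pgmap;

	pgmap = devm_kzalloc(dev, sizeof(*pgmap), GFP_KERNEL);
	if (!pgmap)
		return ERR_PTR(-ENOMEM);

	pgmap->type = MEMORY_DEVICE_GENERIC;	/* DMA-coherent, pinnable memory */
	pgmap->range.start = res->start;
	pgmap->range.end = res->end;
	pgmap->nr_range = 1;

	/* Creates struct pages for the range; returns the mapping or an ERR_PTR */
	return devm_memremap_pages(dev, pgmap);
}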
/* * PCM I/O Plug-In Interface * Copyright (c) 1999 by Jaroslav Kysela <perex@perex.cz> * * * This library is free software; you can redistribute it and/or modify * it under the terms of the GNU Library General Public License as * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #include <linux/time.h> #include <sound/core.h> #include <sound/pcm.h> #include <sound/pcm_params.h> #include "pcm_plugin.h" #define pcm_write(plug,buf,count) snd_pcm_oss_write3(plug,buf,count,1) #define pcm_writev(plug,vec,count) snd_pcm_oss_writev3(plug,vec,count) #define pcm_read(plug,buf,count) snd_pcm_oss_read3(plug,buf,count,1) #define pcm_readv(plug,vec,count) snd_pcm_oss_readv3(plug,vec,count) /* * Basic io plugin */ static snd_pcm_sframes_t io_playback_transfer(struct snd_pcm_plugin *plugin, const struct snd_pcm_plugin_channel *src_channels, struct snd_pcm_plugin_channel *dst_channels, snd_pcm_uframes_t frames) { if (snd_BUG_ON(!plugin)) return -ENXIO; if (snd_BUG_ON(!src_channels)) return -ENXIO; if (plugin->access == SNDRV_PCM_ACCESS_RW_INTERLEAVED) { return pcm_write(plugin->plug, src_channels->area.addr, frames); } else { int channel, channels = plugin->dst_format.channels; void **bufs = (void**)plugin->extra_data; if (snd_BUG_ON(!bufs)) return -ENXIO; for (channel = 0; channel < channels; channel++) { if (src_channels[channel].enabled) bufs[channel] = src_channels[channel].area.addr; else bufs[channel] = NULL; } return pcm_writev(plugin->plug, bufs, frames); } } static snd_pcm_sframes_t io_capture_transfer(struct snd_pcm_plugin *plugin, const struct snd_pcm_plugin_channel *src_channels, struct snd_pcm_plugin_channel *dst_channels, snd_pcm_uframes_t frames) { if (snd_BUG_ON(!plugin)) return -ENXIO; if (snd_BUG_ON(!dst_channels)) return -ENXIO; if (plugin->access == SNDRV_PCM_ACCESS_RW_INTERLEAVED) { return pcm_read(plugin->plug, dst_channels->area.addr, frames); } else { int channel, channels = plugin->dst_format.channels; void **bufs = (void**)plugin->extra_data; if (snd_BUG_ON(!bufs)) return -ENXIO; for (channel = 0; channel < channels; channel++) { if (dst_channels[channel].enabled) bufs[channel] = dst_channels[channel].area.addr; else bufs[channel] = NULL; } return pcm_readv(plugin->plug, bufs, frames); } return 0; } static snd_pcm_sframes_t io_src_channels(struct snd_pcm_plugin *plugin, snd_pcm_uframes_t frames, struct snd_pcm_plugin_channel **channels) { int err; unsigned int channel; struct snd_pcm_plugin_channel *v; err = snd_pcm_plugin_client_channels(plugin, frames, &v); if
(err < 0) return err; *channels = v; if (plugin->access == SNDRV_PCM_ACCESS_RW_INTERLEAVED) { for (channel = 0; channel < plugin->src_format.channels; ++channel, ++v) v->wanted = 1; } return frames; } int snd_pcm_plugin_build_io(struct snd_pcm_substream *plug, struct snd_pcm_hw_params *params, struct snd_pcm_plugin **r_plugin) { int err; struct snd_pcm_plugin_format format; struct snd_pcm_plugin *plugin; if (snd_BUG_ON(!r_plugin)) return -ENXIO; *r_plugin = NULL; if (snd_BUG_ON(!plug || !params)) return -ENXIO; format.format = params_format(params); format.rate = params_rate(params); format.channels = params_channels(params); err = snd_pcm_plugin_build(plug, "I/O io", &format, &format, sizeof(void *) * format.channels, &plugin); if (err < 0) return err; plugin->access = params_access(params); if (snd_pcm_plug_stream(plug) == SNDRV_PCM_STREAM_PLAYBACK) { plugin->transfer = io_playback_transfer; if (plugin->access == SNDRV_PCM_ACCESS_RW_INTERLEAVED) plugin->client_channels = io_src_channels; } else { plugin->transfer = io_capture_transfer; } *r_plugin = plugin; return 0; } |
/* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM hugetlbfs #if !defined(_TRACE_HUGETLBFS_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HUGETLBFS_H #include <linux/tracepoint.h> TRACE_EVENT(hugetlbfs_alloc_inode, TP_PROTO(struct inode *inode, struct inode *dir, int mode), TP_ARGS(inode, dir, mode), TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) __field(ino_t, dir) __field(__u16, mode) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->dir = dir ? dir->i_ino : 0; __entry->mode = mode; ), TP_printk("dev %d,%d ino %lu dir %lu mode 0%o", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, (unsigned long) __entry->dir, __entry->mode) ); DECLARE_EVENT_CLASS(hugetlbfs__inode, TP_PROTO(struct inode *inode), TP_ARGS(inode), TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) __field(__u16, mode) __field(loff_t, size) __field(unsigned int, nlink) __field(unsigned int, seals) __field(blkcnt_t, blocks) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->mode = inode->i_mode; __entry->size = inode->i_size; __entry->nlink = inode->i_nlink; __entry->seals = HUGETLBFS_I(inode)->seals; __entry->blocks = inode->i_blocks; ), TP_printk("dev %d,%d ino %lu mode 0%o size %lld nlink %u seals %u blocks %llu", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long) __entry->ino, __entry->mode, __entry->size, __entry->nlink, __entry->seals, (unsigned long long)__entry->blocks) ); DEFINE_EVENT(hugetlbfs__inode, hugetlbfs_evict_inode, TP_PROTO(struct inode *inode), TP_ARGS(inode) ); DEFINE_EVENT(hugetlbfs__inode, hugetlbfs_free_inode, TP_PROTO(struct inode *inode), TP_ARGS(inode) ); TRACE_EVENT(hugetlbfs_setattr, TP_PROTO(struct inode *inode, struct dentry *dentry, struct iattr *attr), TP_ARGS(inode, dentry, attr), TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) __field(unsigned int, d_len) __string(d_name, dentry->d_name.name) __field(unsigned int, ia_valid) __field(unsigned int, ia_mode) __field(loff_t, old_size) __field(loff_t, ia_size) ), TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->d_len = dentry->d_name.len; __assign_str(d_name); __entry->ia_valid = attr->ia_valid; __entry->ia_mode = attr->ia_mode; __entry->old_size = inode->i_size; __entry->ia_size = attr->ia_size; ), TP_printk("dev %d,%d ino %lu name %.*s valid %#x mode 0%o old_size %lld size %lld", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long)__entry->ino, __entry->d_len, __get_str(d_name), __entry->ia_valid, __entry->ia_mode, __entry->old_size, __entry->ia_size) ); TRACE_EVENT(hugetlbfs_fallocate, TP_PROTO(struct inode *inode, int mode, loff_t offset, loff_t len, int ret), TP_ARGS(inode, mode, offset, len, ret), TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) __field(int, mode) __field(loff_t, offset) __field(loff_t, len) __field(loff_t, size) __field(int, ret) ), TP_fast_assign( __entry->dev =
inode->i_sb->s_dev; __entry->ino = inode->i_ino; __entry->mode = mode; __entry->offset = offset; __entry->len = len; __entry->size = inode->i_size; __entry->ret = ret; ), TP_printk("dev %d,%d ino %lu mode 0%o offset %lld len %lld size %lld ret %d", MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long)__entry->ino, __entry->mode, (unsigned long long)__entry->offset, (unsigned long long)__entry->len, (unsigned long long)__entry->size, __entry->ret) ); #endif /* _TRACE_HUGETLBFS_H */ /* This part must be outside protection */ #include <trace/define_trace.h> |
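/*
 * Hedged sketch -- not part of the header above. Each TRACE_EVENT() /
 * DEFINE_EVENT() generates a trace_<name>() hook that the filesystem calls at
 * the matching point; exactly one .c file defines CREATE_TRACE_POINTS before
 * including the header. The helper below is illustrative only (real hugetlbfs
 * inode allocation does considerably more). Events are enabled at runtime via
 * /sys/kernel/tracing/events/hugetlbfs/.
 */
#include <linux/fs.h>

#define CREATE_TRACE_POINTS
#include <trace/events/hugetlbfs.h>

static struct inode *demo_alloc_inode(struct super_block *sb, struct inode *dir,
				      int mode)
{
	struct inode *inode = new_inode(sb);

	if (inode)
		trace_hugetlbfs_alloc_inode(inode, dir, mode);
	return inode;
}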
// SPDX-License-Identifier: GPL-2.0 /* * linux/drivers/scsi/scsi_proc.c * * The functions in this file provide an interface between * the PROC file system and the SCSI device drivers. * It is mainly used for debugging, statistics and to pass * information directly to the lowlevel driver. * * (c) 1995 Michael Neuffer neuffer@goofy.zdv.uni-mainz.de * Version: 0.99.8 last change: 95/09/13 * * generic command parser provided by: * Andreas Heilwagen <crashcar@informatik.uni-koblenz.de> * * generic_proc_info() support of xxxx_info() by: * Michael A.
Griffith <grif@acm.org> */ #include <linux/module.h> #include <linux/init.h> #include <linux/string.h> #include <linux/mm.h> #include <linux/proc_fs.h> #include <linux/errno.h> #include <linux/blkdev.h> #include <linux/seq_file.h> #include <linux/mutex.h> #include <linux/gfp.h> #include <linux/uaccess.h> #include <scsi/scsi.h> #include <scsi/scsi_device.h> #include <scsi/scsi_host.h> #include <scsi/scsi_transport.h> #include "scsi_priv.h" #include "scsi_logging.h" /* 4K page size, but our output routines, use some slack for overruns */ #define PROC_BLOCK_SIZE (3*1024) static struct proc_dir_entry *proc_scsi; /* Protects scsi_proc_list */ static DEFINE_MUTEX(global_host_template_mutex); static LIST_HEAD(scsi_proc_list); /** * struct scsi_proc_entry - (host template, SCSI proc dir) association * @entry: entry in scsi_proc_list. * @sht: SCSI host template associated with the procfs directory. * @proc_dir: procfs directory associated with the SCSI host template. * @present: Number of SCSI hosts instantiated for @sht. */ struct scsi_proc_entry { struct list_head entry; const struct scsi_host_template *sht; struct proc_dir_entry *proc_dir; unsigned int present; }; static ssize_t proc_scsi_host_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct Scsi_Host *shost = pde_data(file_inode(file)); ssize_t ret = -ENOMEM; char *page; if (count > PROC_BLOCK_SIZE) return -EOVERFLOW; if (!shost->hostt->write_info) return -EINVAL; page = (char *)__get_free_page(GFP_KERNEL); if (page) { ret = -EFAULT; if (copy_from_user(page, buf, count)) goto out; ret = shost->hostt->write_info(shost, page, count); } out: free_page((unsigned long)page); return ret; } static int proc_scsi_show(struct seq_file *m, void *v) { struct Scsi_Host *shost = m->private; return shost->hostt->show_info(m, shost); } static int proc_scsi_host_open(struct inode *inode, struct file *file) { return single_open_size(file, proc_scsi_show, pde_data(inode), 4 * PAGE_SIZE); } static struct scsi_proc_entry * __scsi_lookup_proc_entry(const struct scsi_host_template *sht) { struct scsi_proc_entry *e; lockdep_assert_held(&global_host_template_mutex); list_for_each_entry(e, &scsi_proc_list, entry) if (e->sht == sht) return e; return NULL; } static struct scsi_proc_entry * scsi_lookup_proc_entry(const struct scsi_host_template *sht) { struct scsi_proc_entry *e; mutex_lock(&global_host_template_mutex); e = __scsi_lookup_proc_entry(sht); mutex_unlock(&global_host_template_mutex); return e; } /** * scsi_template_proc_dir() - returns the procfs dir for a SCSI host template * @sht: SCSI host template pointer. */ struct proc_dir_entry * scsi_template_proc_dir(const struct scsi_host_template *sht) { struct scsi_proc_entry *e = scsi_lookup_proc_entry(sht); return e ? e->proc_dir : NULL; } EXPORT_SYMBOL_GPL(scsi_template_proc_dir); static const struct proc_ops proc_scsi_ops = { .proc_open = proc_scsi_host_open, .proc_release = single_release, .proc_read = seq_read, .proc_lseek = seq_lseek, .proc_write = proc_scsi_host_write }; /** * scsi_proc_hostdir_add - Create directory in /proc for a scsi host * @sht: owner of this directory * * Sets sht->proc_dir to the new directory. 
*/ int scsi_proc_hostdir_add(const struct scsi_host_template *sht) { struct scsi_proc_entry *e; int ret; if (!sht->show_info) return 0; mutex_lock(&global_host_template_mutex); e = __scsi_lookup_proc_entry(sht); if (!e) { e = kzalloc(sizeof(*e), GFP_KERNEL); if (!e) { ret = -ENOMEM; goto unlock; } } if (e->present++) goto success; e->proc_dir = proc_mkdir(sht->proc_name, proc_scsi); if (!e->proc_dir) { printk(KERN_ERR "%s: proc_mkdir failed for %s\n", __func__, sht->proc_name); ret = -ENOMEM; goto unlock; } e->sht = sht; list_add_tail(&e->entry, &scsi_proc_list); success: e = NULL; ret = 0; unlock: mutex_unlock(&global_host_template_mutex); kfree(e); return ret; } /** * scsi_proc_hostdir_rm - remove directory in /proc for a scsi host * @sht: owner of directory */ void scsi_proc_hostdir_rm(const struct scsi_host_template *sht) { struct scsi_proc_entry *e; if (!sht->show_info) return; mutex_lock(&global_host_template_mutex); e = __scsi_lookup_proc_entry(sht); if (e && !--e->present) { remove_proc_entry(sht->proc_name, proc_scsi); list_del(&e->entry); kfree(e); } mutex_unlock(&global_host_template_mutex); } /** * scsi_proc_host_add - Add entry for this host to appropriate /proc dir * @shost: host to add */ void scsi_proc_host_add(struct Scsi_Host *shost) { const struct scsi_host_template *sht = shost->hostt; struct scsi_proc_entry *e; struct proc_dir_entry *p; char name[10]; if (!sht->show_info) return; e = scsi_lookup_proc_entry(sht); if (!e) goto err; sprintf(name,"%d", shost->host_no); p = proc_create_data(name, S_IRUGO | S_IWUSR, e->proc_dir, &proc_scsi_ops, shost); if (!p) goto err; return; err: shost_printk(KERN_ERR, shost, "%s: Failed to register host (%s failed)\n", __func__, e ? "proc_create_data()" : "scsi_proc_hostdir_add()"); } /** * scsi_proc_host_rm - remove this host's entry from /proc * @shost: which host */ void scsi_proc_host_rm(struct Scsi_Host *shost) { const struct scsi_host_template *sht = shost->hostt; struct scsi_proc_entry *e; char name[10]; if (!sht->show_info) return; e = scsi_lookup_proc_entry(sht); if (!e) return; sprintf(name,"%d", shost->host_no); remove_proc_entry(name, e->proc_dir); } /** * proc_print_scsidevice - return data about this host * @dev: A scsi device * @data: &struct seq_file to output to. * * Description: prints Host, Channel, Id, Lun, Vendor, Model, Rev, Type, * and revision. 
*/ static int proc_print_scsidevice(struct device *dev, void *data) { struct scsi_device *sdev; struct seq_file *s = data; int i; if (!scsi_is_sdev_device(dev)) goto out; sdev = to_scsi_device(dev); seq_printf(s, "Host: scsi%d Channel: %02d Id: %02d Lun: %02llu\n Vendor: ", sdev->host->host_no, sdev->channel, sdev->id, sdev->lun); for (i = 0; i < 8; i++) { if (sdev->vendor[i] >= 0x20) seq_putc(s, sdev->vendor[i]); else seq_putc(s, ' '); } seq_puts(s, " Model: "); for (i = 0; i < 16; i++) { if (sdev->model[i] >= 0x20) seq_putc(s, sdev->model[i]); else seq_putc(s, ' '); } seq_puts(s, " Rev: "); for (i = 0; i < 4; i++) { if (sdev->rev[i] >= 0x20) seq_putc(s, sdev->rev[i]); else seq_putc(s, ' '); } seq_putc(s, '\n'); seq_printf(s, " Type: %s ", scsi_device_type(sdev->type)); seq_printf(s, " ANSI SCSI revision: %02x", sdev->scsi_level - (sdev->scsi_level > 1)); if (sdev->scsi_level == 2) seq_puts(s, " CCS\n"); else seq_putc(s, '\n'); out: return 0; } /** * scsi_add_single_device - Respond to user request to probe for/add device * @host: user-supplied decimal integer * @channel: user-supplied decimal integer * @id: user-supplied decimal integer * @lun: user-supplied decimal integer * * Description: called by writing "scsi add-single-device" to /proc/scsi/scsi. * * does scsi_host_lookup() and either user_scan() if that transport * type supports it, or else scsi_scan_host_selected() * * Note: this seems to be aimed exclusively at SCSI parallel busses. */ static int scsi_add_single_device(uint host, uint channel, uint id, uint lun) { struct Scsi_Host *shost; int error = -ENXIO; shost = scsi_host_lookup(host); if (!shost) return error; if (shost->transportt->user_scan) error = shost->transportt->user_scan(shost, channel, id, lun); else error = scsi_scan_host_selected(shost, channel, id, lun, SCSI_SCAN_MANUAL); scsi_host_put(shost); return error; } /** * scsi_remove_single_device - Respond to user request to remove a device * @host: user-supplied decimal integer * @channel: user-supplied decimal integer * @id: user-supplied decimal integer * @lun: user-supplied decimal integer * * Description: called by writing "scsi remove-single-device" to * /proc/scsi/scsi. Does a scsi_device_lookup() and scsi_remove_device() */ static int scsi_remove_single_device(uint host, uint channel, uint id, uint lun) { struct scsi_device *sdev; struct Scsi_Host *shost; int error = -ENXIO; shost = scsi_host_lookup(host); if (!shost) return error; sdev = scsi_device_lookup(shost, channel, id, lun); if (sdev) { scsi_remove_device(sdev); scsi_device_put(sdev); error = 0; } scsi_host_put(shost); return error; } /** * proc_scsi_write - handle writes to /proc/scsi/scsi * @file: not used * @buf: buffer to write * @length: length of buf, at most PAGE_SIZE * @ppos: not used * * Description: this provides a legacy mechanism to add or remove devices by * Host, Channel, ID, and Lun. To use, * "echo 'scsi add-single-device 0 1 2 3' > /proc/scsi/scsi" or * "echo 'scsi remove-single-device 0 1 2 3' > /proc/scsi/scsi" with * "0 1 2 3" replaced by the Host, Channel, Id, and Lun. * * Note: this seems to be aimed at parallel SCSI. Most modern busses (USB, * SATA, Firewire, Fibre Channel, etc) dynamically assign these values to * provide a unique identifier and nothing more. 
*/ static ssize_t proc_scsi_write(struct file *file, const char __user *buf, size_t length, loff_t *ppos) { int host, channel, id, lun; char *buffer, *end, *p; int err; if (!buf || length > PAGE_SIZE) return -EINVAL; buffer = (char *)__get_free_page(GFP_KERNEL); if (!buffer) return -ENOMEM; err = -EFAULT; if (copy_from_user(buffer, buf, length)) goto out; err = -EINVAL; if (length < PAGE_SIZE) { end = buffer + length; *end = '\0'; } else { end = buffer + PAGE_SIZE - 1; if (*end) goto out; } /* * Usage: echo "scsi add-single-device 0 1 2 3" >/proc/scsi/scsi * with "0 1 2 3" replaced by your "Host Channel Id Lun". */ if (!strncmp("scsi add-single-device", buffer, 22)) { p = buffer + 23; host = (p < end) ? simple_strtoul(p, &p, 0) : 0; channel = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0; id = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0; lun = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0; err = scsi_add_single_device(host, channel, id, lun); /* * Usage: echo "scsi remove-single-device 0 1 2 3" >/proc/scsi/scsi * with "0 1 2 3" replaced by your "Host Channel Id Lun". */ } else if (!strncmp("scsi remove-single-device", buffer, 25)) { p = buffer + 26; host = (p < end) ? simple_strtoul(p, &p, 0) : 0; channel = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0; id = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0; lun = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0; err = scsi_remove_single_device(host, channel, id, lun); } /* * convert success returns so that we return the * number of bytes consumed. */ if (!err) err = length; out: free_page((unsigned long)buffer); return err; } static inline struct device *next_scsi_device(struct device *start) { struct device *next = bus_find_next_device(&scsi_bus_type, start); put_device(start); return next; } static void *scsi_seq_start(struct seq_file *sfile, loff_t *pos) { struct device *dev = NULL; loff_t n = *pos; while ((dev = next_scsi_device(dev))) { if (!n--) break; sfile->private++; } return dev; } static void *scsi_seq_next(struct seq_file *sfile, void *v, loff_t *pos) { (*pos)++; sfile->private++; return next_scsi_device(v); } static void scsi_seq_stop(struct seq_file *sfile, void *v) { put_device(v); } static int scsi_seq_show(struct seq_file *sfile, void *dev) { if (!sfile->private) seq_puts(sfile, "Attached devices:\n"); return proc_print_scsidevice(dev, sfile); } static const struct seq_operations scsi_seq_ops = { .start = scsi_seq_start, .next = scsi_seq_next, .stop = scsi_seq_stop, .show = scsi_seq_show }; /** * proc_scsi_open - glue function * @inode: not used * @file: passed to single_open() * * Associates proc_scsi_show with this file */ static int proc_scsi_open(struct inode *inode, struct file *file) { /* * We don't really need this for the write case but it doesn't * harm either. */ return seq_open(file, &scsi_seq_ops); } static const struct proc_ops scsi_scsi_proc_ops = { .proc_open = proc_scsi_open, .proc_read = seq_read, .proc_write = proc_scsi_write, .proc_lseek = seq_lseek, .proc_release = seq_release, }; /** * scsi_init_procfs - create scsi and scsi/scsi in procfs */ int __init scsi_init_procfs(void) { struct proc_dir_entry *pde; proc_scsi = proc_mkdir("scsi", NULL); if (!proc_scsi) goto err1; pde = proc_create("scsi/scsi", 0, NULL, &scsi_scsi_proc_ops); if (!pde) goto err2; return 0; err2: remove_proc_entry("scsi", NULL); err1: return -ENOMEM; } /** * scsi_exit_procfs - Remove scsi/scsi and scsi from procfs */ void scsi_exit_procfs(void) { remove_proc_entry("scsi/scsi", NULL); remove_proc_entry("scsi", NULL); } |
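/*
 * Hedged sketch -- not part of the file above. These are the host-template
 * hooks that scsi_proc.c serves: a low-level driver setting .proc_name,
 * .show_info and .write_info gets /proc/scsi/<proc_name>/<host_no> created by
 * scsi_proc_host_add(), read through proc_scsi_show() and written through
 * proc_scsi_host_write(). The "demo" names and contents are illustrative only.
 */
#include <linux/seq_file.h>
#include <scsi/scsi_host.h>

static int demo_show_info(struct seq_file *m, struct Scsi_Host *shost)
{
	seq_printf(m, "demo host %d\n", shost->host_no);	/* read side */
	return 0;
}

static int demo_write_info(struct Scsi_Host *shost, char *buffer, int length)
{
	/* parse 'buffer' (at most PROC_BLOCK_SIZE bytes) and act on it */
	return length;	/* report everything as consumed */
}

static const struct scsi_host_template demo_sht = {
	.name		= "demo",
	.proc_name	= "demo",
	.show_info	= demo_show_info,
	.write_info	= demo_write_info,
};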
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef DRIVERS_PCI_H #define DRIVERS_PCI_H #include <linux/pci.h> struct pcie_tlp_log; /* Number of possible devfns: 0.0 to 1f.7 inclusive */ #define MAX_NR_DEVFNS 256 #define PCI_FIND_CAP_TTL 48 #define PCI_VSEC_ID_INTEL_TBT 0x1234 /* Thunderbolt */ #define PCIE_LINK_RETRAIN_TIMEOUT_MS 1000 /* * Power stable to PERST# inactive. * * See the "Power Sequencing and Reset Signal Timings" table of the PCI Express * Card Electromechanical Specification, Revision 5.1, Section 2.9.2, Symbol * "T_PVPERL". */ #define PCIE_T_PVPERL_MS 100 /* * REFCLK stable before PERST# inactive. * * See the "Power Sequencing and Reset Signal Timings" table of the PCI Express * Card Electromechanical Specification, Revision 5.1, Section 2.9.2, Symbol * "T_PERST-CLK". */ #define PCIE_T_PERST_CLK_US 100 /* * End of conventional reset (PERST# de-asserted) to first configuration * request (device able to respond with a "Request Retry Status" completion), * from PCIe r6.0, sec 6.6.1. */ #define PCIE_T_RRS_READY_MS 100 /* * PCIe r6.0, sec 5.3.3.2.1 <PME Synchronization> * Recommends 1ms to 10ms timeout to check L2 ready. */ #define PCIE_PME_TO_L2_TIMEOUT_US 10000 /* * PCIe r6.0, sec 6.6.1 <Conventional Reset> * * - "With a Downstream Port that does not support Link speeds greater * than 5.0 GT/s, software must wait a minimum of 100 ms following exit * from a Conventional Reset before sending a Configuration Request to * the device immediately below that Port." * * - "With a Downstream Port that supports Link speeds greater than * 5.0 GT/s, software must wait a minimum of 100 ms after Link training * completes before sending a Configuration Request to the device * immediately below that Port."
*/ #define PCIE_RESET_CONFIG_DEVICE_WAIT_MS 100 /* Message Routing (r[2:0]); PCIe r6.0, sec 2.2.8 */ #define PCIE_MSG_TYPE_R_RC 0 #define PCIE_MSG_TYPE_R_ADDR 1 #define PCIE_MSG_TYPE_R_ID 2 #define PCIE_MSG_TYPE_R_BC 3 #define PCIE_MSG_TYPE_R_LOCAL 4 #define PCIE_MSG_TYPE_R_GATHER 5 /* Power Management Messages; PCIe r6.0, sec 2.2.8.2 */ #define PCIE_MSG_CODE_PME_TURN_OFF 0x19 /* INTx Mechanism Messages; PCIe r6.0, sec 2.2.8.1 */ #define PCIE_MSG_CODE_ASSERT_INTA 0x20 #define PCIE_MSG_CODE_ASSERT_INTB 0x21 #define PCIE_MSG_CODE_ASSERT_INTC 0x22 #define PCIE_MSG_CODE_ASSERT_INTD 0x23 #define PCIE_MSG_CODE_DEASSERT_INTA 0x24 #define PCIE_MSG_CODE_DEASSERT_INTB 0x25 #define PCIE_MSG_CODE_DEASSERT_INTC 0x26 #define PCIE_MSG_CODE_DEASSERT_INTD 0x27 extern const unsigned char pcie_link_speed[]; extern bool pci_early_dump; bool pcie_cap_has_lnkctl(const struct pci_dev *dev); bool pcie_cap_has_lnkctl2(const struct pci_dev *dev); bool pcie_cap_has_rtctl(const struct pci_dev *dev); /* Functions internal to the PCI core code */ #ifdef CONFIG_DMI extern const struct attribute_group pci_dev_smbios_attr_group; #endif enum pci_mmap_api { PCI_MMAP_SYSFS, /* mmap on /sys/bus/pci/devices/<BDF>/resource<N> */ PCI_MMAP_PROCFS /* mmap on /proc/bus/pci/<BDF> */ }; int pci_mmap_fits(struct pci_dev *pdev, int resno, struct vm_area_struct *vmai, enum pci_mmap_api mmap_api); bool pci_reset_supported(struct pci_dev *dev); void pci_init_reset_methods(struct pci_dev *dev); int pci_bridge_secondary_bus_reset(struct pci_dev *dev); int pci_bus_error_reset(struct pci_dev *dev); int __pci_reset_bus(struct pci_bus *bus); struct pci_cap_saved_data { u16 cap_nr; bool cap_extended; unsigned int size; u32 data[]; }; struct pci_cap_saved_state { struct hlist_node next; struct pci_cap_saved_data cap; }; void pci_allocate_cap_save_buffers(struct pci_dev *dev); void pci_free_cap_save_buffers(struct pci_dev *dev); int pci_add_cap_save_buffer(struct pci_dev *dev, char cap, unsigned int size); int pci_add_ext_cap_save_buffer(struct pci_dev *dev, u16 cap, unsigned int size); struct pci_cap_saved_state *pci_find_saved_cap(struct pci_dev *dev, char cap); struct pci_cap_saved_state *pci_find_saved_ext_cap(struct pci_dev *dev, u16 cap); #define PCI_PM_D2_DELAY 200 /* usec; see PCIe r4.0, sec 5.9.1 */ #define PCI_PM_D3HOT_WAIT 10 /* msec */ #define PCI_PM_D3COLD_WAIT 100 /* msec */ void pci_update_current_state(struct pci_dev *dev, pci_power_t state); void pci_refresh_power_state(struct pci_dev *dev); int pci_power_up(struct pci_dev *dev); void pci_disable_enabled_device(struct pci_dev *dev); int pci_finish_runtime_suspend(struct pci_dev *dev); void pcie_clear_device_status(struct pci_dev *dev); void pcie_clear_root_pme_status(struct pci_dev *dev); bool pci_check_pme_status(struct pci_dev *dev); void pci_pme_wakeup_bus(struct pci_bus *bus); void pci_pme_restore(struct pci_dev *dev); bool pci_dev_need_resume(struct pci_dev *dev); void pci_dev_adjust_pme(struct pci_dev *dev); void pci_dev_complete_resume(struct pci_dev *pci_dev); void pci_config_pm_runtime_get(struct pci_dev *dev); void pci_config_pm_runtime_put(struct pci_dev *dev); void pci_pm_init(struct pci_dev *dev); void pci_ea_init(struct pci_dev *dev); void pci_msi_init(struct pci_dev *dev); void pci_msix_init(struct pci_dev *dev); bool pci_bridge_d3_possible(struct pci_dev *dev); void pci_bridge_d3_update(struct pci_dev *dev); int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type); static inline bool pci_bus_rrs_vendor_id(u32 l) { return (l & 0xffff) == 
PCI_VENDOR_ID_PCI_SIG; } static inline void pci_wakeup_event(struct pci_dev *dev) { /* Wait 100 ms before the system can be put into a sleep state. */ pm_wakeup_event(&dev->dev, 100); } /** * pci_bar_index_is_valid - Check whether a BAR index is within valid range * @bar: BAR index * * Protects against overflowing &struct pci_dev.resource array. * * Return: true for valid index, false otherwise. */ static inline bool pci_bar_index_is_valid(int bar) { if (bar >= 0 && bar < PCI_NUM_RESOURCES) return true; return false; } static inline bool pci_has_subordinate(struct pci_dev *pci_dev) { return !!(pci_dev->subordinate); } static inline bool pci_power_manageable(struct pci_dev *pci_dev) { /* * Currently we allow normal PCI devices and PCI bridges transition * into D3 if their bridge_d3 is set. */ return !pci_has_subordinate(pci_dev) || pci_dev->bridge_d3; } static inline bool pcie_downstream_port(const struct pci_dev *dev) { int type = pci_pcie_type(dev); return type == PCI_EXP_TYPE_ROOT_PORT || type == PCI_EXP_TYPE_DOWNSTREAM || type == PCI_EXP_TYPE_PCIE_BRIDGE; } void pci_vpd_init(struct pci_dev *dev); extern const struct attribute_group pci_dev_vpd_attr_group; /* PCI Virtual Channel */ int pci_save_vc_state(struct pci_dev *dev); void pci_restore_vc_state(struct pci_dev *dev); void pci_allocate_vc_save_buffers(struct pci_dev *dev); /* PCI /proc functions */ #ifdef CONFIG_PROC_FS int pci_proc_attach_device(struct pci_dev *dev); int pci_proc_detach_device(struct pci_dev *dev); int pci_proc_detach_bus(struct pci_bus *bus); #else static inline int pci_proc_attach_device(struct pci_dev *dev) { return 0; } static inline int pci_proc_detach_device(struct pci_dev *dev) { return 0; } static inline int pci_proc_detach_bus(struct pci_bus *bus) { return 0; } #endif /* Functions for PCI Hotplug drivers to use */ int pci_hp_add_bridge(struct pci_dev *dev); #if defined(CONFIG_SYSFS) && defined(HAVE_PCI_LEGACY) void pci_create_legacy_files(struct pci_bus *bus); void pci_remove_legacy_files(struct pci_bus *bus); #else static inline void pci_create_legacy_files(struct pci_bus *bus) { } static inline void pci_remove_legacy_files(struct pci_bus *bus) { } #endif /* Lock for read/write access to pci device and bus lists */ extern struct rw_semaphore pci_bus_sem; extern struct mutex pci_slot_mutex; extern raw_spinlock_t pci_lock; extern unsigned int pci_pm_d3hot_delay; #ifdef CONFIG_PCI_MSI void pci_no_msi(void); #else static inline void pci_no_msi(void) { } #endif void pci_realloc_get_opt(char *); static inline int pci_no_d1d2(struct pci_dev *dev) { unsigned int parent_dstates = 0; if (dev->bus->self) parent_dstates = dev->bus->self->no_d1d2; return (dev->no_d1d2 || parent_dstates); } #ifdef CONFIG_SYSFS int pci_create_sysfs_dev_files(struct pci_dev *pdev); void pci_remove_sysfs_dev_files(struct pci_dev *pdev); extern const struct attribute_group *pci_dev_groups[]; extern const struct attribute_group *pci_dev_attr_groups[]; extern const struct attribute_group *pcibus_groups[]; extern const struct attribute_group *pci_bus_groups[]; extern const struct attribute_group pci_doe_sysfs_group; #else static inline int pci_create_sysfs_dev_files(struct pci_dev *pdev) { return 0; } static inline void pci_remove_sysfs_dev_files(struct pci_dev *pdev) { } #define pci_dev_groups NULL #define pci_dev_attr_groups NULL #define pcibus_groups NULL #define pci_bus_groups NULL #endif extern unsigned long pci_hotplug_io_size; extern unsigned long pci_hotplug_mmio_size; extern unsigned long pci_hotplug_mmio_pref_size; extern unsigned 
long pci_hotplug_bus_size; extern unsigned long pci_cardbus_io_size; extern unsigned long pci_cardbus_mem_size; /** * pci_match_one_device - Tell if a PCI device structure has a matching * PCI device id structure * @id: single PCI device id structure to match * @dev: the PCI device structure to match against * * Returns the matching pci_device_id structure or %NULL if there is no match. */ static inline const struct pci_device_id * pci_match_one_device(const struct pci_device_id *id, const struct pci_dev *dev) { if ((id->vendor == PCI_ANY_ID || id->vendor == dev->vendor) && (id->device == PCI_ANY_ID || id->device == dev->device) && (id->subvendor == PCI_ANY_ID || id->subvendor == dev->subsystem_vendor) && (id->subdevice == PCI_ANY_ID || id->subdevice == dev->subsystem_device) && !((id->class ^ dev->class) & id->class_mask)) return id; return NULL; } /* PCI slot sysfs helper code */ #define to_pci_slot(s) container_of(s, struct pci_slot, kobj) extern struct kset *pci_slots_kset; struct pci_slot_attribute { struct attribute attr; ssize_t (*show)(struct pci_slot *, char *); ssize_t (*store)(struct pci_slot *, const char *, size_t); }; #define to_pci_slot_attr(s) container_of(s, struct pci_slot_attribute, attr) enum pci_bar_type { pci_bar_unknown, /* Standard PCI BAR probe */ pci_bar_io, /* An I/O port BAR */ pci_bar_mem32, /* A 32-bit memory BAR */ pci_bar_mem64, /* A 64-bit memory BAR */ }; struct device *pci_get_host_bridge_device(struct pci_dev *dev); void pci_put_host_bridge_device(struct device *dev); unsigned int pci_rescan_bus_bridge_resize(struct pci_dev *bridge); int pci_reassign_bridge_resources(struct pci_dev *bridge, unsigned long type); int __must_check pci_reassign_resource(struct pci_dev *dev, int i, resource_size_t add_size, resource_size_t align); int pci_configure_extended_tags(struct pci_dev *dev, void *ign); bool pci_bus_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *pl, int rrs_timeout); bool pci_bus_generic_read_dev_vendor_id(struct pci_bus *bus, int devfn, u32 *pl, int rrs_timeout); int pci_idt_bus_quirk(struct pci_bus *bus, int devfn, u32 *pl, int rrs_timeout); int pci_setup_device(struct pci_dev *dev); void __pci_size_stdbars(struct pci_dev *dev, int count, unsigned int pos, u32 *sizes); int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, struct resource *res, unsigned int reg, u32 *sizes); void pci_configure_ari(struct pci_dev *dev); void __pci_bus_size_bridges(struct pci_bus *bus, struct list_head *realloc_head); void __pci_bus_assign_resources(const struct pci_bus *bus, struct list_head *realloc_head, struct list_head *fail_head); bool pci_bus_clip_resource(struct pci_dev *dev, int idx); void pci_walk_bus_locked(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata); const char *pci_resource_name(struct pci_dev *dev, unsigned int i); bool pci_resource_is_optional(const struct pci_dev *dev, int resno); /** * pci_resource_num - Reverse lookup resource number from device resources * @dev: PCI device * @res: Resource to lookup index for (MUST be a @dev's resource) * * Perform reverse lookup to determine the resource number for @res within * @dev resource array. NOTE: The caller is responsible for ensuring @res is * among @dev's resources! * * Returns: resource number. */ static inline int pci_resource_num(const struct pci_dev *dev, const struct resource *res) { int resno = res - &dev->resource[0]; /* Passing a resource that is not among dev's resources? 
*/ WARN_ON_ONCE(resno >= PCI_NUM_RESOURCES); return resno; } void pci_reassigndev_resource_alignment(struct pci_dev *dev); void pci_disable_bridge_window(struct pci_dev *dev); struct pci_bus *pci_bus_get(struct pci_bus *bus); void pci_bus_put(struct pci_bus *bus); #define PCIE_LNKCAP_SLS2SPEED(lnkcap) \ ({ \ ((lnkcap) == PCI_EXP_LNKCAP_SLS_64_0GB ? PCIE_SPEED_64_0GT : \ (lnkcap) == PCI_EXP_LNKCAP_SLS_32_0GB ? PCIE_SPEED_32_0GT : \ (lnkcap) == PCI_EXP_LNKCAP_SLS_16_0GB ? PCIE_SPEED_16_0GT : \ (lnkcap) == PCI_EXP_LNKCAP_SLS_8_0GB ? PCIE_SPEED_8_0GT : \ (lnkcap) == PCI_EXP_LNKCAP_SLS_5_0GB ? PCIE_SPEED_5_0GT : \ (lnkcap) == PCI_EXP_LNKCAP_SLS_2_5GB ? PCIE_SPEED_2_5GT : \ PCI_SPEED_UNKNOWN); \ }) /* PCIe link information from Link Capabilities 2 */ #define PCIE_LNKCAP2_SLS2SPEED(lnkcap2) \ ((lnkcap2) & PCI_EXP_LNKCAP2_SLS_64_0GB ? PCIE_SPEED_64_0GT : \ (lnkcap2) & PCI_EXP_LNKCAP2_SLS_32_0GB ? PCIE_SPEED_32_0GT : \ (lnkcap2) & PCI_EXP_LNKCAP2_SLS_16_0GB ? PCIE_SPEED_16_0GT : \ (lnkcap2) & PCI_EXP_LNKCAP2_SLS_8_0GB ? PCIE_SPEED_8_0GT : \ (lnkcap2) & PCI_EXP_LNKCAP2_SLS_5_0GB ? PCIE_SPEED_5_0GT : \ (lnkcap2) & PCI_EXP_LNKCAP2_SLS_2_5GB ? PCIE_SPEED_2_5GT : \ PCI_SPEED_UNKNOWN) #define PCIE_LNKCTL2_TLS2SPEED(lnkctl2) \ ((lnkctl2) == PCI_EXP_LNKCTL2_TLS_64_0GT ? PCIE_SPEED_64_0GT : \ (lnkctl2) == PCI_EXP_LNKCTL2_TLS_32_0GT ? PCIE_SPEED_32_0GT : \ (lnkctl2) == PCI_EXP_LNKCTL2_TLS_16_0GT ? PCIE_SPEED_16_0GT : \ (lnkctl2) == PCI_EXP_LNKCTL2_TLS_8_0GT ? PCIE_SPEED_8_0GT : \ (lnkctl2) == PCI_EXP_LNKCTL2_TLS_5_0GT ? PCIE_SPEED_5_0GT : \ (lnkctl2) == PCI_EXP_LNKCTL2_TLS_2_5GT ? PCIE_SPEED_2_5GT : \ PCI_SPEED_UNKNOWN) /* PCIe speed to Mb/s reduced by encoding overhead */ #define PCIE_SPEED2MBS_ENC(speed) \ ((speed) == PCIE_SPEED_64_0GT ? 64000*1/1 : \ (speed) == PCIE_SPEED_32_0GT ? 32000*128/130 : \ (speed) == PCIE_SPEED_16_0GT ? 16000*128/130 : \ (speed) == PCIE_SPEED_8_0GT ? 8000*128/130 : \ (speed) == PCIE_SPEED_5_0GT ? 5000*8/10 : \ (speed) == PCIE_SPEED_2_5GT ? 2500*8/10 : \ 0) static inline int pcie_dev_speed_mbps(enum pci_bus_speed speed) { switch (speed) { case PCIE_SPEED_2_5GT: return 2500; case PCIE_SPEED_5_0GT: return 5000; case PCIE_SPEED_8_0GT: return 8000; case PCIE_SPEED_16_0GT: return 16000; case PCIE_SPEED_32_0GT: return 32000; case PCIE_SPEED_64_0GT: return 64000; default: break; } return -EINVAL; } u8 pcie_get_supported_speeds(struct pci_dev *dev); const char *pci_speed_string(enum pci_bus_speed speed); void __pcie_print_link_status(struct pci_dev *dev, bool verbose); void pcie_report_downtraining(struct pci_dev *dev); static inline void __pcie_update_link_speed(struct pci_bus *bus, u16 linksta, u16 linksta2) { bus->cur_bus_speed = pcie_link_speed[linksta & PCI_EXP_LNKSTA_CLS]; bus->flit_mode = (linksta2 & PCI_EXP_LNKSTA2_FLIT) ? 
1 : 0; } void pcie_update_link_speed(struct pci_bus *bus); /* Single Root I/O Virtualization */ struct pci_sriov { int pos; /* Capability position */ int nres; /* Number of resources */ u32 cap; /* SR-IOV Capabilities */ u16 ctrl; /* SR-IOV Control */ u16 total_VFs; /* Total VFs associated with the PF */ u16 initial_VFs; /* Initial VFs associated with the PF */ u16 num_VFs; /* Number of VFs available */ u16 offset; /* First VF Routing ID offset */ u16 stride; /* Following VF stride */ u16 vf_device; /* VF device ID */ u32 pgsz; /* Page size for BAR alignment */ u8 link; /* Function Dependency Link */ u8 max_VF_buses; /* Max buses consumed by VFs */ u16 driver_max_VFs; /* Max num VFs driver supports */ struct pci_dev *dev; /* Lowest numbered PF */ struct pci_dev *self; /* This PF */ u32 class; /* VF device */ u8 hdr_type; /* VF header type */ u16 subsystem_vendor; /* VF subsystem vendor */ u16 subsystem_device; /* VF subsystem device */ resource_size_t barsz[PCI_SRIOV_NUM_BARS]; /* VF BAR size */ bool drivers_autoprobe; /* Auto probing of VFs by driver */ }; #ifdef CONFIG_PCI_DOE void pci_doe_init(struct pci_dev *pdev); void pci_doe_destroy(struct pci_dev *pdev); void pci_doe_disconnected(struct pci_dev *pdev); #else static inline void pci_doe_init(struct pci_dev *pdev) { } static inline void pci_doe_destroy(struct pci_dev *pdev) { } static inline void pci_doe_disconnected(struct pci_dev *pdev) { } #endif #ifdef CONFIG_PCI_NPEM void pci_npem_create(struct pci_dev *dev); void pci_npem_remove(struct pci_dev *dev); #else static inline void pci_npem_create(struct pci_dev *dev) { } static inline void pci_npem_remove(struct pci_dev *dev) { } #endif #if defined(CONFIG_PCI_DOE) && defined(CONFIG_SYSFS) void pci_doe_sysfs_init(struct pci_dev *pci_dev); void pci_doe_sysfs_teardown(struct pci_dev *pdev); #else static inline void pci_doe_sysfs_init(struct pci_dev *pdev) { } static inline void pci_doe_sysfs_teardown(struct pci_dev *pdev) { } #endif /** * pci_dev_set_io_state - Set the new error state if possible. * * @dev: PCI device to set new error_state * @new: the state we want dev to be in * * If the device is experiencing perm_failure, it has to remain in that state. * Any other transition is allowed. * * Returns true if state has been changed to the requested state. 
*/ static inline bool pci_dev_set_io_state(struct pci_dev *dev, pci_channel_state_t new) { pci_channel_state_t old; switch (new) { case pci_channel_io_perm_failure: xchg(&dev->error_state, pci_channel_io_perm_failure); return true; case pci_channel_io_frozen: old = cmpxchg(&dev->error_state, pci_channel_io_normal, pci_channel_io_frozen); return old != pci_channel_io_perm_failure; case pci_channel_io_normal: old = cmpxchg(&dev->error_state, pci_channel_io_frozen, pci_channel_io_normal); return old != pci_channel_io_perm_failure; default: return false; } } static inline int pci_dev_set_disconnected(struct pci_dev *dev, void *unused) { pci_dev_set_io_state(dev, pci_channel_io_perm_failure); pci_doe_disconnected(dev); return 0; } /* pci_dev priv_flags */ #define PCI_DEV_ADDED 0 #define PCI_DPC_RECOVERED 1 #define PCI_DPC_RECOVERING 2 #define PCI_DEV_REMOVED 3 static inline void pci_dev_assign_added(struct pci_dev *dev) { smp_mb__before_atomic(); set_bit(PCI_DEV_ADDED, &dev->priv_flags); smp_mb__after_atomic(); } static inline bool pci_dev_test_and_clear_added(struct pci_dev *dev) { return test_and_clear_bit(PCI_DEV_ADDED, &dev->priv_flags); } static inline bool pci_dev_is_added(const struct pci_dev *dev) { return test_bit(PCI_DEV_ADDED, &dev->priv_flags); } static inline bool pci_dev_test_and_set_removed(struct pci_dev *dev) { return test_and_set_bit(PCI_DEV_REMOVED, &dev->priv_flags); } #ifdef CONFIG_PCIEAER #include <linux/aer.h> #define AER_MAX_MULTI_ERR_DEVICES 5 /* Not likely to have more */ struct aer_err_info { struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES]; int error_dev_num; unsigned int id:16; unsigned int severity:2; /* 0:NONFATAL | 1:FATAL | 2:COR */ unsigned int __pad1:5; unsigned int multi_error_valid:1; unsigned int first_error:5; unsigned int __pad2:2; unsigned int tlp_header_valid:1; unsigned int status; /* COR/UNCOR Error Status */ unsigned int mask; /* COR/UNCOR Error Mask */ struct pcie_tlp_log tlp; /* TLP Header */ }; int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info); void aer_print_error(struct pci_dev *dev, struct aer_err_info *info); int pcie_read_tlp_log(struct pci_dev *dev, int where, int where2, unsigned int tlp_len, bool flit, struct pcie_tlp_log *log); unsigned int aer_tlp_log_len(struct pci_dev *dev, u32 aercc); void pcie_print_tlp_log(const struct pci_dev *dev, const struct pcie_tlp_log *log, const char *pfx); #endif /* CONFIG_PCIEAER */ #ifdef CONFIG_PCIEPORTBUS /* Cached RCEC Endpoint Association */ struct rcec_ea { u8 nextbusn; u8 lastbusn; u32 bitmap; }; #endif #ifdef CONFIG_PCIE_DPC void pci_save_dpc_state(struct pci_dev *dev); void pci_restore_dpc_state(struct pci_dev *dev); void pci_dpc_init(struct pci_dev *pdev); void dpc_process_error(struct pci_dev *pdev); pci_ers_result_t dpc_reset_link(struct pci_dev *pdev); bool pci_dpc_recovered(struct pci_dev *pdev); unsigned int dpc_tlp_log_len(struct pci_dev *dev); #else static inline void pci_save_dpc_state(struct pci_dev *dev) { } static inline void pci_restore_dpc_state(struct pci_dev *dev) { } static inline void pci_dpc_init(struct pci_dev *pdev) { } static inline bool pci_dpc_recovered(struct pci_dev *pdev) { return false; } #endif #ifdef CONFIG_PCIEPORTBUS void pci_rcec_init(struct pci_dev *dev); void pci_rcec_exit(struct pci_dev *dev); void pcie_link_rcec(struct pci_dev *rcec); void pcie_walk_rcec(struct pci_dev *rcec, int (*cb)(struct pci_dev *, void *), void *userdata); #else static inline void pci_rcec_init(struct pci_dev *dev) { } static inline void pci_rcec_exit(struct 
pci_dev *dev) { } static inline void pcie_link_rcec(struct pci_dev *rcec) { } static inline void pcie_walk_rcec(struct pci_dev *rcec, int (*cb)(struct pci_dev *, void *), void *userdata) { } #endif #ifdef CONFIG_PCI_ATS /* Address Translation Service */ void pci_ats_init(struct pci_dev *dev); void pci_restore_ats_state(struct pci_dev *dev); #else static inline void pci_ats_init(struct pci_dev *d) { } static inline void pci_restore_ats_state(struct pci_dev *dev) { } #endif /* CONFIG_PCI_ATS */ #ifdef CONFIG_PCI_PRI void pci_pri_init(struct pci_dev *dev); void pci_restore_pri_state(struct pci_dev *pdev); #else static inline void pci_pri_init(struct pci_dev *dev) { } static inline void pci_restore_pri_state(struct pci_dev *pdev) { } #endif #ifdef CONFIG_PCI_PASID void pci_pasid_init(struct pci_dev *dev); void pci_restore_pasid_state(struct pci_dev *pdev); #else static inline void pci_pasid_init(struct pci_dev *dev) { } static inline void pci_restore_pasid_state(struct pci_dev *pdev) { } #endif #ifdef CONFIG_PCI_IOV int pci_iov_init(struct pci_dev *dev); void pci_iov_release(struct pci_dev *dev); void pci_iov_remove(struct pci_dev *dev); void pci_iov_update_resource(struct pci_dev *dev, int resno); resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno); void pci_restore_iov_state(struct pci_dev *dev); int pci_iov_bus_range(struct pci_bus *bus); static inline bool pci_resource_is_iov(int resno) { return resno >= PCI_IOV_RESOURCES && resno <= PCI_IOV_RESOURCE_END; } extern const struct attribute_group sriov_pf_dev_attr_group; extern const struct attribute_group sriov_vf_dev_attr_group; #else static inline int pci_iov_init(struct pci_dev *dev) { return -ENODEV; } static inline void pci_iov_release(struct pci_dev *dev) { } static inline void pci_iov_remove(struct pci_dev *dev) { } static inline void pci_iov_update_resource(struct pci_dev *dev, int resno) { } static inline resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno) { return 0; } static inline void pci_restore_iov_state(struct pci_dev *dev) { } static inline int pci_iov_bus_range(struct pci_bus *bus) { return 0; } static inline bool pci_resource_is_iov(int resno) { return false; } #endif /* CONFIG_PCI_IOV */ #ifdef CONFIG_PCIE_TPH void pci_restore_tph_state(struct pci_dev *dev); void pci_save_tph_state(struct pci_dev *dev); void pci_no_tph(void); void pci_tph_init(struct pci_dev *dev); #else static inline void pci_restore_tph_state(struct pci_dev *dev) { } static inline void pci_save_tph_state(struct pci_dev *dev) { } static inline void pci_no_tph(void) { } static inline void pci_tph_init(struct pci_dev *dev) { } #endif #ifdef CONFIG_PCIE_PTM void pci_ptm_init(struct pci_dev *dev); void pci_save_ptm_state(struct pci_dev *dev); void pci_restore_ptm_state(struct pci_dev *dev); void pci_suspend_ptm(struct pci_dev *dev); void pci_resume_ptm(struct pci_dev *dev); #else static inline void pci_ptm_init(struct pci_dev *dev) { } static inline void pci_save_ptm_state(struct pci_dev *dev) { } static inline void pci_restore_ptm_state(struct pci_dev *dev) { } static inline void pci_suspend_ptm(struct pci_dev *dev) { } static inline void pci_resume_ptm(struct pci_dev *dev) { } #endif unsigned long pci_cardbus_resource_alignment(struct resource *); static inline resource_size_t pci_resource_alignment(struct pci_dev *dev, struct resource *res) { int resno = pci_resource_num(dev, res); if (pci_resource_is_iov(resno)) return pci_sriov_resource_alignment(dev, resno); if (dev->class >> 8 == 
PCI_CLASS_BRIDGE_CARDBUS) return pci_cardbus_resource_alignment(res); return resource_alignment(res); } void pci_acs_init(struct pci_dev *dev); #ifdef CONFIG_PCI_QUIRKS int pci_dev_specific_acs_enabled(struct pci_dev *dev, u16 acs_flags); int pci_dev_specific_enable_acs(struct pci_dev *dev); int pci_dev_specific_disable_acs_redir(struct pci_dev *dev); int pcie_failed_link_retrain(struct pci_dev *dev); #else static inline int pci_dev_specific_acs_enabled(struct pci_dev *dev, u16 acs_flags) { return -ENOTTY; } static inline int pci_dev_specific_enable_acs(struct pci_dev *dev) { return -ENOTTY; } static inline int pci_dev_specific_disable_acs_redir(struct pci_dev *dev) { return -ENOTTY; } static inline int pcie_failed_link_retrain(struct pci_dev *dev) { return -ENOTTY; } #endif /* PCI error reporting and recovery */ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, pci_channel_state_t state, pci_ers_result_t (*reset_subordinates)(struct pci_dev *pdev)); bool pcie_wait_for_link(struct pci_dev *pdev, bool active); int pcie_retrain_link(struct pci_dev *pdev, bool use_lt); /* ASPM-related functionality we need even without CONFIG_PCIEASPM */ void pci_save_ltr_state(struct pci_dev *dev); void pci_restore_ltr_state(struct pci_dev *dev); void pci_configure_aspm_l1ss(struct pci_dev *dev); void pci_save_aspm_l1ss_state(struct pci_dev *dev); void pci_restore_aspm_l1ss_state(struct pci_dev *dev); #ifdef CONFIG_PCIEASPM void pcie_aspm_init_link_state(struct pci_dev *pdev); void pcie_aspm_exit_link_state(struct pci_dev *pdev); void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked); void pcie_aspm_powersave_config_link(struct pci_dev *pdev); void pci_configure_ltr(struct pci_dev *pdev); void pci_bridge_reconfigure_ltr(struct pci_dev *pdev); #else static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { } static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { } static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked) { } static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev) { } static inline void pci_configure_ltr(struct pci_dev *pdev) { } static inline void pci_bridge_reconfigure_ltr(struct pci_dev *pdev) { } #endif #ifdef CONFIG_PCIE_ECRC void pcie_set_ecrc_checking(struct pci_dev *dev); void pcie_ecrc_get_policy(char *str); #else static inline void pcie_set_ecrc_checking(struct pci_dev *dev) { } static inline void pcie_ecrc_get_policy(char *str) { } #endif #ifdef CONFIG_PCIEPORTBUS void pcie_reset_lbms_count(struct pci_dev *port); int pcie_lbms_count(struct pci_dev *port, unsigned long *val); #else static inline void pcie_reset_lbms_count(struct pci_dev *port) {} static inline int pcie_lbms_count(struct pci_dev *port, unsigned long *val) { return -EOPNOTSUPP; } #endif struct pci_dev_reset_methods { u16 vendor; u16 device; int (*reset)(struct pci_dev *dev, bool probe); }; struct pci_reset_fn_method { int (*reset_fn)(struct pci_dev *pdev, bool probe); char *name; }; extern const struct pci_reset_fn_method pci_reset_fn_methods[]; #ifdef CONFIG_PCI_QUIRKS int pci_dev_specific_reset(struct pci_dev *dev, bool probe); #else static inline int pci_dev_specific_reset(struct pci_dev *dev, bool probe) { return -ENOTTY; } #endif #if defined(CONFIG_PCI_QUIRKS) && defined(CONFIG_ARM64) int acpi_get_rc_resources(struct device *dev, const char *hid, u16 segment, struct resource *res); #else static inline int acpi_get_rc_resources(struct device *dev, const char *hid, u16 segment, struct resource *res) { return -ENODEV; } #endif void 
pci_rebar_init(struct pci_dev *pdev); int pci_rebar_get_current_size(struct pci_dev *pdev, int bar); int pci_rebar_set_size(struct pci_dev *pdev, int bar, int size); static inline u64 pci_rebar_size_to_bytes(int size) { return 1ULL << (size + 20); } struct device_node; #ifdef CONFIG_OF int of_get_pci_domain_nr(struct device_node *node); int of_pci_get_max_link_speed(struct device_node *node); u32 of_pci_get_slot_power_limit(struct device_node *node, u8 *slot_power_limit_value, u8 *slot_power_limit_scale); bool of_pci_preserve_config(struct device_node *node); int pci_set_of_node(struct pci_dev *dev); void pci_release_of_node(struct pci_dev *dev); void pci_set_bus_of_node(struct pci_bus *bus); void pci_release_bus_of_node(struct pci_bus *bus); int devm_of_pci_bridge_init(struct device *dev, struct pci_host_bridge *bridge); bool of_pci_supply_present(struct device_node *np); #else static inline int of_get_pci_domain_nr(struct device_node *node) { return -1; } static inline int of_pci_get_max_link_speed(struct device_node *node) { return -EINVAL; } static inline u32 of_pci_get_slot_power_limit(struct device_node *node, u8 *slot_power_limit_value, u8 *slot_power_limit_scale) { if (slot_power_limit_value) *slot_power_limit_value = 0; if (slot_power_limit_scale) *slot_power_limit_scale = 0; return 0; } static inline bool of_pci_preserve_config(struct device_node *node) { return false; } static inline int pci_set_of_node(struct pci_dev *dev) { return 0; } static inline void pci_release_of_node(struct pci_dev *dev) { } static inline void pci_set_bus_of_node(struct pci_bus *bus) { } static inline void pci_release_bus_of_node(struct pci_bus *bus) { } static inline int devm_of_pci_bridge_init(struct device *dev, struct pci_host_bridge *bridge) { return 0; } static inline bool of_pci_supply_present(struct device_node *np) { return false; } #endif /* CONFIG_OF */ struct of_changeset; #ifdef CONFIG_PCI_DYNAMIC_OF_NODES void of_pci_make_dev_node(struct pci_dev *pdev); void of_pci_remove_node(struct pci_dev *pdev); int of_pci_add_properties(struct pci_dev *pdev, struct of_changeset *ocs, struct device_node *np); void of_pci_make_host_bridge_node(struct pci_host_bridge *bridge); void of_pci_remove_host_bridge_node(struct pci_host_bridge *bridge); int of_pci_add_host_bridge_properties(struct pci_host_bridge *bridge, struct of_changeset *ocs, struct device_node *np); #else static inline void of_pci_make_dev_node(struct pci_dev *pdev) { } static inline void of_pci_remove_node(struct pci_dev *pdev) { } static inline void of_pci_make_host_bridge_node(struct pci_host_bridge *bridge) { } static inline void of_pci_remove_host_bridge_node(struct pci_host_bridge *bridge) { } #endif #ifdef CONFIG_PCIEAER void pci_no_aer(void); void pci_aer_init(struct pci_dev *dev); void pci_aer_exit(struct pci_dev *dev); extern const struct attribute_group aer_stats_attr_group; void pci_aer_clear_fatal_status(struct pci_dev *dev); int pci_aer_clear_status(struct pci_dev *dev); int pci_aer_raw_clear_status(struct pci_dev *dev); void pci_save_aer_state(struct pci_dev *dev); void pci_restore_aer_state(struct pci_dev *dev); #else static inline void pci_no_aer(void) { } static inline void pci_aer_init(struct pci_dev *d) { } static inline void pci_aer_exit(struct pci_dev *d) { } static inline void pci_aer_clear_fatal_status(struct pci_dev *dev) { } static inline int pci_aer_clear_status(struct pci_dev *dev) { return -EINVAL; } static inline int pci_aer_raw_clear_status(struct pci_dev *dev) { return -EINVAL; } static inline void 
pci_save_aer_state(struct pci_dev *dev) { } static inline void pci_restore_aer_state(struct pci_dev *dev) { } #endif #ifdef CONFIG_ACPI bool pci_acpi_preserve_config(struct pci_host_bridge *bridge); int pci_acpi_program_hp_params(struct pci_dev *dev); extern const struct attribute_group pci_dev_acpi_attr_group; void pci_set_acpi_fwnode(struct pci_dev *dev); int pci_dev_acpi_reset(struct pci_dev *dev, bool probe); bool acpi_pci_power_manageable(struct pci_dev *dev); bool acpi_pci_bridge_d3(struct pci_dev *dev); int acpi_pci_set_power_state(struct pci_dev *dev, pci_power_t state); pci_power_t acpi_pci_get_power_state(struct pci_dev *dev); void acpi_pci_refresh_power_state(struct pci_dev *dev); int acpi_pci_wakeup(struct pci_dev *dev, bool enable); bool acpi_pci_need_resume(struct pci_dev *dev); pci_power_t acpi_pci_choose_state(struct pci_dev *pdev); #else static inline bool pci_acpi_preserve_config(struct pci_host_bridge *bridge) { return false; } static inline int pci_dev_acpi_reset(struct pci_dev *dev, bool probe) { return -ENOTTY; } static inline void pci_set_acpi_fwnode(struct pci_dev *dev) { } static inline int pci_acpi_program_hp_params(struct pci_dev *dev) { return -ENODEV; } static inline bool acpi_pci_power_manageable(struct pci_dev *dev) { return false; } static inline bool acpi_pci_bridge_d3(struct pci_dev *dev) { return false; } static inline int acpi_pci_set_power_state(struct pci_dev *dev, pci_power_t state) { return -ENODEV; } static inline pci_power_t acpi_pci_get_power_state(struct pci_dev *dev) { return PCI_UNKNOWN; } static inline void acpi_pci_refresh_power_state(struct pci_dev *dev) { } static inline int acpi_pci_wakeup(struct pci_dev *dev, bool enable) { return -ENODEV; } static inline bool acpi_pci_need_resume(struct pci_dev *dev) { return false; } static inline pci_power_t acpi_pci_choose_state(struct pci_dev *pdev) { return PCI_POWER_ERROR; } #endif #ifdef CONFIG_PCIEASPM extern const struct attribute_group aspm_ctrl_attr_group; #endif #ifdef CONFIG_X86_INTEL_MID bool pci_use_mid_pm(void); int mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state); pci_power_t mid_pci_get_power_state(struct pci_dev *pdev); #else static inline bool pci_use_mid_pm(void) { return false; } static inline int mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state) { return -ENODEV; } static inline pci_power_t mid_pci_get_power_state(struct pci_dev *pdev) { return PCI_UNKNOWN; } #endif int pcim_intx(struct pci_dev *dev, int enable); int pcim_request_region_exclusive(struct pci_dev *pdev, int bar, const char *name); void pcim_release_region(struct pci_dev *pdev, int bar); /* * Config Address for PCI Configuration Mechanism #1 * * See PCI Local Bus Specification, Revision 3.0, * Section 3.2.2.3.2, Figure 3-2, p. 50. 
*/ #define PCI_CONF1_BUS_SHIFT 16 /* Bus number */ #define PCI_CONF1_DEV_SHIFT 11 /* Device number */ #define PCI_CONF1_FUNC_SHIFT 8 /* Function number */ #define PCI_CONF1_BUS_MASK 0xff #define PCI_CONF1_DEV_MASK 0x1f #define PCI_CONF1_FUNC_MASK 0x7 #define PCI_CONF1_REG_MASK 0xfc /* Limit aligned offset to a maximum of 256B */ #define PCI_CONF1_ENABLE BIT(31) #define PCI_CONF1_BUS(x) (((x) & PCI_CONF1_BUS_MASK) << PCI_CONF1_BUS_SHIFT) #define PCI_CONF1_DEV(x) (((x) & PCI_CONF1_DEV_MASK) << PCI_CONF1_DEV_SHIFT) #define PCI_CONF1_FUNC(x) (((x) & PCI_CONF1_FUNC_MASK) << PCI_CONF1_FUNC_SHIFT) #define PCI_CONF1_REG(x) ((x) & PCI_CONF1_REG_MASK) #define PCI_CONF1_ADDRESS(bus, dev, func, reg) \ (PCI_CONF1_ENABLE | \ PCI_CONF1_BUS(bus) | \ PCI_CONF1_DEV(dev) | \ PCI_CONF1_FUNC(func) | \ PCI_CONF1_REG(reg)) /* * Extension of PCI Config Address for accessing extended PCIe registers * * No standardized specification, but used on lot of non-ECAM-compliant ARM SoCs * or on AMD Barcelona and new CPUs. Reserved bits [27:24] of PCI Config Address * are used for specifying additional 4 high bits of PCI Express register. */ #define PCI_CONF1_EXT_REG_SHIFT 16 #define PCI_CONF1_EXT_REG_MASK 0xf00 #define PCI_CONF1_EXT_REG(x) (((x) & PCI_CONF1_EXT_REG_MASK) << PCI_CONF1_EXT_REG_SHIFT) #define PCI_CONF1_EXT_ADDRESS(bus, dev, func, reg) \ (PCI_CONF1_ADDRESS(bus, dev, func, reg) | \ PCI_CONF1_EXT_REG(reg)) #endif /* DRIVERS_PCI_H */ |
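/*
 * Illustration (not part of the header above): a standalone sketch of the
 * Configuration Mechanism #1 encoding performed by PCI_CONF1_ADDRESS().
 * The shifts and masks are re-created locally so the example builds outside
 * the kernel tree; the bus/device/function/register numbers are arbitrary.
 * Layout: bit 31 enable, bits 23:16 bus, bits 15:11 device, bits 10:8
 * function, bits 7:2 dword-aligned register offset.
 */
#include <stdio.h>

#define CONF1_ENABLE	(1u << 31)
#define CONF1_BUS(x)	(((x) & 0xffu) << 16)
#define CONF1_DEV(x)	(((x) & 0x1fu) << 11)
#define CONF1_FUNC(x)	(((x) & 0x7u) << 8)
#define CONF1_REG(x)	((x) & 0xfcu)

static unsigned int conf1_address(unsigned int bus, unsigned int dev,
				  unsigned int func, unsigned int reg)
{
	return CONF1_ENABLE | CONF1_BUS(bus) | CONF1_DEV(dev) |
	       CONF1_FUNC(func) | CONF1_REG(reg);
}

int main(void)
{
	/* Bus 3, device 2, function 1, config offset 0x44 */
	printf("CONFIG_ADDRESS = 0x%08x\n", conf1_address(3, 2, 1, 0x44));
	/* Prints 0x80031144: enable | bus 3 | dev 2 | func 1 | reg 0x44 */
	return 0;
}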
// SPDX-License-Identifier: GPL-2.0-or-later /* * Linux network device link state notification * * Author: * Stefan Rompf <sux@loplof.de> */ #include <linux/module.h> #include <linux/netdevice.h> #include <linux/if.h> #include <net/sock.h> #include <net/pkt_sched.h> #include <linux/rtnetlink.h> #include <linux/jiffies.h> #include <linux/spinlock.h> #include <linux/workqueue.h> #include <linux/bitops.h> #include <linux/types.h> #include "dev.h" enum lw_bits { LW_URGENT = 0, }; static unsigned long linkwatch_flags; static unsigned long linkwatch_nextevent; static void linkwatch_event(struct work_struct *dummy); static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event); static LIST_HEAD(lweventlist); static DEFINE_SPINLOCK(lweventlist_lock); static unsigned int default_operstate(const struct net_device *dev) { if (netif_testing(dev)) return IF_OPER_TESTING; /* Some uppers (DSA) have additional sources for being down, so * first check whether lower is indeed the source of its down state. */ if (!netif_carrier_ok(dev)) { struct net_device *peer; int iflink; /* If called from netdev_run_todo()/linkwatch_sync_dev(), * dev_net(dev) can be already freed, and RTNL is not held. */ if (dev->reg_state <= NETREG_REGISTERED) iflink = dev_get_iflink(dev); else iflink = dev->ifindex; if (iflink == dev->ifindex) return IF_OPER_DOWN; ASSERT_RTNL(); peer = __dev_get_by_index(dev_net(dev), iflink); if (!peer) return IF_OPER_DOWN; return netif_carrier_ok(peer) ?
IF_OPER_DOWN : IF_OPER_LOWERLAYERDOWN; } if (netif_dormant(dev)) return IF_OPER_DORMANT; return IF_OPER_UP; } static void rfc2863_policy(struct net_device *dev) { unsigned int operstate = default_operstate(dev); if (operstate == READ_ONCE(dev->operstate)) return; switch(dev->link_mode) { case IF_LINK_MODE_TESTING: if (operstate == IF_OPER_UP) operstate = IF_OPER_TESTING; break; case IF_LINK_MODE_DORMANT: if (operstate == IF_OPER_UP) operstate = IF_OPER_DORMANT; break; case IF_LINK_MODE_DEFAULT: default: break; } WRITE_ONCE(dev->operstate, operstate); } void linkwatch_init_dev(struct net_device *dev) { /* Handle pre-registration link state changes */ if (!netif_carrier_ok(dev) || netif_dormant(dev) || netif_testing(dev)) rfc2863_policy(dev); } static bool linkwatch_urgent_event(struct net_device *dev) { if (!netif_running(dev)) return false; if (dev->ifindex != dev_get_iflink(dev)) return true; if (netif_is_lag_port(dev) || netif_is_lag_master(dev)) return true; return netif_carrier_ok(dev) && qdisc_tx_changing(dev); } static void linkwatch_add_event(struct net_device *dev) { unsigned long flags; spin_lock_irqsave(&lweventlist_lock, flags); if (list_empty(&dev->link_watch_list)) { list_add_tail(&dev->link_watch_list, &lweventlist); netdev_hold(dev, &dev->linkwatch_dev_tracker, GFP_ATOMIC); } spin_unlock_irqrestore(&lweventlist_lock, flags); } static void linkwatch_schedule_work(int urgent) { unsigned long delay = linkwatch_nextevent - jiffies; if (test_bit(LW_URGENT, &linkwatch_flags)) return; /* Minimise down-time: drop delay for up event. */ if (urgent) { if (test_and_set_bit(LW_URGENT, &linkwatch_flags)) return; delay = 0; } /* If we wrap around we'll delay it by at most HZ. */ if (delay > HZ) delay = 0; /* * If urgent, schedule immediate execution; otherwise, don't * override the existing timer. */ if (test_bit(LW_URGENT, &linkwatch_flags)) mod_delayed_work(system_unbound_wq, &linkwatch_work, 0); else queue_delayed_work(system_unbound_wq, &linkwatch_work, delay); } static void linkwatch_do_dev(struct net_device *dev) { /* * Make sure the above read is complete since it can be * rewritten as soon as we clear the bit below. */ smp_mb__before_atomic(); /* We are about to handle this device, * so new events can be accepted */ clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state); rfc2863_policy(dev); if (dev->flags & IFF_UP) { if (netif_carrier_ok(dev)) dev_activate(dev); else dev_deactivate(dev); netdev_state_change(dev); } /* Note: our callers are responsible for calling netdev_tracker_free(). * This is the reason we use __dev_put() instead of dev_put(). */ __dev_put(dev); } static void __linkwatch_run_queue(int urgent_only) { #define MAX_DO_DEV_PER_LOOP 100 int do_dev = MAX_DO_DEV_PER_LOOP; /* Use a local list here since we add non-urgent * events back to the global one when called with * urgent_only=1. */ LIST_HEAD(wrk); /* Give urgent case more budget */ if (urgent_only) do_dev += MAX_DO_DEV_PER_LOOP; /* * Limit the number of linkwatch events to one * per second so that a runaway driver does not * cause a storm of messages on the netlink * socket. This limit does not apply to up events * while the device qdisc is down. */ if (!urgent_only) linkwatch_nextevent = jiffies + HZ; /* Limit wrap-around effect on delay. 
*/ else if (time_after(linkwatch_nextevent, jiffies + HZ)) linkwatch_nextevent = jiffies; clear_bit(LW_URGENT, &linkwatch_flags); spin_lock_irq(&lweventlist_lock); list_splice_init(&lweventlist, &wrk); while (!list_empty(&wrk) && do_dev > 0) { struct net_device *dev; dev = list_first_entry(&wrk, struct net_device, link_watch_list); list_del_init(&dev->link_watch_list); if (!netif_device_present(dev) || (urgent_only && !linkwatch_urgent_event(dev))) { list_add_tail(&dev->link_watch_list, &lweventlist); continue; } /* We must free netdev tracker under * the spinlock protection. */ netdev_tracker_free(dev, &dev->linkwatch_dev_tracker); spin_unlock_irq(&lweventlist_lock); linkwatch_do_dev(dev); do_dev--; spin_lock_irq(&lweventlist_lock); } /* Add the remaining work back to lweventlist */ list_splice_init(&wrk, &lweventlist); if (!list_empty(&lweventlist)) linkwatch_schedule_work(0); spin_unlock_irq(&lweventlist_lock); } void linkwatch_sync_dev(struct net_device *dev) { unsigned long flags; int clean = 0; spin_lock_irqsave(&lweventlist_lock, flags); if (!list_empty(&dev->link_watch_list)) { list_del_init(&dev->link_watch_list); clean = 1; /* We must release netdev tracker under * the spinlock protection. */ netdev_tracker_free(dev, &dev->linkwatch_dev_tracker); } spin_unlock_irqrestore(&lweventlist_lock, flags); if (clean) linkwatch_do_dev(dev); } /* Must be called with the rtnl semaphore held */ void linkwatch_run_queue(void) { __linkwatch_run_queue(0); } static void linkwatch_event(struct work_struct *dummy) { rtnl_lock(); __linkwatch_run_queue(time_after(linkwatch_nextevent, jiffies)); rtnl_unlock(); } void linkwatch_fire_event(struct net_device *dev) { bool urgent = linkwatch_urgent_event(dev); if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { linkwatch_add_event(dev); } else if (!urgent) return; linkwatch_schedule_work(urgent); } EXPORT_SYMBOL(linkwatch_fire_event); |
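/*
 * Illustration (not part of the kernel sources above): a simplified
 * user-space model of the RFC 2863 operstate selection performed by
 * default_operstate() and rfc2863_policy().  The mock_dev structure and its
 * flags are invented for the example; in the kernel the inputs come from
 * netif_carrier_ok(), netif_dormant(), netif_testing() and dev->link_mode.
 * The iflink/peer handling (IF_OPER_LOWERLAYERDOWN) is omitted for brevity.
 */
#include <stdbool.h>
#include <stdio.h>

enum oper { OPER_DOWN, OPER_TESTING, OPER_DORMANT, OPER_UP };
enum link_mode { MODE_DEFAULT, MODE_DORMANT, MODE_TESTING };

struct mock_dev {
	bool carrier, dormant, testing;
	enum link_mode link_mode;
};

static enum oper policy(const struct mock_dev *d)
{
	enum oper op;

	if (d->testing)
		op = OPER_TESTING;
	else if (!d->carrier)
		op = OPER_DOWN;
	else if (d->dormant)
		op = OPER_DORMANT;
	else
		op = OPER_UP;

	/* As in rfc2863_policy(), link_mode only demotes an UP result. */
	if (op == OPER_UP && d->link_mode == MODE_DORMANT)
		op = OPER_DORMANT;
	else if (op == OPER_UP && d->link_mode == MODE_TESTING)
		op = OPER_TESTING;
	return op;
}

int main(void)
{
	struct mock_dev d = { .carrier = true, .link_mode = MODE_DORMANT };

	/* Carrier up but administratively dormant: expect OPER_DORMANT (2). */
	printf("operstate = %d\n", policy(&d));
	return 0;
}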
// SPDX-License-Identifier: GPL-2.0-or-later /* * Streamzap Remote Control driver * * Copyright (c) 2005 Christoph Bartelmus <lirc@bartelmus.de> * Copyright (c) 2010 Jarod Wilson <jarod@wilsonet.com> * * This driver was based on the work of Greg Wickham and Adrian * Dewhurst. It was substantially rewritten to support correct signal * gaps and now maintains a delay buffer, which is used to present * consistent timing behaviour to user space applications. Without the * delay buffer an ugly hack would be required in lircd, which can * cause sluggish signal decoding in certain situations.
* * Ported to in-kernel ir-core interface by Jarod Wilson * * This driver is based on the USB skeleton driver packaged with the * kernel; copyright (C) 2001-2003 Greg Kroah-Hartman (greg@kroah.com) */ #include <linux/device.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/usb.h> #include <linux/usb/input.h> #include <media/rc-core.h> #define DRIVER_NAME "streamzap" #define DRIVER_DESC "Streamzap Remote Control driver" #define USB_STREAMZAP_VENDOR_ID 0x0e9c #define USB_STREAMZAP_PRODUCT_ID 0x0000 /* table of devices that work with this driver */ static const struct usb_device_id streamzap_table[] = { /* Streamzap Remote Control */ { USB_DEVICE(USB_STREAMZAP_VENDOR_ID, USB_STREAMZAP_PRODUCT_ID) }, /* Terminating entry */ { } }; MODULE_DEVICE_TABLE(usb, streamzap_table); #define SZ_PULSE_MASK 0xf0 #define SZ_SPACE_MASK 0x0f #define SZ_TIMEOUT 0xff #define SZ_RESOLUTION 256 /* number of samples buffered */ #define SZ_BUF_LEN 128 enum StreamzapDecoderState { PulseSpace, FullPulse, FullSpace, IgnorePulse }; /* structure to hold our device specific stuff */ struct streamzap_ir { /* ir-core */ struct rc_dev *rdev; /* core device info */ struct device *dev; /* usb */ struct urb *urb_in; /* buffer & dma */ unsigned char *buf_in; dma_addr_t dma_in; unsigned int buf_in_len; /* track what state we're in */ enum StreamzapDecoderState decoder_state; char phys[64]; }; /* local function prototypes */ static int streamzap_probe(struct usb_interface *interface, const struct usb_device_id *id); static void streamzap_disconnect(struct usb_interface *interface); static void streamzap_callback(struct urb *urb); static int streamzap_suspend(struct usb_interface *intf, pm_message_t message); static int streamzap_resume(struct usb_interface *intf); /* usb specific object needed to register this driver with the usb subsystem */ static struct usb_driver streamzap_driver = { .name = DRIVER_NAME, .probe = streamzap_probe, .disconnect = streamzap_disconnect, .suspend = streamzap_suspend, .resume = streamzap_resume, .id_table = streamzap_table, }; static void sz_push(struct streamzap_ir *sz, struct ir_raw_event rawir) { dev_dbg(sz->dev, "Storing %s with duration %u us\n", (rawir.pulse ? 
"pulse" : "space"), rawir.duration); ir_raw_event_store_with_filter(sz->rdev, &rawir); } static void sz_push_full_pulse(struct streamzap_ir *sz, unsigned char value) { struct ir_raw_event rawir = { .pulse = true, .duration = value * SZ_RESOLUTION + SZ_RESOLUTION / 2, }; sz_push(sz, rawir); } static void sz_push_half_pulse(struct streamzap_ir *sz, unsigned char value) { sz_push_full_pulse(sz, (value & SZ_PULSE_MASK) >> 4); } static void sz_push_full_space(struct streamzap_ir *sz, unsigned char value) { struct ir_raw_event rawir = { .pulse = false, .duration = value * SZ_RESOLUTION + SZ_RESOLUTION / 2, }; sz_push(sz, rawir); } static void sz_push_half_space(struct streamzap_ir *sz, unsigned long value) { sz_push_full_space(sz, value & SZ_SPACE_MASK); } static void sz_process_ir_data(struct streamzap_ir *sz, int len) { unsigned int i; for (i = 0; i < len; i++) { dev_dbg(sz->dev, "sz->buf_in[%d]: %x\n", i, (unsigned char)sz->buf_in[i]); switch (sz->decoder_state) { case PulseSpace: if ((sz->buf_in[i] & SZ_PULSE_MASK) == SZ_PULSE_MASK) { sz->decoder_state = FullPulse; continue; } else if ((sz->buf_in[i] & SZ_SPACE_MASK) == SZ_SPACE_MASK) { sz_push_half_pulse(sz, sz->buf_in[i]); sz->decoder_state = FullSpace; continue; } else { sz_push_half_pulse(sz, sz->buf_in[i]); sz_push_half_space(sz, sz->buf_in[i]); } break; case FullPulse: sz_push_full_pulse(sz, sz->buf_in[i]); sz->decoder_state = IgnorePulse; break; case FullSpace: if (sz->buf_in[i] == SZ_TIMEOUT) { struct ir_raw_event rawir = { .pulse = false, .duration = sz->rdev->timeout }; sz_push(sz, rawir); } else { sz_push_full_space(sz, sz->buf_in[i]); } sz->decoder_state = PulseSpace; break; case IgnorePulse: if ((sz->buf_in[i] & SZ_SPACE_MASK) == SZ_SPACE_MASK) { sz->decoder_state = FullSpace; continue; } sz_push_half_space(sz, sz->buf_in[i]); sz->decoder_state = PulseSpace; break; } } ir_raw_event_handle(sz->rdev); } /* * streamzap_callback - usb IRQ handler callback * * This procedure is invoked on reception of data from * the usb remote. */ static void streamzap_callback(struct urb *urb) { struct streamzap_ir *sz; int len; if (!urb) return; sz = urb->context; len = urb->actual_length; switch (urb->status) { case 0: dev_dbg(sz->dev, "%s: received urb, len %d\n", __func__, len); sz_process_ir_data(sz, len); break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: /* * this urb is terminated, clean up. 
* sz might already be invalid at this point */ dev_err(sz->dev, "urb terminated, status: %d\n", urb->status); return; default: break; } usb_submit_urb(urb, GFP_ATOMIC); } static struct rc_dev *streamzap_init_rc_dev(struct streamzap_ir *sz, struct usb_device *usbdev) { struct rc_dev *rdev; struct device *dev = sz->dev; int ret; rdev = rc_allocate_device(RC_DRIVER_IR_RAW); if (!rdev) goto out; usb_make_path(usbdev, sz->phys, sizeof(sz->phys)); strlcat(sz->phys, "/input0", sizeof(sz->phys)); rdev->device_name = "Streamzap PC Remote Infrared Receiver"; rdev->input_phys = sz->phys; usb_to_input_id(usbdev, &rdev->input_id); rdev->dev.parent = dev; rdev->priv = sz; rdev->allowed_protocols = RC_PROTO_BIT_ALL_IR_DECODER; rdev->driver_name = DRIVER_NAME; rdev->map_name = RC_MAP_STREAMZAP; rdev->rx_resolution = SZ_RESOLUTION; ret = rc_register_device(rdev); if (ret < 0) { dev_err(dev, "remote input device register failed\n"); goto out; } return rdev; out: rc_free_device(rdev); return NULL; } /* * streamzap_probe * * Called by usb-core to associated with a candidate device * On any failure the return value is the ERROR * On success return 0 */ static int streamzap_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *usbdev = interface_to_usbdev(intf); struct usb_endpoint_descriptor *endpoint; struct usb_host_interface *iface_host; struct streamzap_ir *sz = NULL; int retval = -ENOMEM; int pipe, maxp; /* Allocate space for device driver specific data */ sz = kzalloc(sizeof(struct streamzap_ir), GFP_KERNEL); if (!sz) return -ENOMEM; /* Check to ensure endpoint information matches requirements */ iface_host = intf->cur_altsetting; if (iface_host->desc.bNumEndpoints != 1) { dev_err(&intf->dev, "%s: Unexpected desc.bNumEndpoints (%d)\n", __func__, iface_host->desc.bNumEndpoints); retval = -ENODEV; goto free_sz; } endpoint = &iface_host->endpoint[0].desc; if (!usb_endpoint_dir_in(endpoint)) { dev_err(&intf->dev, "%s: endpoint doesn't match input device 02%02x\n", __func__, endpoint->bEndpointAddress); retval = -ENODEV; goto free_sz; } if (!usb_endpoint_xfer_int(endpoint)) { dev_err(&intf->dev, "%s: endpoint attributes don't match xfer 02%02x\n", __func__, endpoint->bmAttributes); retval = -ENODEV; goto free_sz; } pipe = usb_rcvintpipe(usbdev, endpoint->bEndpointAddress); maxp = usb_maxpacket(usbdev, pipe); if (maxp == 0) { dev_err(&intf->dev, "%s: endpoint Max Packet Size is 0!?!\n", __func__); retval = -ENODEV; goto free_sz; } /* Allocate the USB buffer and IRQ URB */ sz->buf_in = usb_alloc_coherent(usbdev, maxp, GFP_ATOMIC, &sz->dma_in); if (!sz->buf_in) goto free_sz; sz->urb_in = usb_alloc_urb(0, GFP_KERNEL); if (!sz->urb_in) goto free_buf_in; sz->dev = &intf->dev; sz->buf_in_len = maxp; sz->rdev = streamzap_init_rc_dev(sz, usbdev); if (!sz->rdev) goto rc_dev_fail; sz->decoder_state = PulseSpace; /* FIXME: don't yet have a way to set this */ sz->rdev->timeout = SZ_TIMEOUT * SZ_RESOLUTION; #if 0 /* not yet supported, depends on patches from maxim */ /* see also: LIRC_GET_REC_RESOLUTION and LIRC_SET_REC_TIMEOUT */ sz->min_timeout = SZ_TIMEOUT * SZ_RESOLUTION; sz->max_timeout = SZ_TIMEOUT * SZ_RESOLUTION; #endif /* Complete final initialisations */ usb_fill_int_urb(sz->urb_in, usbdev, pipe, sz->buf_in, maxp, streamzap_callback, sz, endpoint->bInterval); sz->urb_in->transfer_dma = sz->dma_in; sz->urb_in->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; usb_set_intfdata(intf, sz); if (usb_submit_urb(sz->urb_in, GFP_ATOMIC)) dev_err(sz->dev, "urb submit failed\n"); return 0; 
rc_dev_fail:
	usb_free_urb(sz->urb_in);
free_buf_in:
	usb_free_coherent(usbdev, maxp, sz->buf_in, sz->dma_in);
free_sz:
	kfree(sz);

	return retval;
}

/*
 * streamzap_disconnect
 *
 * Called by the usb core when the device is removed from the system.
 *
 * This routine kills the in-flight interrupt urb (which also stops the
 * completion handler from resubmitting it), unregisters the rc device,
 * and frees the urb, the coherent receive buffer and the driver state.
 */
static void streamzap_disconnect(struct usb_interface *interface)
{
	struct streamzap_ir *sz = usb_get_intfdata(interface);
	struct usb_device *usbdev = interface_to_usbdev(interface);

	usb_set_intfdata(interface, NULL);

	if (!sz)
		return;

	usb_kill_urb(sz->urb_in);
	rc_unregister_device(sz->rdev);
	usb_free_urb(sz->urb_in);
	usb_free_coherent(usbdev, sz->buf_in_len, sz->buf_in, sz->dma_in);

	kfree(sz);
}

static int streamzap_suspend(struct usb_interface *intf, pm_message_t message)
{
	struct streamzap_ir *sz = usb_get_intfdata(intf);

	usb_kill_urb(sz->urb_in);

	return 0;
}

static int streamzap_resume(struct usb_interface *intf)
{
	struct streamzap_ir *sz = usb_get_intfdata(intf);

	if (usb_submit_urb(sz->urb_in, GFP_NOIO)) {
		dev_err(sz->dev, "Error submitting urb\n");
		return -EIO;
	}

	return 0;
}

module_usb_driver(streamzap_driver);

MODULE_AUTHOR("Jarod Wilson <jarod@wilsonet.com>");
MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_LICENSE("GPL");
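/*
 * Illustrative sketch, not part of streamzap.c: it restates the duration
 * math used by sz_push_full_pulse()/sz_push_full_space() above as a
 * standalone helper, assuming the SZ_RESOLUTION, SZ_PULSE_MASK and
 * SZ_SPACE_MASK macros defined earlier in the driver.  The helper name
 * sz_count_to_duration() is hypothetical.
 */
static inline unsigned int sz_count_to_duration(unsigned char count)
{
	/*
	 * A hardware count of N covers the bucket
	 * [N * SZ_RESOLUTION, (N + 1) * SZ_RESOLUTION), so report the
	 * middle of that bucket, exactly as the push helpers above do.
	 */
	return count * SZ_RESOLUTION + SZ_RESOLUTION / 2;
}

/*
 * Worked example (taking SZ_RESOLUTION as 256 microseconds): in the
 * PulseSpace state a byte of 0x3a carries the pulse count in its high
 * nibble and the space count in its low nibble, so it decodes to a pulse
 * of 0x3 * 256 + 128 = 896us followed by a space of 0xa * 256 + 128 =
 * 2688us.  A nibble of all ones instead escapes to the FullPulse/FullSpace
 * states, where the next byte supplies a full 8-bit count.
 */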
// SPDX-License-Identifier: GPL-2.0
/*
 * KVM coalesced MMIO
 *
 * Copyright (c) 2008 Bull S.A.S.
 * Copyright 2009 Red Hat, Inc. and/or its affiliates.
 *
 * Author: Laurent Vivier <Laurent.Vivier@bull.net>
 *
 */

#include <kvm/iodev.h>

#include <linux/kvm_host.h>
#include <linux/slab.h>
#include <linux/kvm.h>

#include "coalesced_mmio.h"

static inline struct kvm_coalesced_mmio_dev *to_mmio(struct kvm_io_device *dev)
{
	return container_of(dev, struct kvm_coalesced_mmio_dev, dev);
}

static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev,
				   gpa_t addr, int len)
{
	/* is it in a batchable area ?
	 * (addr,len) is fully included in
	 * (zone->addr, zone->size)
	 */
	if (len < 0)
		return 0;
	if (addr + len < addr)
		return 0;
	if (addr < dev->zone.addr)
		return 0;
	if (addr + len > dev->zone.addr + dev->zone.size)
		return 0;
	return 1;
}

static int coalesced_mmio_write(struct kvm_vcpu *vcpu,
				struct kvm_io_device *this, gpa_t addr,
				int len, const void *val)
{
	struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
	struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring;
	__u32 insert;

	if (!coalesced_mmio_in_range(dev, addr, len))
		return -EOPNOTSUPP;

	spin_lock(&dev->kvm->ring_lock);

	/*
	 * last is the index of the entry to fill.  Verify userspace hasn't
	 * set last to be out of range, and that there is room in the ring.
	 * Leave one entry free in the ring so that userspace can differentiate
	 * between an empty ring and a full ring.
	 */
	insert = READ_ONCE(ring->last);
	if (insert >= KVM_COALESCED_MMIO_MAX ||
	    (insert + 1) % KVM_COALESCED_MMIO_MAX == READ_ONCE(ring->first)) {
		spin_unlock(&dev->kvm->ring_lock);
		return -EOPNOTSUPP;
	}

	/* copy data in first free entry of the ring */
	ring->coalesced_mmio[insert].phys_addr = addr;
	ring->coalesced_mmio[insert].len = len;
	memcpy(ring->coalesced_mmio[insert].data, val, len);
	ring->coalesced_mmio[insert].pio = dev->zone.pio;
	smp_wmb();
	ring->last = (insert + 1) % KVM_COALESCED_MMIO_MAX;
	spin_unlock(&dev->kvm->ring_lock);
	return 0;
}

static void coalesced_mmio_destructor(struct kvm_io_device *this)
{
	struct kvm_coalesced_mmio_dev *dev = to_mmio(this);

	list_del(&dev->list);

	kfree(dev);
}

static const struct kvm_io_device_ops coalesced_mmio_ops = {
	.write      = coalesced_mmio_write,
	.destructor = coalesced_mmio_destructor,
};

int kvm_coalesced_mmio_init(struct kvm *kvm)
{
	struct page *page;

	page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!page)
		return -ENOMEM;

	kvm->coalesced_mmio_ring = page_address(page);

	/*
	 * We're using this spinlock to sync access to the coalesced ring.
	 * The list doesn't need its own lock since device registration and
	 * unregistration should only happen when kvm->slots_lock is held.
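	 *
	 * For illustration only (not part of this file), the userspace side
	 * typically looks roughly like the sketch below: the VMM maps the
	 * ring page and drains it without taking ring_lock, relying on the
	 * one-entry gap kept by coalesced_mmio_write() to tell an empty ring
	 * from a full one.  handle_mmio() is a hypothetical helper, and the
	 * memory barriers needed around reading 'last' and publishing
	 * 'first' are omitted for brevity.
	 *
	 *	__u32 first = ring->first;
	 *
	 *	while (first != ring->last) {
	 *		struct kvm_coalesced_mmio *e = &ring->coalesced_mmio[first];
	 *
	 *		handle_mmio(e->phys_addr, e->data, e->len, e->pio);
	 *		first = (first + 1) % KVM_COALESCED_MMIO_MAX;
	 *		ring->first = first;
	 *	}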
	 */
	spin_lock_init(&kvm->ring_lock);
	INIT_LIST_HEAD(&kvm->coalesced_zones);

	return 0;
}

void kvm_coalesced_mmio_free(struct kvm *kvm)
{
	if (kvm->coalesced_mmio_ring)
		free_page((unsigned long)kvm->coalesced_mmio_ring);
}

int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
					 struct kvm_coalesced_mmio_zone *zone)
{
	int ret;
	struct kvm_coalesced_mmio_dev *dev;

	if (zone->pio != 1 && zone->pio != 0)
		return -EINVAL;

	dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev),
		      GFP_KERNEL_ACCOUNT);
	if (!dev)
		return -ENOMEM;

	kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops);
	dev->kvm = kvm;
	dev->zone = *zone;

	mutex_lock(&kvm->slots_lock);
	ret = kvm_io_bus_register_dev(kvm, zone->pio