Total coverage: 228602 (12%)of 1914322
2 1 2 2 2 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 // SPDX-License-Identifier: GPL-2.0-only /* Xtables module to match packets using a BPF filter. * Copyright 2013 Google Inc. * Written by Willem de Bruijn <willemb@google.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/syscalls.h> #include <linux/skbuff.h> #include <linux/filter.h> #include <linux/bpf.h> #include <linux/netfilter/xt_bpf.h> #include <linux/netfilter/x_tables.h> MODULE_AUTHOR("Willem de Bruijn <willemb@google.com>"); MODULE_DESCRIPTION("Xtables: BPF filter match"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_bpf"); MODULE_ALIAS("ip6t_bpf"); static int __bpf_mt_check_bytecode(struct sock_filter *insns, __u16 len, struct bpf_prog **ret) { struct sock_fprog_kern program; if (len > XT_BPF_MAX_NUM_INSTR) return -EINVAL; program.len = len; program.filter = insns; if (bpf_prog_create(ret, &program)) { pr_info_ratelimited("check failed: parse error\n"); return -EINVAL; } return 0; } static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret) { struct bpf_prog *prog; prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER); if (IS_ERR(prog)) return PTR_ERR(prog); *ret = prog; return 0; } static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret) { if (strnlen(path, XT_BPF_PATH_MAX) == XT_BPF_PATH_MAX) return -EINVAL; *ret = bpf_prog_get_type_path(path, BPF_PROG_TYPE_SOCKET_FILTER); return PTR_ERR_OR_ZERO(*ret); } static int bpf_mt_check(const struct xt_mtchk_param *par) { struct xt_bpf_info *info = par->matchinfo; return __bpf_mt_check_bytecode(info->bpf_program, info->bpf_program_num_elem, &info->filter); } static int bpf_mt_check_v1(const struct xt_mtchk_param *par) { struct xt_bpf_info_v1 *info = par->matchinfo; if (info->mode == XT_BPF_MODE_BYTECODE) return __bpf_mt_check_bytecode(info->bpf_program, info->bpf_program_num_elem, &info->filter); else if (info->mode == XT_BPF_MODE_FD_ELF) return __bpf_mt_check_fd(info->fd, &info->filter); else if (info->mode == XT_BPF_MODE_PATH_PINNED) return __bpf_mt_check_path(info->path, &info->filter); else return -EINVAL; } static bool bpf_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_bpf_info *info = par->matchinfo; return bpf_prog_run(info->filter, skb); } static bool bpf_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_bpf_info_v1 *info = par->matchinfo; return !!bpf_prog_run_save_cb(info->filter, (struct sk_buff *) skb); } static void bpf_mt_destroy(const struct xt_mtdtor_param *par) { const struct xt_bpf_info *info = par->matchinfo; bpf_prog_destroy(info->filter); } static void bpf_mt_destroy_v1(const struct xt_mtdtor_param *par) { const struct xt_bpf_info_v1 *info = par->matchinfo; bpf_prog_destroy(info->filter); } static struct xt_match bpf_mt_reg[] __read_mostly = { { .name = "bpf", .revision = 0, .family = NFPROTO_UNSPEC, .checkentry = bpf_mt_check, .match = bpf_mt, .destroy = bpf_mt_destroy, .matchsize = sizeof(struct xt_bpf_info), .usersize = offsetof(struct xt_bpf_info, filter), .me = THIS_MODULE, }, { .name = "bpf", .revision = 1, .family = NFPROTO_UNSPEC, .checkentry = bpf_mt_check_v1, .match = bpf_mt_v1, .destroy = bpf_mt_destroy_v1, .matchsize = sizeof(struct xt_bpf_info_v1), .usersize = offsetof(struct xt_bpf_info_v1, filter), .me = THIS_MODULE, }, }; static int __init bpf_mt_init(void) { return xt_register_matches(bpf_mt_reg, ARRAY_SIZE(bpf_mt_reg)); } static void __exit bpf_mt_exit(void) { xt_unregister_matches(bpf_mt_reg, ARRAY_SIZE(bpf_mt_reg)); } module_init(bpf_mt_init); module_exit(bpf_mt_exit);
2 2 3 3 3 3 2 1 1 2 31 31 28 9 6 6 25 53 52 3 58 56 2 47 47 46 28 19 6 6 6 5 5 6 22 22 18 6 6 1 11 11 7 6 6 6 1 5 27 6 21 1 2 2 2 2 1 1 1 2 2 2 1 1 1 1 2 4 2 2 7 7 1 1 6 6 1 10 10 10 10 1 9 1 9 10 10 10 20 6 10 5 11 20 20 10 10 1 20 1 10 5 5 3 1 1 9 3 6 17 1 11 5 16 16 16 9 9 3 6 9 17 17 3 6 3 3 1 3 4 2 2 3 1 1 7 6 5 26 26 26 25 1 1 18 5 12 1 26 1 10 16 20 3 11 14 14 5 9 6 7 1 6 6 3 1 2 2 2 6 17 21 21 21 21 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 // SPDX-License-Identifier: GPL-2.0-only /* L2TP core. * * Copyright (c) 2008,2009,2010 Katalix Systems Ltd * * This file contains some code of the original L2TPv2 pppol2tp * driver, which has the following copyright: * * Authors: Martijn van Oosterhout <kleptog@svana.org> * James Chapman (jchapman@katalix.com) * Contributors: * Michal Ostrowski <mostrows@speakeasy.net> * Arnaldo Carvalho de Melo <acme@xconectiva.com.br> * David S. Miller (davem@redhat.com) */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/string.h> #include <linux/list.h> #include <linux/rculist.h> #include <linux/uaccess.h> #include <linux/kernel.h> #include <linux/spinlock.h> #include <linux/kthread.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/errno.h> #include <linux/jiffies.h> #include <linux/netdevice.h> #include <linux/net.h> #include <linux/inetdevice.h> #include <linux/skbuff.h> #include <linux/init.h> #include <linux/in.h> #include <linux/ip.h> #include <linux/udp.h> #include <linux/l2tp.h> #include <linux/sort.h> #include <linux/file.h> #include <linux/nsproxy.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/dst.h> #include <net/ip.h> #include <net/udp.h> #include <net/udp_tunnel.h> #include <net/inet_common.h> #include <net/xfrm.h> #include <net/protocol.h> #include <net/inet6_connection_sock.h> #include <net/inet_ecn.h> #include <net/ip6_route.h> #include <net/ip6_checksum.h> #include <asm/byteorder.h> #include <linux/atomic.h> #include "l2tp_core.h" #define CREATE_TRACE_POINTS #include "trace.h" #define L2TP_DRV_VERSION "V2.0" /* L2TP header constants */ #define L2TP_HDRFLAG_T 0x8000 #define L2TP_HDRFLAG_L 0x4000 #define L2TP_HDRFLAG_S 0x0800 #define L2TP_HDRFLAG_O 0x0200 #define L2TP_HDRFLAG_P 0x0100 #define L2TP_HDR_VER_MASK 0x000F #define L2TP_HDR_VER_2 0x0002 #define L2TP_HDR_VER_3 0x0003 /* L2TPv3 default L2-specific sublayer */ #define L2TP_SLFLAG_S 0x40000000 #define L2TP_SL_SEQ_MASK 0x00ffffff #define L2TP_HDR_SIZE_MAX 14 /* Default trace flags */ #define L2TP_DEFAULT_DEBUG_FLAGS 0 #define L2TP_DEPTH_NESTING 2 #if L2TP_DEPTH_NESTING == SINGLE_DEPTH_NESTING #error "L2TP requires its own lockdep subclass" #endif /* Private data stored for received packets in the skb. */ struct l2tp_skb_cb { u32 ns; u16 has_seq; u16 length; unsigned long expires; }; #define L2TP_SKB_CB(skb) ((struct l2tp_skb_cb *)&(skb)->cb[sizeof(struct inet_skb_parm)]) static struct workqueue_struct *l2tp_wq; /* per-net private data for this module */ static unsigned int l2tp_net_id; struct l2tp_net { /* Lock for write access to l2tp_tunnel_idr */ spinlock_t l2tp_tunnel_idr_lock; struct idr l2tp_tunnel_idr; /* Lock for write access to l2tp_v[23]_session_idr/htable */ spinlock_t l2tp_session_idr_lock; struct idr l2tp_v2_session_idr; struct idr l2tp_v3_session_idr; struct hlist_head l2tp_v3_session_htable[16]; }; static u32 l2tp_v2_session_key(u16 tunnel_id, u16 session_id) { return ((u32)tunnel_id) << 16 | session_id; } static unsigned long l2tp_v3_session_hashkey(struct sock *sk, u32 session_id) { return ((unsigned long)sk) + session_id; } #if IS_ENABLED(CONFIG_IPV6) static bool l2tp_sk_is_v6(struct sock *sk) { return sk->sk_family == PF_INET6 && !ipv6_addr_v4mapped(&sk->sk_v6_daddr); } #endif static struct l2tp_net *l2tp_pernet(const struct net *net) { return net_generic(net, l2tp_net_id); } static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel) { struct sock *sk = tunnel->sock; trace_free_tunnel(tunnel); if (sk) { /* Disable udp encapsulation */ switch (tunnel->encap) { case L2TP_ENCAPTYPE_UDP: /* No longer an encapsulation socket. See net/ipv4/udp.c */ WRITE_ONCE(udp_sk(sk)->encap_type, 0); udp_sk(sk)->encap_rcv = NULL; udp_sk(sk)->encap_destroy = NULL; break; case L2TP_ENCAPTYPE_IP: break; } tunnel->sock = NULL; sock_put(sk); } kfree_rcu(tunnel, rcu); } static void l2tp_session_free(struct l2tp_session *session) { trace_free_session(session); if (session->tunnel) l2tp_tunnel_put(session->tunnel); kfree_rcu(session, rcu); } struct l2tp_tunnel *l2tp_sk_to_tunnel(const struct sock *sk) { const struct net *net = sock_net(sk); unsigned long tunnel_id, tmp; struct l2tp_tunnel *tunnel; struct l2tp_net *pn; rcu_read_lock_bh(); pn = l2tp_pernet(net); idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) { if (tunnel && tunnel->sock == sk && refcount_inc_not_zero(&tunnel->ref_count)) { rcu_read_unlock_bh(); return tunnel; } } rcu_read_unlock_bh(); return NULL; } EXPORT_SYMBOL_GPL(l2tp_sk_to_tunnel); void l2tp_tunnel_put(struct l2tp_tunnel *tunnel) { if (refcount_dec_and_test(&tunnel->ref_count)) l2tp_tunnel_free(tunnel); } EXPORT_SYMBOL_GPL(l2tp_tunnel_put); void l2tp_session_put(struct l2tp_session *session) { if (refcount_dec_and_test(&session->ref_count)) l2tp_session_free(session); } EXPORT_SYMBOL_GPL(l2tp_session_put); /* Lookup a tunnel. A new reference is held on the returned tunnel. */ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id) { const struct l2tp_net *pn = l2tp_pernet(net); struct l2tp_tunnel *tunnel; rcu_read_lock_bh(); tunnel = idr_find(&pn->l2tp_tunnel_idr, tunnel_id); if (tunnel && refcount_inc_not_zero(&tunnel->ref_count)) { rcu_read_unlock_bh(); return tunnel; } rcu_read_unlock_bh(); return NULL; } EXPORT_SYMBOL_GPL(l2tp_tunnel_get); struct l2tp_tunnel *l2tp_tunnel_get_next(const struct net *net, unsigned long *key) { struct l2tp_net *pn = l2tp_pernet(net); struct l2tp_tunnel *tunnel = NULL; rcu_read_lock_bh(); again: tunnel = idr_get_next_ul(&pn->l2tp_tunnel_idr, key); if (tunnel) { if (refcount_inc_not_zero(&tunnel->ref_count)) { rcu_read_unlock_bh(); return tunnel; } (*key)++; goto again; } rcu_read_unlock_bh(); return NULL; } EXPORT_SYMBOL_GPL(l2tp_tunnel_get_next); struct l2tp_session *l2tp_v3_session_get(const struct net *net, struct sock *sk, u32 session_id) { const struct l2tp_net *pn = l2tp_pernet(net); struct l2tp_session *session; rcu_read_lock_bh(); session = idr_find(&pn->l2tp_v3_session_idr, session_id); if (session && !hash_hashed(&session->hlist) && refcount_inc_not_zero(&session->ref_count)) { rcu_read_unlock_bh(); return session; } /* If we get here and session is non-NULL, the session_id * collides with one in another tunnel. If sk is non-NULL, * find the session matching sk. */ if (session && sk) { unsigned long key = l2tp_v3_session_hashkey(sk, session->session_id); hash_for_each_possible_rcu(pn->l2tp_v3_session_htable, session, hlist, key) { /* session->tunnel may be NULL if another thread is in * l2tp_session_register and has added an item to * l2tp_v3_session_htable but hasn't yet added the * session to its tunnel's session_list. */ struct l2tp_tunnel *tunnel = READ_ONCE(session->tunnel); if (session->session_id == session_id && tunnel && tunnel->sock == sk && refcount_inc_not_zero(&session->ref_count)) { rcu_read_unlock_bh(); return session; } } } rcu_read_unlock_bh(); return NULL; } EXPORT_SYMBOL_GPL(l2tp_v3_session_get); struct l2tp_session *l2tp_v2_session_get(const struct net *net, u16 tunnel_id, u16 session_id) { u32 session_key = l2tp_v2_session_key(tunnel_id, session_id); const struct l2tp_net *pn = l2tp_pernet(net); struct l2tp_session *session; rcu_read_lock_bh(); session = idr_find(&pn->l2tp_v2_session_idr, session_key); if (session && refcount_inc_not_zero(&session->ref_count)) { rcu_read_unlock_bh(); return session; } rcu_read_unlock_bh(); return NULL; } EXPORT_SYMBOL_GPL(l2tp_v2_session_get); struct l2tp_session *l2tp_session_get(const struct net *net, struct sock *sk, int pver, u32 tunnel_id, u32 session_id) { if (pver == L2TP_HDR_VER_2) return l2tp_v2_session_get(net, tunnel_id, session_id); else return l2tp_v3_session_get(net, sk, session_id); } EXPORT_SYMBOL_GPL(l2tp_session_get); static struct l2tp_session *l2tp_v2_session_get_next(const struct net *net, u16 tid, unsigned long *key) { struct l2tp_net *pn = l2tp_pernet(net); struct l2tp_session *session = NULL; /* Start searching within the range of the tid */ if (*key == 0) *key = l2tp_v2_session_key(tid, 0); rcu_read_lock_bh(); again: session = idr_get_next_ul(&pn->l2tp_v2_session_idr, key); if (session) { struct l2tp_tunnel *tunnel = READ_ONCE(session->tunnel); /* ignore sessions with id 0 as they are internal for pppol2tp */ if (session->session_id == 0) { (*key)++; goto again; } if (tunnel->tunnel_id == tid && refcount_inc_not_zero(&session->ref_count)) { rcu_read_unlock_bh(); return session; } (*key)++; if (tunnel->tunnel_id == tid) goto again; } rcu_read_unlock_bh(); return NULL; } static struct l2tp_session *l2tp_v3_session_get_next(const struct net *net, u32 tid, struct sock *sk, unsigned long *key) { struct l2tp_net *pn = l2tp_pernet(net); struct l2tp_session *session = NULL; rcu_read_lock_bh(); again: session = idr_get_next_ul(&pn->l2tp_v3_session_idr, key); if (session && !hash_hashed(&session->hlist)) { struct l2tp_tunnel *tunnel = READ_ONCE(session->tunnel); if (tunnel && tunnel->tunnel_id == tid && refcount_inc_not_zero(&session->ref_count)) { rcu_read_unlock_bh(); return session; } (*key)++; goto again; } /* If we get here and session is non-NULL, the IDR entry may be one * where the session_id collides with one in another tunnel. Check * session_htable for a match. There can only be one session of a given * ID per tunnel so we can return as soon as a match is found. */ if (session && hash_hashed(&session->hlist)) { unsigned long hkey = l2tp_v3_session_hashkey(sk, session->session_id); u32 sid = session->session_id; hash_for_each_possible_rcu(pn->l2tp_v3_session_htable, session, hlist, hkey) { struct l2tp_tunnel *tunnel = READ_ONCE(session->tunnel); if (session->session_id == sid && tunnel && tunnel->tunnel_id == tid && refcount_inc_not_zero(&session->ref_count)) { rcu_read_unlock_bh(); return session; } } /* If no match found, the colliding session ID isn't in our * tunnel so try the next session ID. */ (*key)++; goto again; } rcu_read_unlock_bh(); return NULL; } struct l2tp_session *l2tp_session_get_next(const struct net *net, struct sock *sk, int pver, u32 tunnel_id, unsigned long *key) { if (pver == L2TP_HDR_VER_2) return l2tp_v2_session_get_next(net, tunnel_id, key); else return l2tp_v3_session_get_next(net, tunnel_id, sk, key); } EXPORT_SYMBOL_GPL(l2tp_session_get_next); /* Lookup a session by interface name. * This is very inefficient but is only used by management interfaces. */ struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net, const char *ifname) { struct l2tp_net *pn = l2tp_pernet(net); unsigned long tunnel_id, tmp; struct l2tp_session *session; struct l2tp_tunnel *tunnel; rcu_read_lock_bh(); idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) { if (tunnel) { list_for_each_entry_rcu(session, &tunnel->session_list, list) { if (!strcmp(session->ifname, ifname)) { refcount_inc(&session->ref_count); rcu_read_unlock_bh(); return session; } } } } rcu_read_unlock_bh(); return NULL; } EXPORT_SYMBOL_GPL(l2tp_session_get_by_ifname); static void l2tp_session_coll_list_add(struct l2tp_session_coll_list *clist, struct l2tp_session *session) { refcount_inc(&session->ref_count); WARN_ON_ONCE(session->coll_list); session->coll_list = clist; spin_lock(&clist->lock); list_add(&session->clist, &clist->list); spin_unlock(&clist->lock); } static int l2tp_session_collision_add(struct l2tp_net *pn, struct l2tp_session *session1, struct l2tp_session *session2) { struct l2tp_session_coll_list *clist; lockdep_assert_held(&pn->l2tp_session_idr_lock); if (!session2) return -EEXIST; /* If existing session is in IP-encap tunnel, refuse new session */ if (session2->tunnel->encap == L2TP_ENCAPTYPE_IP) return -EEXIST; clist = session2->coll_list; if (!clist) { /* First collision. Allocate list to manage the collided sessions * and add the existing session to the list. */ clist = kmalloc(sizeof(*clist), GFP_ATOMIC); if (!clist) return -ENOMEM; spin_lock_init(&clist->lock); INIT_LIST_HEAD(&clist->list); refcount_set(&clist->ref_count, 1); l2tp_session_coll_list_add(clist, session2); } /* If existing session isn't already in the session hlist, add it. */ if (!hash_hashed(&session2->hlist)) hash_add_rcu(pn->l2tp_v3_session_htable, &session2->hlist, session2->hlist_key); /* Add new session to the hlist and collision list */ hash_add_rcu(pn->l2tp_v3_session_htable, &session1->hlist, session1->hlist_key); refcount_inc(&clist->ref_count); l2tp_session_coll_list_add(clist, session1); return 0; } static void l2tp_session_collision_del(struct l2tp_net *pn, struct l2tp_session *session) { struct l2tp_session_coll_list *clist = session->coll_list; unsigned long session_key = session->session_id; struct l2tp_session *session2; lockdep_assert_held(&pn->l2tp_session_idr_lock); hash_del_rcu(&session->hlist); if (clist) { /* Remove session from its collision list. If there * are other sessions with the same ID, replace this * session's IDR entry with that session, otherwise * remove the IDR entry. If this is the last session, * the collision list data is freed. */ spin_lock(&clist->lock); list_del_init(&session->clist); session2 = list_first_entry_or_null(&clist->list, struct l2tp_session, clist); if (session2) { void *old = idr_replace(&pn->l2tp_v3_session_idr, session2, session_key); WARN_ON_ONCE(IS_ERR_VALUE(old)); } else { void *removed = idr_remove(&pn->l2tp_v3_session_idr, session_key); WARN_ON_ONCE(removed != session); } session->coll_list = NULL; spin_unlock(&clist->lock); if (refcount_dec_and_test(&clist->ref_count)) kfree(clist); l2tp_session_put(session); } } int l2tp_session_register(struct l2tp_session *session, struct l2tp_tunnel *tunnel) { struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); struct l2tp_session *other_session = NULL; void *old = NULL; u32 session_key; int err; spin_lock_bh(&tunnel->list_lock); spin_lock_bh(&pn->l2tp_session_idr_lock); if (!tunnel->acpt_newsess) { err = -ENODEV; goto out; } if (tunnel->version == L2TP_HDR_VER_3) { session_key = session->session_id; err = idr_alloc_u32(&pn->l2tp_v3_session_idr, NULL, &session_key, session_key, GFP_ATOMIC); /* IP encap expects session IDs to be globally unique, while * UDP encap doesn't. This isn't per the RFC, which says that * sessions are identified only by the session ID, but is to * support existing userspace which depends on it. */ if (err == -ENOSPC && tunnel->encap == L2TP_ENCAPTYPE_UDP) { other_session = idr_find(&pn->l2tp_v3_session_idr, session_key); err = l2tp_session_collision_add(pn, session, other_session); } } else { session_key = l2tp_v2_session_key(tunnel->tunnel_id, session->session_id); err = idr_alloc_u32(&pn->l2tp_v2_session_idr, NULL, &session_key, session_key, GFP_ATOMIC); } if (err) { if (err == -ENOSPC) err = -EEXIST; goto out; } refcount_inc(&tunnel->ref_count); WRITE_ONCE(session->tunnel, tunnel); list_add_rcu(&session->list, &tunnel->session_list); /* this makes session available to lockless getters */ if (tunnel->version == L2TP_HDR_VER_3) { if (!other_session) old = idr_replace(&pn->l2tp_v3_session_idr, session, session_key); } else { old = idr_replace(&pn->l2tp_v2_session_idr, session, session_key); } /* old should be NULL, unless something removed or modified * the IDR entry after our idr_alloc_32 above (which shouldn't * happen). */ WARN_ON_ONCE(old); out: spin_unlock_bh(&pn->l2tp_session_idr_lock); spin_unlock_bh(&tunnel->list_lock); if (!err) trace_register_session(session); return err; } EXPORT_SYMBOL_GPL(l2tp_session_register); /***************************************************************************** * Receive data handling *****************************************************************************/ /* Queue a skb in order. We come here only if the skb has an L2TP sequence * number. */ static void l2tp_recv_queue_skb(struct l2tp_session *session, struct sk_buff *skb) { struct sk_buff *skbp; struct sk_buff *tmp; u32 ns = L2TP_SKB_CB(skb)->ns; spin_lock_bh(&session->reorder_q.lock); skb_queue_walk_safe(&session->reorder_q, skbp, tmp) { if (L2TP_SKB_CB(skbp)->ns > ns) { __skb_queue_before(&session->reorder_q, skbp, skb); atomic_long_inc(&session->stats.rx_oos_packets); goto out; } } __skb_queue_tail(&session->reorder_q, skb); out: spin_unlock_bh(&session->reorder_q.lock); } /* Dequeue a single skb. */ static void l2tp_recv_dequeue_skb(struct l2tp_session *session, struct sk_buff *skb) { struct l2tp_tunnel *tunnel = session->tunnel; int length = L2TP_SKB_CB(skb)->length; /* We're about to requeue the skb, so return resources * to its current owner (a socket receive buffer). */ skb_orphan(skb); atomic_long_inc(&tunnel->stats.rx_packets); atomic_long_add(length, &tunnel->stats.rx_bytes); atomic_long_inc(&session->stats.rx_packets); atomic_long_add(length, &session->stats.rx_bytes); if (L2TP_SKB_CB(skb)->has_seq) { /* Bump our Nr */ session->nr++; session->nr &= session->nr_max; trace_session_seqnum_update(session); } /* call private receive handler */ if (session->recv_skb) (*session->recv_skb)(session, skb, L2TP_SKB_CB(skb)->length); else kfree_skb(skb); } /* Dequeue skbs from the session's reorder_q, subject to packet order. * Skbs that have been in the queue for too long are simply discarded. */ static void l2tp_recv_dequeue(struct l2tp_session *session) { struct sk_buff *skb; struct sk_buff *tmp; /* If the pkt at the head of the queue has the nr that we * expect to send up next, dequeue it and any other * in-sequence packets behind it. */ start: spin_lock_bh(&session->reorder_q.lock); skb_queue_walk_safe(&session->reorder_q, skb, tmp) { struct l2tp_skb_cb *cb = L2TP_SKB_CB(skb); /* If the packet has been pending on the queue for too long, discard it */ if (time_after(jiffies, cb->expires)) { atomic_long_inc(&session->stats.rx_seq_discards); atomic_long_inc(&session->stats.rx_errors); trace_session_pkt_expired(session, cb->ns); session->reorder_skip = 1; __skb_unlink(skb, &session->reorder_q); kfree_skb(skb); continue; } if (cb->has_seq) { if (session->reorder_skip) { session->reorder_skip = 0; session->nr = cb->ns; trace_session_seqnum_reset(session); } if (cb->ns != session->nr) goto out; } __skb_unlink(skb, &session->reorder_q); /* Process the skb. We release the queue lock while we * do so to let other contexts process the queue. */ spin_unlock_bh(&session->reorder_q.lock); l2tp_recv_dequeue_skb(session, skb); goto start; } out: spin_unlock_bh(&session->reorder_q.lock); } static int l2tp_seq_check_rx_window(struct l2tp_session *session, u32 nr) { u32 nws; if (nr >= session->nr) nws = nr - session->nr; else nws = (session->nr_max + 1) - (session->nr - nr); return nws < session->nr_window_size; } /* If packet has sequence numbers, queue it if acceptable. Returns 0 if * acceptable, else non-zero. */ static int l2tp_recv_data_seq(struct l2tp_session *session, struct sk_buff *skb) { struct l2tp_skb_cb *cb = L2TP_SKB_CB(skb); if (!l2tp_seq_check_rx_window(session, cb->ns)) { /* Packet sequence number is outside allowed window. * Discard it. */ trace_session_pkt_outside_rx_window(session, cb->ns); goto discard; } if (session->reorder_timeout != 0) { /* Packet reordering enabled. Add skb to session's * reorder queue, in order of ns. */ l2tp_recv_queue_skb(session, skb); goto out; } /* Packet reordering disabled. Discard out-of-sequence packets, while * tracking the number if in-sequence packets after the first OOS packet * is seen. After nr_oos_count_max in-sequence packets, reset the * sequence number to re-enable packet reception. */ if (cb->ns == session->nr) { skb_queue_tail(&session->reorder_q, skb); } else { u32 nr_oos = cb->ns; u32 nr_next = (session->nr_oos + 1) & session->nr_max; if (nr_oos == nr_next) session->nr_oos_count++; else session->nr_oos_count = 0; session->nr_oos = nr_oos; if (session->nr_oos_count > session->nr_oos_count_max) { session->reorder_skip = 1; } if (!session->reorder_skip) { atomic_long_inc(&session->stats.rx_seq_discards); trace_session_pkt_oos(session, cb->ns); goto discard; } skb_queue_tail(&session->reorder_q, skb); } out: return 0; discard: return 1; } /* Do receive processing of L2TP data frames. We handle both L2TPv2 * and L2TPv3 data frames here. * * L2TPv2 Data Message Header * * 0 1 2 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * |T|L|x|x|S|x|O|P|x|x|x|x| Ver | Length (opt) | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Tunnel ID | Session ID | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Ns (opt) | Nr (opt) | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Offset Size (opt) | Offset pad... (opt) * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * Data frames are marked by T=0. All other fields are the same as * those in L2TP control frames. * * L2TPv3 Data Message Header * * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | L2TP Session Header | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | L2-Specific Sublayer | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Tunnel Payload ... * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * L2TPv3 Session Header Over IP * * 0 1 2 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Session ID | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | Cookie (optional, maximum 64 bits)... * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * L2TPv3 L2-Specific Sublayer Format * * 0 1 2 3 * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * |x|S|x|x|x|x|x|x| Sequence Number | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ * * Cookie value and sublayer format are negotiated with the peer when * the session is set up. Unlike L2TPv2, we do not need to parse the * packet header to determine if optional fields are present. * * Caller must already have parsed the frame and determined that it is * a data (not control) frame before coming here. Fields up to the * session-id have already been parsed and ptr points to the data * after the session-id. */ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length) { struct l2tp_tunnel *tunnel = session->tunnel; int offset; /* Parse and check optional cookie */ if (session->peer_cookie_len > 0) { if (memcmp(ptr, &session->peer_cookie[0], session->peer_cookie_len)) { pr_debug_ratelimited("%s: cookie mismatch (%u/%u). Discarding.\n", tunnel->name, tunnel->tunnel_id, session->session_id); atomic_long_inc(&session->stats.rx_cookie_discards); goto discard; } ptr += session->peer_cookie_len; } /* Handle the optional sequence numbers. Sequence numbers are * in different places for L2TPv2 and L2TPv3. * * If we are the LAC, enable/disable sequence numbers under * the control of the LNS. If no sequence numbers present but * we were expecting them, discard frame. */ L2TP_SKB_CB(skb)->has_seq = 0; if (tunnel->version == L2TP_HDR_VER_2) { if (hdrflags & L2TP_HDRFLAG_S) { /* Store L2TP info in the skb */ L2TP_SKB_CB(skb)->ns = ntohs(*(__be16 *)ptr); L2TP_SKB_CB(skb)->has_seq = 1; ptr += 2; /* Skip past nr in the header */ ptr += 2; } } else if (session->l2specific_type == L2TP_L2SPECTYPE_DEFAULT) { u32 l2h = ntohl(*(__be32 *)ptr); if (l2h & 0x40000000) { /* Store L2TP info in the skb */ L2TP_SKB_CB(skb)->ns = l2h & 0x00ffffff; L2TP_SKB_CB(skb)->has_seq = 1; } ptr += 4; } if (L2TP_SKB_CB(skb)->has_seq) { /* Received a packet with sequence numbers. If we're the LAC, * check if we sre sending sequence numbers and if not, * configure it so. */ if (!session->lns_mode && !session->send_seq) { trace_session_seqnum_lns_enable(session); session->send_seq = 1; l2tp_session_set_header_len(session, tunnel->version, tunnel->encap); } } else { /* No sequence numbers. * If user has configured mandatory sequence numbers, discard. */ if (session->recv_seq) { pr_debug_ratelimited("%s: recv data has no seq numbers when required. Discarding.\n", session->name); atomic_long_inc(&session->stats.rx_seq_discards); goto discard; } /* If we're the LAC and we're sending sequence numbers, the * LNS has requested that we no longer send sequence numbers. * If we're the LNS and we're sending sequence numbers, the * LAC is broken. Discard the frame. */ if (!session->lns_mode && session->send_seq) { trace_session_seqnum_lns_disable(session); session->send_seq = 0; l2tp_session_set_header_len(session, tunnel->version, tunnel->encap); } else if (session->send_seq) { pr_debug_ratelimited("%s: recv data has no seq numbers when required. Discarding.\n", session->name); atomic_long_inc(&session->stats.rx_seq_discards); goto discard; } } /* Session data offset is defined only for L2TPv2 and is * indicated by an optional 16-bit value in the header. */ if (tunnel->version == L2TP_HDR_VER_2) { /* If offset bit set, skip it. */ if (hdrflags & L2TP_HDRFLAG_O) { offset = ntohs(*(__be16 *)ptr); ptr += 2 + offset; } } offset = ptr - optr; if (!pskb_may_pull(skb, offset)) goto discard; __skb_pull(skb, offset); /* Prepare skb for adding to the session's reorder_q. Hold * packets for max reorder_timeout or 1 second if not * reordering. */ L2TP_SKB_CB(skb)->length = length; L2TP_SKB_CB(skb)->expires = jiffies + (session->reorder_timeout ? session->reorder_timeout : HZ); /* Add packet to the session's receive queue. Reordering is done here, if * enabled. Saved L2TP protocol info is stored in skb->sb[]. */ if (L2TP_SKB_CB(skb)->has_seq) { if (l2tp_recv_data_seq(session, skb)) goto discard; } else { /* No sequence numbers. Add the skb to the tail of the * reorder queue. This ensures that it will be * delivered after all previous sequenced skbs. */ skb_queue_tail(&session->reorder_q, skb); } /* Try to dequeue as many skbs from reorder_q as we can. */ l2tp_recv_dequeue(session); return; discard: atomic_long_inc(&session->stats.rx_errors); kfree_skb(skb); } EXPORT_SYMBOL_GPL(l2tp_recv_common); /* Drop skbs from the session's reorder_q */ static void l2tp_session_queue_purge(struct l2tp_session *session) { struct sk_buff *skb = NULL; while ((skb = skb_dequeue(&session->reorder_q))) { atomic_long_inc(&session->stats.rx_errors); kfree_skb(skb); } } /* UDP encapsulation receive handler. See net/ipv4/udp.c for details. */ int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct l2tp_session *session = NULL; struct l2tp_tunnel *tunnel = NULL; struct net *net = sock_net(sk); unsigned char *ptr, *optr; u16 hdrflags; u16 version; int length; /* UDP has verified checksum */ /* UDP always verifies the packet length. */ __skb_pull(skb, sizeof(struct udphdr)); /* Short packet? */ if (!pskb_may_pull(skb, L2TP_HDR_SIZE_MAX)) goto pass; /* Point to L2TP header */ optr = skb->data; ptr = skb->data; /* Get L2TP header flags */ hdrflags = ntohs(*(__be16 *)ptr); /* Get protocol version */ version = hdrflags & L2TP_HDR_VER_MASK; /* Get length of L2TP packet */ length = skb->len; /* If type is control packet, it is handled by userspace. */ if (hdrflags & L2TP_HDRFLAG_T) goto pass; /* Skip flags */ ptr += 2; if (version == L2TP_HDR_VER_2) { u16 tunnel_id, session_id; /* If length is present, skip it */ if (hdrflags & L2TP_HDRFLAG_L) ptr += 2; /* Extract tunnel and session ID */ tunnel_id = ntohs(*(__be16 *)ptr); ptr += 2; session_id = ntohs(*(__be16 *)ptr); ptr += 2; session = l2tp_v2_session_get(net, tunnel_id, session_id); } else { u32 session_id; ptr += 2; /* skip reserved bits */ session_id = ntohl(*(__be32 *)ptr); ptr += 4; session = l2tp_v3_session_get(net, sk, session_id); } if (!session || !session->recv_skb) { if (session) l2tp_session_put(session); /* Not found? Pass to userspace to deal with */ goto pass; } tunnel = session->tunnel; /* Check protocol version */ if (version != tunnel->version) { l2tp_session_put(session); goto invalid; } if (version == L2TP_HDR_VER_3 && l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) { l2tp_session_put(session); goto invalid; } l2tp_recv_common(session, skb, ptr, optr, hdrflags, length); l2tp_session_put(session); return 0; invalid: atomic_long_inc(&tunnel->stats.rx_invalid); pass: /* Put UDP header back */ __skb_push(skb, sizeof(struct udphdr)); return 1; } EXPORT_SYMBOL_GPL(l2tp_udp_encap_recv); /* UDP encapsulation receive error handler. See net/ipv4/udp.c for details. */ static void l2tp_udp_encap_err_recv(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload) { sk->sk_err = err; sk_error_report(sk); if (ip_hdr(skb)->version == IPVERSION) { if (inet_test_bit(RECVERR, sk)) return ip_icmp_error(sk, skb, err, port, info, payload); #if IS_ENABLED(CONFIG_IPV6) } else { if (inet6_test_bit(RECVERR6, sk)) return ipv6_icmp_error(sk, skb, err, port, info, payload); #endif } } /************************************************************************ * Transmit handling ***********************************************************************/ /* Build an L2TP header for the session into the buffer provided. */ static int l2tp_build_l2tpv2_header(struct l2tp_session *session, void *buf) { struct l2tp_tunnel *tunnel = session->tunnel; __be16 *bufp = buf; __be16 *optr = buf; u16 flags = L2TP_HDR_VER_2; u32 tunnel_id = tunnel->peer_tunnel_id; u32 session_id = session->peer_session_id; if (session->send_seq) flags |= L2TP_HDRFLAG_S; /* Setup L2TP header. */ *bufp++ = htons(flags); *bufp++ = htons(tunnel_id); *bufp++ = htons(session_id); if (session->send_seq) { *bufp++ = htons(session->ns); *bufp++ = 0; session->ns++; session->ns &= 0xffff; trace_session_seqnum_update(session); } return bufp - optr; } static int l2tp_build_l2tpv3_header(struct l2tp_session *session, void *buf) { struct l2tp_tunnel *tunnel = session->tunnel; char *bufp = buf; char *optr = bufp; /* Setup L2TP header. The header differs slightly for UDP and * IP encapsulations. For UDP, there is 4 bytes of flags. */ if (tunnel->encap == L2TP_ENCAPTYPE_UDP) { u16 flags = L2TP_HDR_VER_3; *((__be16 *)bufp) = htons(flags); bufp += 2; *((__be16 *)bufp) = 0; bufp += 2; } *((__be32 *)bufp) = htonl(session->peer_session_id); bufp += 4; if (session->cookie_len) { memcpy(bufp, &session->cookie[0], session->cookie_len); bufp += session->cookie_len; } if (session->l2specific_type == L2TP_L2SPECTYPE_DEFAULT) { u32 l2h = 0; if (session->send_seq) { l2h = 0x40000000 | session->ns; session->ns++; session->ns &= 0xffffff; trace_session_seqnum_update(session); } *((__be32 *)bufp) = htonl(l2h); bufp += 4; } return bufp - optr; } /* Queue the packet to IP for output: tunnel socket lock must be held */ static int l2tp_xmit_queue(struct l2tp_tunnel *tunnel, struct sk_buff *skb, struct flowi *fl) { int err; skb->ignore_df = 1; skb_dst_drop(skb); #if IS_ENABLED(CONFIG_IPV6) if (l2tp_sk_is_v6(tunnel->sock)) err = inet6_csk_xmit(tunnel->sock, skb, NULL); else #endif err = ip_queue_xmit(tunnel->sock, skb, fl); return err >= 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP; } static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, unsigned int *len) { struct l2tp_tunnel *tunnel = session->tunnel; unsigned int data_len = skb->len; struct sock *sk = tunnel->sock; int headroom, uhlen, udp_len; int ret = NET_XMIT_SUCCESS; struct inet_sock *inet; struct udphdr *uh; /* Check that there's enough headroom in the skb to insert IP, * UDP and L2TP headers. If not enough, expand it to * make room. Adjust truesize. */ uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(*uh) : 0; headroom = NET_SKB_PAD + sizeof(struct iphdr) + uhlen + session->hdr_len; if (skb_cow_head(skb, headroom)) { kfree_skb(skb); return NET_XMIT_DROP; } /* Setup L2TP header */ if (tunnel->version == L2TP_HDR_VER_2) l2tp_build_l2tpv2_header(session, __skb_push(skb, session->hdr_len)); else l2tp_build_l2tpv3_header(session, __skb_push(skb, session->hdr_len)); /* Reset control buffer */ memset(skb->cb, 0, sizeof(skb->cb)); nf_reset_ct(skb); /* L2TP uses its own lockdep subclass to avoid lockdep splats caused by * nested socket calls on the same lockdep socket class. This can * happen when data from a user socket is routed over l2tp, which uses * another userspace socket. */ spin_lock_nested(&sk->sk_lock.slock, L2TP_DEPTH_NESTING); if (sock_owned_by_user(sk)) { kfree_skb(skb); ret = NET_XMIT_DROP; goto out_unlock; } /* The user-space may change the connection status for the user-space * provided socket at run time: we must check it under the socket lock */ if (tunnel->fd >= 0 && sk->sk_state != TCP_ESTABLISHED) { kfree_skb(skb); ret = NET_XMIT_DROP; goto out_unlock; } /* Report transmitted length before we add encap header, which keeps * statistics consistent for both UDP and IP encap tx/rx paths. */ *len = skb->len; inet = inet_sk(sk); switch (tunnel->encap) { case L2TP_ENCAPTYPE_UDP: /* Setup UDP header */ __skb_push(skb, sizeof(*uh)); skb_reset_transport_header(skb); uh = udp_hdr(skb); uh->source = inet->inet_sport; uh->dest = inet->inet_dport; udp_len = uhlen + session->hdr_len + data_len; uh->len = htons(udp_len); /* Calculate UDP checksum if configured to do so */ #if IS_ENABLED(CONFIG_IPV6) if (l2tp_sk_is_v6(sk)) udp6_set_csum(udp_get_no_check6_tx(sk), skb, &inet6_sk(sk)->saddr, &sk->sk_v6_daddr, udp_len); else #endif udp_set_csum(sk->sk_no_check_tx, skb, inet->inet_saddr, inet->inet_daddr, udp_len); break; case L2TP_ENCAPTYPE_IP: break; } ret = l2tp_xmit_queue(tunnel, skb, &inet->cork.fl); out_unlock: spin_unlock(&sk->sk_lock.slock); return ret; } /* If caller requires the skb to have a ppp header, the header must be * inserted in the skb data before calling this function. */ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb) { unsigned int len = 0; int ret; ret = l2tp_xmit_core(session, skb, &len); if (ret == NET_XMIT_SUCCESS) { atomic_long_inc(&session->tunnel->stats.tx_packets); atomic_long_add(len, &session->tunnel->stats.tx_bytes); atomic_long_inc(&session->stats.tx_packets); atomic_long_add(len, &session->stats.tx_bytes); } else { atomic_long_inc(&session->tunnel->stats.tx_errors); atomic_long_inc(&session->stats.tx_errors); } return ret; } EXPORT_SYMBOL_GPL(l2tp_xmit_skb); /***************************************************************************** * Tinnel and session create/destroy. *****************************************************************************/ /* Remove an l2tp session from l2tp_core's lists. */ static void l2tp_session_unhash(struct l2tp_session *session) { struct l2tp_tunnel *tunnel = session->tunnel; if (tunnel) { struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net); struct l2tp_session *removed = session; spin_lock_bh(&tunnel->list_lock); spin_lock_bh(&pn->l2tp_session_idr_lock); /* Remove from the per-tunnel list */ list_del_init(&session->list); /* Remove from per-net IDR */ if (tunnel->version == L2TP_HDR_VER_3) { if (hash_hashed(&session->hlist)) l2tp_session_collision_del(pn, session); else removed = idr_remove(&pn->l2tp_v3_session_idr, session->session_id); } else { u32 session_key = l2tp_v2_session_key(tunnel->tunnel_id, session->session_id); removed = idr_remove(&pn->l2tp_v2_session_idr, session_key); } WARN_ON_ONCE(removed && removed != session); spin_unlock_bh(&pn->l2tp_session_idr_lock); spin_unlock_bh(&tunnel->list_lock); } } /* When the tunnel is closed, all the attached sessions need to go too. */ static void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel) { struct l2tp_session *session; spin_lock_bh(&tunnel->list_lock); tunnel->acpt_newsess = false; list_for_each_entry(session, &tunnel->session_list, list) l2tp_session_delete(session); spin_unlock_bh(&tunnel->list_lock); } /* Tunnel socket destroy hook for UDP encapsulation */ static void l2tp_udp_encap_destroy(struct sock *sk) { struct l2tp_tunnel *tunnel; tunnel = l2tp_sk_to_tunnel(sk); if (tunnel) { l2tp_tunnel_delete(tunnel); l2tp_tunnel_put(tunnel); } } static void l2tp_tunnel_remove(struct net *net, struct l2tp_tunnel *tunnel) { struct l2tp_net *pn = l2tp_pernet(net); spin_lock_bh(&pn->l2tp_tunnel_idr_lock); idr_remove(&pn->l2tp_tunnel_idr, tunnel->tunnel_id); spin_unlock_bh(&pn->l2tp_tunnel_idr_lock); } /* Workqueue tunnel deletion function */ static void l2tp_tunnel_del_work(struct work_struct *work) { struct l2tp_tunnel *tunnel = container_of(work, struct l2tp_tunnel, del_work); l2tp_tunnel_closeall(tunnel); /* If the tunnel socket was created within the kernel, use * the sk API to release it here. */ if (tunnel->fd < 0) { struct socket *sock = tunnel->sock->sk_socket; if (sock) { kernel_sock_shutdown(sock, SHUT_RDWR); sock_release(sock); } } l2tp_tunnel_remove(tunnel->l2tp_net, tunnel); /* drop initial ref */ l2tp_tunnel_put(tunnel); /* drop workqueue ref */ l2tp_tunnel_put(tunnel); } /* Create a socket for the tunnel, if one isn't set up by * userspace. This is used for static tunnels where there is no * managing L2TP daemon. * * Since we don't want these sockets to keep a namespace alive by * themselves, we drop the socket's namespace refcount after creation. * These sockets are freed when the namespace exits using the pernet * exit hook. */ static int l2tp_tunnel_sock_create(struct net *net, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct socket **sockp) { int err = -EINVAL; struct socket *sock = NULL; struct udp_port_cfg udp_conf; switch (cfg->encap) { case L2TP_ENCAPTYPE_UDP: memset(&udp_conf, 0, sizeof(udp_conf)); #if IS_ENABLED(CONFIG_IPV6) if (cfg->local_ip6 && cfg->peer_ip6) { udp_conf.family = AF_INET6; memcpy(&udp_conf.local_ip6, cfg->local_ip6, sizeof(udp_conf.local_ip6)); memcpy(&udp_conf.peer_ip6, cfg->peer_ip6, sizeof(udp_conf.peer_ip6)); udp_conf.use_udp6_tx_checksums = !cfg->udp6_zero_tx_checksums; udp_conf.use_udp6_rx_checksums = !cfg->udp6_zero_rx_checksums; } else #endif { udp_conf.family = AF_INET; udp_conf.local_ip = cfg->local_ip; udp_conf.peer_ip = cfg->peer_ip; udp_conf.use_udp_checksums = cfg->use_udp_checksums; } udp_conf.local_udp_port = htons(cfg->local_udp_port); udp_conf.peer_udp_port = htons(cfg->peer_udp_port); err = udp_sock_create(net, &udp_conf, &sock); if (err < 0) goto out; break; case L2TP_ENCAPTYPE_IP: #if IS_ENABLED(CONFIG_IPV6) if (cfg->local_ip6 && cfg->peer_ip6) { struct sockaddr_l2tpip6 ip6_addr = {0}; err = sock_create_kern(net, AF_INET6, SOCK_DGRAM, IPPROTO_L2TP, &sock); if (err < 0) goto out; ip6_addr.l2tp_family = AF_INET6; memcpy(&ip6_addr.l2tp_addr, cfg->local_ip6, sizeof(ip6_addr.l2tp_addr)); ip6_addr.l2tp_conn_id = tunnel_id; err = kernel_bind(sock, (struct sockaddr_unsized *)&ip6_addr, sizeof(ip6_addr)); if (err < 0) goto out; ip6_addr.l2tp_family = AF_INET6; memcpy(&ip6_addr.l2tp_addr, cfg->peer_ip6, sizeof(ip6_addr.l2tp_addr)); ip6_addr.l2tp_conn_id = peer_tunnel_id; err = kernel_connect(sock, (struct sockaddr_unsized *)&ip6_addr, sizeof(ip6_addr), 0); if (err < 0) goto out; } else #endif { struct sockaddr_l2tpip ip_addr = {0}; err = sock_create_kern(net, AF_INET, SOCK_DGRAM, IPPROTO_L2TP, &sock); if (err < 0) goto out; ip_addr.l2tp_family = AF_INET; ip_addr.l2tp_addr = cfg->local_ip; ip_addr.l2tp_conn_id = tunnel_id; err = kernel_bind(sock, (struct sockaddr_unsized *)&ip_addr, sizeof(ip_addr)); if (err < 0) goto out; ip_addr.l2tp_family = AF_INET; ip_addr.l2tp_addr = cfg->peer_ip; ip_addr.l2tp_conn_id = peer_tunnel_id; err = kernel_connect(sock, (struct sockaddr_unsized *)&ip_addr, sizeof(ip_addr), 0); if (err < 0) goto out; } break; default: goto out; } out: *sockp = sock; if (err < 0 && sock) { kernel_sock_shutdown(sock, SHUT_RDWR); sock_release(sock); *sockp = NULL; } return err; } int l2tp_tunnel_create(int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp) { struct l2tp_tunnel *tunnel = NULL; int err; enum l2tp_encap_type encap = L2TP_ENCAPTYPE_UDP; if (cfg) encap = cfg->encap; tunnel = kzalloc(sizeof(*tunnel), GFP_KERNEL); if (!tunnel) { err = -ENOMEM; goto err; } tunnel->version = version; tunnel->tunnel_id = tunnel_id; tunnel->peer_tunnel_id = peer_tunnel_id; sprintf(&tunnel->name[0], "tunl %u", tunnel_id); spin_lock_init(&tunnel->list_lock); tunnel->acpt_newsess = true; INIT_LIST_HEAD(&tunnel->session_list); tunnel->encap = encap; refcount_set(&tunnel->ref_count, 1); tunnel->fd = fd; /* Init delete workqueue struct */ INIT_WORK(&tunnel->del_work, l2tp_tunnel_del_work); err = 0; err: if (tunnelp) *tunnelp = tunnel; return err; } EXPORT_SYMBOL_GPL(l2tp_tunnel_create); static int l2tp_validate_socket(const struct sock *sk, const struct net *net, enum l2tp_encap_type encap) { struct l2tp_tunnel *tunnel; if (!net_eq(sock_net(sk), net)) return -EINVAL; if (sk->sk_type != SOCK_DGRAM) return -EPROTONOSUPPORT; if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6) return -EPROTONOSUPPORT; if ((encap == L2TP_ENCAPTYPE_UDP && sk->sk_protocol != IPPROTO_UDP) || (encap == L2TP_ENCAPTYPE_IP && sk->sk_protocol != IPPROTO_L2TP)) return -EPROTONOSUPPORT; if (encap == L2TP_ENCAPTYPE_UDP && sk->sk_user_data) return -EBUSY; tunnel = l2tp_sk_to_tunnel(sk); if (tunnel) { l2tp_tunnel_put(tunnel); return -EBUSY; } return 0; } int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, struct l2tp_tunnel_cfg *cfg) { struct l2tp_net *pn = l2tp_pernet(net); u32 tunnel_id = tunnel->tunnel_id; struct socket *sock; struct sock *sk; int ret; spin_lock_bh(&pn->l2tp_tunnel_idr_lock); ret = idr_alloc_u32(&pn->l2tp_tunnel_idr, NULL, &tunnel_id, tunnel_id, GFP_ATOMIC); spin_unlock_bh(&pn->l2tp_tunnel_idr_lock); if (ret) return ret == -ENOSPC ? -EEXIST : ret; if (tunnel->fd < 0) { ret = l2tp_tunnel_sock_create(net, tunnel->tunnel_id, tunnel->peer_tunnel_id, cfg, &sock); if (ret < 0) goto err; } else { sock = sockfd_lookup(tunnel->fd, &ret); if (!sock) goto err; } sk = sock->sk; lock_sock(sk); write_lock_bh(&sk->sk_callback_lock); ret = l2tp_validate_socket(sk, net, tunnel->encap); if (ret < 0) goto err_inval_sock; write_unlock_bh(&sk->sk_callback_lock); if (tunnel->encap == L2TP_ENCAPTYPE_UDP) { struct udp_tunnel_sock_cfg udp_cfg = { .encap_type = UDP_ENCAP_L2TPINUDP, .encap_rcv = l2tp_udp_encap_recv, .encap_err_rcv = l2tp_udp_encap_err_recv, .encap_destroy = l2tp_udp_encap_destroy, }; setup_udp_tunnel_sock(net, sock, &udp_cfg); } sk->sk_allocation = GFP_ATOMIC; release_sock(sk); sock_hold(sk); tunnel->sock = sk; tunnel->l2tp_net = net; spin_lock_bh(&pn->l2tp_tunnel_idr_lock); idr_replace(&pn->l2tp_tunnel_idr, tunnel, tunnel->tunnel_id); spin_unlock_bh(&pn->l2tp_tunnel_idr_lock); trace_register_tunnel(tunnel); if (tunnel->fd >= 0) sockfd_put(sock); return 0; err_inval_sock: write_unlock_bh(&sk->sk_callback_lock); release_sock(sk); if (tunnel->fd < 0) sock_release(sock); else sockfd_put(sock); err: l2tp_tunnel_remove(net, tunnel); return ret; } EXPORT_SYMBOL_GPL(l2tp_tunnel_register); /* This function is used by the netlink TUNNEL_DELETE command. */ void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel) { if (!test_and_set_bit(0, &tunnel->dead)) { trace_delete_tunnel(tunnel); refcount_inc(&tunnel->ref_count); queue_work(l2tp_wq, &tunnel->del_work); } } EXPORT_SYMBOL_GPL(l2tp_tunnel_delete); void l2tp_session_delete(struct l2tp_session *session) { if (!test_and_set_bit(0, &session->dead)) { trace_delete_session(session); refcount_inc(&session->ref_count); queue_work(l2tp_wq, &session->del_work); } } EXPORT_SYMBOL_GPL(l2tp_session_delete); /* Workqueue session deletion function */ static void l2tp_session_del_work(struct work_struct *work) { struct l2tp_session *session = container_of(work, struct l2tp_session, del_work); l2tp_session_unhash(session); l2tp_session_queue_purge(session); if (session->session_close) (*session->session_close)(session); /* drop initial ref */ l2tp_session_put(session); /* drop workqueue ref */ l2tp_session_put(session); } /* We come here whenever a session's send_seq, cookie_len or * l2specific_type parameters are set. */ void l2tp_session_set_header_len(struct l2tp_session *session, int version, enum l2tp_encap_type encap) { if (version == L2TP_HDR_VER_2) { session->hdr_len = 6; if (session->send_seq) session->hdr_len += 4; } else { session->hdr_len = 4 + session->cookie_len; session->hdr_len += l2tp_get_l2specific_len(session); if (encap == L2TP_ENCAPTYPE_UDP) session->hdr_len += 4; } } EXPORT_SYMBOL_GPL(l2tp_session_set_header_len); struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg) { struct l2tp_session *session; session = kzalloc(sizeof(*session) + priv_size, GFP_KERNEL); if (session) { session->magic = L2TP_SESSION_MAGIC; session->session_id = session_id; session->peer_session_id = peer_session_id; session->nr = 0; if (tunnel->version == L2TP_HDR_VER_2) session->nr_max = 0xffff; else session->nr_max = 0xffffff; session->nr_window_size = session->nr_max / 2; session->nr_oos_count_max = 4; /* Use NR of first received packet */ session->reorder_skip = 1; sprintf(&session->name[0], "sess %u/%u", tunnel->tunnel_id, session->session_id); skb_queue_head_init(&session->reorder_q); session->hlist_key = l2tp_v3_session_hashkey(tunnel->sock, session->session_id); INIT_HLIST_NODE(&session->hlist); INIT_LIST_HEAD(&session->clist); INIT_LIST_HEAD(&session->list); INIT_WORK(&session->del_work, l2tp_session_del_work); if (cfg) { session->pwtype = cfg->pw_type; session->send_seq = cfg->send_seq; session->recv_seq = cfg->recv_seq; session->lns_mode = cfg->lns_mode; session->reorder_timeout = cfg->reorder_timeout; session->l2specific_type = cfg->l2specific_type; session->cookie_len = cfg->cookie_len; memcpy(&session->cookie[0], &cfg->cookie[0], cfg->cookie_len); session->peer_cookie_len = cfg->peer_cookie_len; memcpy(&session->peer_cookie[0], &cfg->peer_cookie[0], cfg->peer_cookie_len); } l2tp_session_set_header_len(session, tunnel->version, tunnel->encap); refcount_set(&session->ref_count, 1); return session; } return ERR_PTR(-ENOMEM); } EXPORT_SYMBOL_GPL(l2tp_session_create); /***************************************************************************** * Init and cleanup *****************************************************************************/ static __net_init int l2tp_init_net(struct net *net) { struct l2tp_net *pn = net_generic(net, l2tp_net_id); idr_init(&pn->l2tp_tunnel_idr); spin_lock_init(&pn->l2tp_tunnel_idr_lock); idr_init(&pn->l2tp_v2_session_idr); idr_init(&pn->l2tp_v3_session_idr); spin_lock_init(&pn->l2tp_session_idr_lock); return 0; } static __net_exit void l2tp_pre_exit_net(struct net *net) { struct l2tp_net *pn = l2tp_pernet(net); struct l2tp_tunnel *tunnel = NULL; unsigned long tunnel_id, tmp; rcu_read_lock_bh(); idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) { if (tunnel) l2tp_tunnel_delete(tunnel); } rcu_read_unlock_bh(); if (l2tp_wq) { /* Run all TUNNEL_DELETE work items just queued. */ __flush_workqueue(l2tp_wq); /* Each TUNNEL_DELETE work item will queue a SESSION_DELETE * work item for each session in the tunnel. Flush the * workqueue again to process these. */ __flush_workqueue(l2tp_wq); } } static int l2tp_idr_item_unexpected(int id, void *p, void *data) { const char *idr_name = data; pr_err("l2tp: %s IDR not empty at net %d exit\n", idr_name, id); WARN_ON_ONCE(1); return 1; } static __net_exit void l2tp_exit_net(struct net *net) { struct l2tp_net *pn = l2tp_pernet(net); /* Our per-net IDRs should be empty. Check that is so, to * help catch cleanup races or refcnt leaks. */ idr_for_each(&pn->l2tp_v2_session_idr, l2tp_idr_item_unexpected, "v2_session"); idr_for_each(&pn->l2tp_v3_session_idr, l2tp_idr_item_unexpected, "v3_session"); idr_for_each(&pn->l2tp_tunnel_idr, l2tp_idr_item_unexpected, "tunnel"); idr_destroy(&pn->l2tp_v2_session_idr); idr_destroy(&pn->l2tp_v3_session_idr); idr_destroy(&pn->l2tp_tunnel_idr); } static struct pernet_operations l2tp_net_ops = { .init = l2tp_init_net, .exit = l2tp_exit_net, .pre_exit = l2tp_pre_exit_net, .id = &l2tp_net_id, .size = sizeof(struct l2tp_net), }; static int __init l2tp_init(void) { int rc = 0; rc = register_pernet_device(&l2tp_net_ops); if (rc) goto out; l2tp_wq = alloc_workqueue("l2tp", WQ_UNBOUND, 0); if (!l2tp_wq) { pr_err("alloc_workqueue failed\n"); unregister_pernet_device(&l2tp_net_ops); rc = -ENOMEM; goto out; } pr_info("L2TP core driver, %s\n", L2TP_DRV_VERSION); out: return rc; } static void __exit l2tp_exit(void) { unregister_pernet_device(&l2tp_net_ops); if (l2tp_wq) { destroy_workqueue(l2tp_wq); l2tp_wq = NULL; } } module_init(l2tp_init); module_exit(l2tp_exit); MODULE_AUTHOR("James Chapman <jchapman@katalix.com>"); MODULE_DESCRIPTION("L2TP core"); MODULE_LICENSE("GPL"); MODULE_VERSION(L2TP_DRV_VERSION);
2 4 70 1 2 67 2 2 7 82 2 2 2 2 2 2 518 520 8 41 10 41 1 515 103 103 35 10 7 10 7 7 11 18 22 9 13 31 31 63 5 57 7 78 77 78 3 74 7 7 7 1 7 5 1 36 2 1 32 1 24 7 18 13 33 45 10 47 22 8 8 1 7 7 1 37 15 24 31 10 35 4 17 22 39 27 27 22 5 18 18 18 7 10 28 28 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 // SPDX-License-Identifier: GPL-2.0 #include <linux/types.h> #include <linux/errno.h> #include <linux/kmod.h> #include <linux/sched.h> #include <linux/interrupt.h> #include <linux/tty.h> #include <linux/tty_driver.h> #include <linux/file.h> #include <linux/mm.h> #include <linux/string.h> #include <linux/slab.h> #include <linux/poll.h> #include <linux/proc_fs.h> #include <linux/module.h> #include <linux/device.h> #include <linux/wait.h> #include <linux/bitops.h> #include <linux/seq_file.h> #include <linux/uaccess.h> #include <linux/ratelimit.h> #include "tty.h" #undef LDISC_DEBUG_HANGUP #ifdef LDISC_DEBUG_HANGUP #define tty_ldisc_debug(tty, f, args...) tty_debug(tty, f, ##args) #else #define tty_ldisc_debug(tty, f, args...) #endif /* lockdep nested classes for tty->ldisc_sem */ enum { LDISC_SEM_NORMAL, LDISC_SEM_OTHER, }; /* * This guards the refcounted line discipline lists. The lock * must be taken with irqs off because there are hangup path * callers who will do ldisc lookups and cannot sleep. */ static DEFINE_RAW_SPINLOCK(tty_ldiscs_lock); /* Line disc dispatch table */ static struct tty_ldisc_ops *tty_ldiscs[NR_LDISCS]; /** * tty_register_ldisc - install a line discipline * @new_ldisc: pointer to the ldisc object * * Installs a new line discipline into the kernel. The discipline is set up as * unreferenced and then made available to the kernel from this point onwards. * * Locking: takes %tty_ldiscs_lock to guard against ldisc races */ int tty_register_ldisc(struct tty_ldisc_ops *new_ldisc) { unsigned long flags; if (new_ldisc->num < N_TTY || new_ldisc->num >= NR_LDISCS) return -EINVAL; raw_spin_lock_irqsave(&tty_ldiscs_lock, flags); tty_ldiscs[new_ldisc->num] = new_ldisc; raw_spin_unlock_irqrestore(&tty_ldiscs_lock, flags); return 0; } EXPORT_SYMBOL(tty_register_ldisc); /** * tty_unregister_ldisc - unload a line discipline * @ldisc: ldisc number * * Remove a line discipline from the kernel providing it is not currently in * use. * * Locking: takes %tty_ldiscs_lock to guard against ldisc races */ void tty_unregister_ldisc(struct tty_ldisc_ops *ldisc) { unsigned long flags; raw_spin_lock_irqsave(&tty_ldiscs_lock, flags); tty_ldiscs[ldisc->num] = NULL; raw_spin_unlock_irqrestore(&tty_ldiscs_lock, flags); } EXPORT_SYMBOL(tty_unregister_ldisc); static struct tty_ldisc_ops *get_ldops(int disc) { unsigned long flags; struct tty_ldisc_ops *ldops, *ret; raw_spin_lock_irqsave(&tty_ldiscs_lock, flags); ret = ERR_PTR(-EINVAL); ldops = tty_ldiscs[disc]; if (ldops) { ret = ERR_PTR(-EAGAIN); if (try_module_get(ldops->owner)) ret = ldops; } raw_spin_unlock_irqrestore(&tty_ldiscs_lock, flags); return ret; } static void put_ldops(struct tty_ldisc_ops *ldops) { unsigned long flags; raw_spin_lock_irqsave(&tty_ldiscs_lock, flags); module_put(ldops->owner); raw_spin_unlock_irqrestore(&tty_ldiscs_lock, flags); } int tty_ldisc_autoload = IS_BUILTIN(CONFIG_LDISC_AUTOLOAD); /** * tty_ldisc_get - take a reference to an ldisc * @tty: tty device * @disc: ldisc number * * Takes a reference to a line discipline. Deals with refcounts and module * locking counts. If the discipline is not available, its module loaded, if * possible. * * Returns: * * -%EINVAL if the discipline index is not [%N_TTY .. %NR_LDISCS] or if the * discipline is not registered * * -%EAGAIN if request_module() failed to load or register the discipline * * -%ENOMEM if allocation failure * * Otherwise, returns a pointer to the discipline and bumps the ref count * * Locking: takes %tty_ldiscs_lock to guard against ldisc races */ static struct tty_ldisc *tty_ldisc_get(struct tty_struct *tty, int disc) { struct tty_ldisc *ld; struct tty_ldisc_ops *ldops; if (disc < N_TTY || disc >= NR_LDISCS) return ERR_PTR(-EINVAL); /* * Get the ldisc ops - we may need to request them to be loaded * dynamically and try again. */ ldops = get_ldops(disc); if (IS_ERR(ldops)) { if (!capable(CAP_SYS_MODULE) && !tty_ldisc_autoload) return ERR_PTR(-EPERM); request_module("tty-ldisc-%d", disc); ldops = get_ldops(disc); if (IS_ERR(ldops)) return ERR_CAST(ldops); } /* * There is no way to handle allocation failure of only 16 bytes. * Let's simplify error handling and save more memory. */ ld = kmalloc(sizeof(struct tty_ldisc), GFP_KERNEL | __GFP_NOFAIL); ld->ops = ldops; ld->tty = tty; return ld; } /** * tty_ldisc_put - release the ldisc * @ld: lisdsc to release * * Complement of tty_ldisc_get(). */ static void tty_ldisc_put(struct tty_ldisc *ld) { if (WARN_ON_ONCE(!ld)) return; put_ldops(ld->ops); kfree(ld); } static void *tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos) { return (*pos < NR_LDISCS) ? pos : NULL; } static void *tty_ldiscs_seq_next(struct seq_file *m, void *v, loff_t *pos) { (*pos)++; return (*pos < NR_LDISCS) ? pos : NULL; } static void tty_ldiscs_seq_stop(struct seq_file *m, void *v) { } static int tty_ldiscs_seq_show(struct seq_file *m, void *v) { int i = *(loff_t *)v; struct tty_ldisc_ops *ldops; ldops = get_ldops(i); if (IS_ERR(ldops)) return 0; seq_printf(m, "%-10s %2d\n", ldops->name ? ldops->name : "???", i); put_ldops(ldops); return 0; } const struct seq_operations tty_ldiscs_seq_ops = { .start = tty_ldiscs_seq_start, .next = tty_ldiscs_seq_next, .stop = tty_ldiscs_seq_stop, .show = tty_ldiscs_seq_show, }; /** * tty_ldisc_ref_wait - wait for the tty ldisc * @tty: tty device * * Dereference the line discipline for the terminal and take a reference to it. * If the line discipline is in flux then wait patiently until it changes. * * Returns: %NULL if the tty has been hungup and not re-opened with a new file * descriptor, otherwise valid ldisc reference * * Note 1: Must not be called from an IRQ/timer context. The caller must also * be careful not to hold other locks that will deadlock against a discipline * change, such as an existing ldisc reference (which we check for). * * Note 2: a file_operations routine (read/poll/write) should use this function * to wait for any ldisc lifetime events to finish. */ struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *tty) { struct tty_ldisc *ld; ldsem_down_read(&tty->ldisc_sem, MAX_SCHEDULE_TIMEOUT); ld = tty->ldisc; if (!ld) ldsem_up_read(&tty->ldisc_sem); return ld; } EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait); /** * tty_ldisc_ref - get the tty ldisc * @tty: tty device * * Dereference the line discipline for the terminal and take a reference to it. * If the line discipline is in flux then return %NULL. Can be called from IRQ * and timer functions. */ struct tty_ldisc *tty_ldisc_ref(struct tty_struct *tty) { struct tty_ldisc *ld = NULL; if (ldsem_down_read_trylock(&tty->ldisc_sem)) { ld = tty->ldisc; if (!ld) ldsem_up_read(&tty->ldisc_sem); } return ld; } EXPORT_SYMBOL_GPL(tty_ldisc_ref); /** * tty_ldisc_deref - free a tty ldisc reference * @ld: reference to free up * * Undoes the effect of tty_ldisc_ref() or tty_ldisc_ref_wait(). May be called * in IRQ context. */ void tty_ldisc_deref(struct tty_ldisc *ld) { ldsem_up_read(&ld->tty->ldisc_sem); } EXPORT_SYMBOL_GPL(tty_ldisc_deref); static inline int __tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout) { return ldsem_down_write(&tty->ldisc_sem, timeout); } static inline int __tty_ldisc_lock_nested(struct tty_struct *tty, unsigned long timeout) { return ldsem_down_write_nested(&tty->ldisc_sem, LDISC_SEM_OTHER, timeout); } static inline void __tty_ldisc_unlock(struct tty_struct *tty) { ldsem_up_write(&tty->ldisc_sem); } int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout) { int ret; /* Kindly asking blocked readers to release the read side */ set_bit(TTY_LDISC_CHANGING, &tty->flags); wake_up_interruptible_all(&tty->read_wait); wake_up_interruptible_all(&tty->write_wait); ret = __tty_ldisc_lock(tty, timeout); if (!ret) return -EBUSY; set_bit(TTY_LDISC_HALTED, &tty->flags); return 0; } void tty_ldisc_unlock(struct tty_struct *tty) { clear_bit(TTY_LDISC_HALTED, &tty->flags); /* Can be cleared here - ldisc_unlock will wake up writers firstly */ clear_bit(TTY_LDISC_CHANGING, &tty->flags); __tty_ldisc_unlock(tty); } static int tty_ldisc_lock_pair_timeout(struct tty_struct *tty, struct tty_struct *tty2, unsigned long timeout) { int ret; if (tty < tty2) { ret = __tty_ldisc_lock(tty, timeout); if (ret) { ret = __tty_ldisc_lock_nested(tty2, timeout); if (!ret) __tty_ldisc_unlock(tty); } } else { /* if this is possible, it has lots of implications */ WARN_ON_ONCE(tty == tty2); if (tty2 && tty != tty2) { ret = __tty_ldisc_lock(tty2, timeout); if (ret) { ret = __tty_ldisc_lock_nested(tty, timeout); if (!ret) __tty_ldisc_unlock(tty2); } } else ret = __tty_ldisc_lock(tty, timeout); } if (!ret) return -EBUSY; set_bit(TTY_LDISC_HALTED, &tty->flags); if (tty2) set_bit(TTY_LDISC_HALTED, &tty2->flags); return 0; } static void tty_ldisc_lock_pair(struct tty_struct *tty, struct tty_struct *tty2) { tty_ldisc_lock_pair_timeout(tty, tty2, MAX_SCHEDULE_TIMEOUT); } static void tty_ldisc_unlock_pair(struct tty_struct *tty, struct tty_struct *tty2) { __tty_ldisc_unlock(tty); if (tty2) __tty_ldisc_unlock(tty2); } /** * tty_ldisc_flush - flush line discipline queue * @tty: tty to flush ldisc for * * Flush the line discipline queue (if any) and the tty flip buffers for this * @tty. */ void tty_ldisc_flush(struct tty_struct *tty) { struct tty_ldisc *ld = tty_ldisc_ref(tty); tty_buffer_flush(tty, ld); if (ld) tty_ldisc_deref(ld); } EXPORT_SYMBOL_GPL(tty_ldisc_flush); /** * tty_set_termios_ldisc - set ldisc field * @tty: tty structure * @disc: line discipline number * * This is probably overkill for real world processors but they are not on hot * paths so a little discipline won't do any harm. * * The line discipline-related tty_struct fields are reset to prevent the ldisc * driver from re-using stale information for the new ldisc instance. * * Locking: takes termios_rwsem */ static void tty_set_termios_ldisc(struct tty_struct *tty, int disc) { down_write(&tty->termios_rwsem); tty->termios.c_line = disc; up_write(&tty->termios_rwsem); tty->disc_data = NULL; tty->receive_room = 0; } /** * tty_ldisc_open - open a line discipline * @tty: tty we are opening the ldisc on * @ld: discipline to open * * A helper opening method. Also a convenient debugging and check point. * * Locking: always called with BTM already held. */ static int tty_ldisc_open(struct tty_struct *tty, struct tty_ldisc *ld) { WARN_ON(test_and_set_bit(TTY_LDISC_OPEN, &tty->flags)); if (ld->ops->open) { int ret; /* BTM here locks versus a hangup event */ ret = ld->ops->open(tty); if (ret) clear_bit(TTY_LDISC_OPEN, &tty->flags); tty_ldisc_debug(tty, "%p: opened\n", ld); return ret; } return 0; } /** * tty_ldisc_close - close a line discipline * @tty: tty we are opening the ldisc on * @ld: discipline to close * * A helper close method. Also a convenient debugging and check point. */ static void tty_ldisc_close(struct tty_struct *tty, struct tty_ldisc *ld) { lockdep_assert_held_write(&tty->ldisc_sem); WARN_ON(!test_bit(TTY_LDISC_OPEN, &tty->flags)); clear_bit(TTY_LDISC_OPEN, &tty->flags); if (ld->ops->close) ld->ops->close(tty); tty_ldisc_debug(tty, "%p: closed\n", ld); } /** * tty_ldisc_failto - helper for ldisc failback * @tty: tty to open the ldisc on * @ld: ldisc we are trying to fail back to * * Helper to try and recover a tty when switching back to the old ldisc fails * and we need something attached. */ static int tty_ldisc_failto(struct tty_struct *tty, int ld) { struct tty_ldisc *disc = tty_ldisc_get(tty, ld); int r; lockdep_assert_held_write(&tty->ldisc_sem); if (IS_ERR(disc)) return PTR_ERR(disc); tty->ldisc = disc; tty_set_termios_ldisc(tty, ld); r = tty_ldisc_open(tty, disc); if (r < 0) tty_ldisc_put(disc); return r; } /** * tty_ldisc_restore - helper for tty ldisc change * @tty: tty to recover * @old: previous ldisc * * Restore the previous line discipline or %N_TTY when a line discipline change * fails due to an open error */ static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old) { /* There is an outstanding reference here so this is safe */ if (tty_ldisc_failto(tty, old->ops->num) < 0) { const char *name = tty_name(tty); pr_warn("Falling back ldisc for %s.\n", name); /* * The traditional behaviour is to fall back to N_TTY, we * want to avoid falling back to N_NULL unless we have no * choice to avoid the risk of breaking anything */ if (tty_ldisc_failto(tty, N_TTY) < 0 && tty_ldisc_failto(tty, N_NULL) < 0) panic("Couldn't open N_NULL ldisc for %s.", name); } } /** * tty_set_ldisc - set line discipline * @tty: the terminal to set * @disc: the line discipline number * * Set the discipline of a tty line. Must be called from a process context. The * ldisc change logic has to protect itself against any overlapping ldisc * change (including on the other end of pty pairs), the close of one side of a * tty/pty pair, and eventually hangup. */ int tty_set_ldisc(struct tty_struct *tty, int disc) { int retval; struct tty_ldisc *old_ldisc, *new_ldisc; new_ldisc = tty_ldisc_get(tty, disc); if (IS_ERR(new_ldisc)) return PTR_ERR(new_ldisc); tty_lock(tty); retval = tty_ldisc_lock(tty, 5 * HZ); if (retval) goto err; if (!tty->ldisc) { retval = -EIO; goto out; } /* Check the no-op case */ if (tty->ldisc->ops->num == disc) goto out; if (test_bit(TTY_HUPPED, &tty->flags)) { /* We were raced by hangup */ retval = -EIO; goto out; } if (tty->ops->ldisc_ok) { retval = tty->ops->ldisc_ok(tty, disc); if (retval) goto out; } old_ldisc = tty->ldisc; /* Shutdown the old discipline. */ tty_ldisc_close(tty, old_ldisc); /* Now set up the new line discipline. */ tty->ldisc = new_ldisc; tty_set_termios_ldisc(tty, disc); retval = tty_ldisc_open(tty, new_ldisc); if (retval < 0) { /* Back to the old one or N_TTY if we can't */ tty_ldisc_put(new_ldisc); tty_ldisc_restore(tty, old_ldisc); } if (tty->ldisc->ops->num != old_ldisc->ops->num && tty->ops->set_ldisc) { down_read(&tty->termios_rwsem); tty->ops->set_ldisc(tty); up_read(&tty->termios_rwsem); } /* * At this point we hold a reference to the new ldisc and a * reference to the old ldisc, or we hold two references to * the old ldisc (if it was restored as part of error cleanup * above). In either case, releasing a single reference from * the old ldisc is correct. */ new_ldisc = old_ldisc; out: tty_ldisc_unlock(tty); /* * Restart the work queue in case no characters kick it off. Safe if * already running */ tty_buffer_restart_work(tty->port); err: tty_ldisc_put(new_ldisc); /* drop the extra reference */ tty_unlock(tty); return retval; } EXPORT_SYMBOL_GPL(tty_set_ldisc); /** * tty_ldisc_kill - teardown ldisc * @tty: tty being released * * Perform final close of the ldisc and reset @tty->ldisc */ static void tty_ldisc_kill(struct tty_struct *tty) { lockdep_assert_held_write(&tty->ldisc_sem); if (!tty->ldisc) return; /* * Now kill off the ldisc */ tty_ldisc_close(tty, tty->ldisc); tty_ldisc_put(tty->ldisc); /* Force an oops if we mess this up */ tty->ldisc = NULL; } /** * tty_reset_termios - reset terminal state * @tty: tty to reset * * Restore a terminal to the driver default state. */ static void tty_reset_termios(struct tty_struct *tty) { down_write(&tty->termios_rwsem); tty->termios = tty->driver->init_termios; tty->termios.c_ispeed = tty_termios_input_baud_rate(&tty->termios); tty->termios.c_ospeed = tty_termios_baud_rate(&tty->termios); up_write(&tty->termios_rwsem); } /** * tty_ldisc_reinit - reinitialise the tty ldisc * @tty: tty to reinit * @disc: line discipline to reinitialize * * Completely reinitialize the line discipline state, by closing the current * instance, if there is one, and opening a new instance. If an error occurs * opening the new non-%N_TTY instance, the instance is dropped and @tty->ldisc * reset to %NULL. The caller can then retry with %N_TTY instead. * * Returns: 0 if successful, otherwise error code < 0 */ int tty_ldisc_reinit(struct tty_struct *tty, int disc) { struct tty_ldisc *ld; int retval; lockdep_assert_held_write(&tty->ldisc_sem); ld = tty_ldisc_get(tty, disc); if (IS_ERR(ld)) { BUG_ON(disc == N_TTY); return PTR_ERR(ld); } if (tty->ldisc) { tty_ldisc_close(tty, tty->ldisc); tty_ldisc_put(tty->ldisc); } /* switch the line discipline */ tty->ldisc = ld; tty_set_termios_ldisc(tty, disc); retval = tty_ldisc_open(tty, tty->ldisc); if (retval) { tty_ldisc_put(tty->ldisc); tty->ldisc = NULL; } return retval; } /** * tty_ldisc_hangup - hangup ldisc reset * @tty: tty being hung up * @reinit: whether to re-initialise the tty * * Some tty devices reset their termios when they receive a hangup event. In * that situation we must also switch back to %N_TTY properly before we reset * the termios data. * * Locking: We can take the ldisc mutex as the rest of the code is careful to * allow for this. * * In the pty pair case this occurs in the close() path of the tty itself so we * must be careful about locking rules. */ void tty_ldisc_hangup(struct tty_struct *tty, bool reinit) { struct tty_ldisc *ld; tty_ldisc_debug(tty, "%p: hangup\n", tty->ldisc); ld = tty_ldisc_ref(tty); if (ld != NULL) { if (ld->ops->flush_buffer) ld->ops->flush_buffer(tty); tty_driver_flush_buffer(tty); if ((test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) && ld->ops->write_wakeup) ld->ops->write_wakeup(tty); if (ld->ops->hangup) ld->ops->hangup(tty); tty_ldisc_deref(ld); } wake_up_interruptible_poll(&tty->write_wait, EPOLLOUT); wake_up_interruptible_poll(&tty->read_wait, EPOLLIN); /* * Shutdown the current line discipline, and reset it to * N_TTY if need be. * * Avoid racing set_ldisc or tty_ldisc_release */ tty_ldisc_lock(tty, MAX_SCHEDULE_TIMEOUT); if (tty->driver->flags & TTY_DRIVER_RESET_TERMIOS) tty_reset_termios(tty); if (tty->ldisc) { if (reinit) { if (tty_ldisc_reinit(tty, tty->termios.c_line) < 0 && tty_ldisc_reinit(tty, N_TTY) < 0) WARN_ON(tty_ldisc_reinit(tty, N_NULL) < 0); } else tty_ldisc_kill(tty); } tty_ldisc_unlock(tty); } /** * tty_ldisc_setup - open line discipline * @tty: tty being shut down * @o_tty: pair tty for pty/tty pairs * * Called during the initial open of a tty/pty pair in order to set up the line * disciplines and bind them to the @tty. This has no locking issues as the * device isn't yet active. */ int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty) { int retval = tty_ldisc_open(tty, tty->ldisc); if (retval) return retval; if (o_tty) { /* * Called without o_tty->ldisc_sem held, as o_tty has been * just allocated and no one has a reference to it. */ retval = tty_ldisc_open(o_tty, o_tty->ldisc); if (retval) { tty_ldisc_close(tty, tty->ldisc); return retval; } } return 0; } /** * tty_ldisc_release - release line discipline * @tty: tty being shut down (or one end of pty pair) * * Called during the final close of a tty or a pty pair in order to shut down * the line discpline layer. On exit, each tty's ldisc is %NULL. */ void tty_ldisc_release(struct tty_struct *tty) { struct tty_struct *o_tty = tty->link; /* * Shutdown this line discipline. As this is the final close, * it does not race with the set_ldisc code path. */ tty_ldisc_lock_pair(tty, o_tty); tty_ldisc_kill(tty); if (o_tty) tty_ldisc_kill(o_tty); tty_ldisc_unlock_pair(tty, o_tty); /* * And the memory resources remaining (buffers, termios) will be * disposed of when the kref hits zero */ tty_ldisc_debug(tty, "released\n"); } /** * tty_ldisc_init - ldisc setup for new tty * @tty: tty being allocated * * Set up the line discipline objects for a newly allocated tty. Note that the * tty structure is not completely set up when this call is made. */ int tty_ldisc_init(struct tty_struct *tty) { struct tty_ldisc *ld = tty_ldisc_get(tty, N_TTY); if (IS_ERR(ld)) return PTR_ERR(ld); tty->ldisc = ld; return 0; } /** * tty_ldisc_deinit - ldisc cleanup for new tty * @tty: tty that was allocated recently * * The tty structure must not be completely set up (tty_ldisc_setup()) when * this call is made. */ void tty_ldisc_deinit(struct tty_struct *tty) { /* no ldisc_sem, tty is being destroyed */ if (tty->ldisc) tty_ldisc_put(tty->ldisc); tty->ldisc = NULL; }
6 1 3 4 3 2 3 3 2 3 2 2 2 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 // SPDX-License-Identifier: GPL-2.0-only /* * KVM binary statistics interface implementation * * Copyright 2021 Google LLC */ #include <linux/kvm_host.h> #include <linux/kvm.h> #include <linux/errno.h> #include <linux/uaccess.h> /** * kvm_stats_read() - Common function to read from the binary statistics * file descriptor. * * @id: identification string of the stats * @header: stats header for a vm or a vcpu * @desc: start address of an array of stats descriptors for a vm or a vcpu * @stats: start address of stats data block for a vm or a vcpu * @size_stats: the size of stats data block pointed by @stats * @user_buffer: start address of userspace buffer * @size: requested read size from userspace * @offset: the start position from which the content will be read for the * corresponding vm or vcp file descriptor * * The file content of a vm/vcpu file descriptor is now defined as below: * +-------------+ * | Header | * +-------------+ * | id string | * +-------------+ * | Descriptors | * +-------------+ * | Stats Data | * +-------------+ * Although this function allows userspace to read any amount of data (as long * as in the limit) from any position, the typical usage would follow below * steps: * 1. Read header from offset 0. Get the offset of descriptors and stats data * and some other necessary information. This is a one-time work for the * lifecycle of the corresponding vm/vcpu stats fd. * 2. Read id string from its offset. This is a one-time work for the lifecycle * of the corresponding vm/vcpu stats fd. * 3. Read descriptors from its offset and discover all the stats by parsing * descriptors. This is a one-time work for the lifecycle of the * corresponding vm/vcpu stats fd. * 4. Periodically read stats data from its offset using pread. * * Return: the number of bytes that has been successfully read */ ssize_t kvm_stats_read(char *id, const struct kvm_stats_header *header, const struct kvm_stats_desc *desc, void *stats, size_t size_stats, char __user *user_buffer, size_t size, loff_t *offset) { ssize_t len; ssize_t copylen; ssize_t remain = size; size_t size_desc; size_t size_header; void *src; loff_t pos = *offset; char __user *dest = user_buffer; size_header = sizeof(*header); size_desc = header->num_desc * sizeof(*desc); len = KVM_STATS_NAME_SIZE + size_header + size_desc + size_stats - pos; len = min(len, remain); if (len <= 0) return 0; remain = len; /* * Copy kvm stats header. * The header is the first block of content userspace usually read out. * The pos is 0 and the copylen and remain would be the size of header. * The copy of the header would be skipped if offset is larger than the * size of header. That usually happens when userspace reads stats * descriptors and stats data. */ copylen = size_header - pos; copylen = min(copylen, remain); if (copylen > 0) { src = (void *)header + pos; if (copy_to_user(dest, src, copylen)) return -EFAULT; remain -= copylen; pos += copylen; dest += copylen; } /* * Copy kvm stats header id string. * The id string is unique for every vm/vcpu, which is stored in kvm * and kvm_vcpu structure. * The id string is part of the stat header from the perspective of * userspace, it is usually read out together with previous constant * header part and could be skipped for later descriptors and stats * data readings. */ copylen = header->id_offset + KVM_STATS_NAME_SIZE - pos; copylen = min(copylen, remain); if (copylen > 0) { src = id + pos - header->id_offset; if (copy_to_user(dest, src, copylen)) return -EFAULT; remain -= copylen; pos += copylen; dest += copylen; } /* * Copy kvm stats descriptors. * The descriptors copy would be skipped in the typical case that * userspace periodically read stats data, since the pos would be * greater than the end address of descriptors * (header->header.desc_offset + size_desc) causing copylen <= 0. */ copylen = header->desc_offset + size_desc - pos; copylen = min(copylen, remain); if (copylen > 0) { src = (void *)desc + pos - header->desc_offset; if (copy_to_user(dest, src, copylen)) return -EFAULT; remain -= copylen; pos += copylen; dest += copylen; } /* Copy kvm stats values */ copylen = header->data_offset + size_stats - pos; copylen = min(copylen, remain); if (copylen > 0) { src = stats + pos - header->data_offset; if (copy_to_user(dest, src, copylen)) return -EFAULT; pos += copylen; } *offset = pos; return len; }
2 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 // SPDX-License-Identifier: GPL-2.0-only /* * (C) 2007 Patrick McHardy <kaber@trash.net> */ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/gen_stats.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_rateest.h> #include <net/netfilter/xt_rateest.h> static bool xt_rateest_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_rateest_match_info *info = par->matchinfo; struct gnet_stats_rate_est64 sample = {0}; u_int32_t bps1, bps2, pps1, pps2; bool ret = true; gen_estimator_read(&info->est1->rate_est, &sample); if (info->flags & XT_RATEEST_MATCH_DELTA) { bps1 = info->bps1 >= sample.bps ? info->bps1 - sample.bps : 0; pps1 = info->pps1 >= sample.pps ? info->pps1 - sample.pps : 0; } else { bps1 = sample.bps; pps1 = sample.pps; } if (info->flags & XT_RATEEST_MATCH_ABS) { bps2 = info->bps2; pps2 = info->pps2; } else { gen_estimator_read(&info->est2->rate_est, &sample); if (info->flags & XT_RATEEST_MATCH_DELTA) { bps2 = info->bps2 >= sample.bps ? info->bps2 - sample.bps : 0; pps2 = info->pps2 >= sample.pps ? info->pps2 - sample.pps : 0; } else { bps2 = sample.bps; pps2 = sample.pps; } } switch (info->mode) { case XT_RATEEST_MATCH_LT: if (info->flags & XT_RATEEST_MATCH_BPS) ret &= bps1 < bps2; if (info->flags & XT_RATEEST_MATCH_PPS) ret &= pps1 < pps2; break; case XT_RATEEST_MATCH_GT: if (info->flags & XT_RATEEST_MATCH_BPS) ret &= bps1 > bps2; if (info->flags & XT_RATEEST_MATCH_PPS) ret &= pps1 > pps2; break; case XT_RATEEST_MATCH_EQ: if (info->flags & XT_RATEEST_MATCH_BPS) ret &= bps1 == bps2; if (info->flags & XT_RATEEST_MATCH_PPS) ret &= pps1 == pps2; break; } ret ^= info->flags & XT_RATEEST_MATCH_INVERT ? true : false; return ret; } static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par) { struct xt_rateest_match_info *info = par->matchinfo; struct xt_rateest *est1, *est2; int ret = -EINVAL; if (hweight32(info->flags & (XT_RATEEST_MATCH_ABS | XT_RATEEST_MATCH_REL)) != 1) goto err1; if (!(info->flags & (XT_RATEEST_MATCH_BPS | XT_RATEEST_MATCH_PPS))) goto err1; switch (info->mode) { case XT_RATEEST_MATCH_EQ: case XT_RATEEST_MATCH_LT: case XT_RATEEST_MATCH_GT: break; default: goto err1; } ret = -ENOENT; est1 = xt_rateest_lookup(par->net, info->name1); if (!est1) goto err1; est2 = NULL; if (info->flags & XT_RATEEST_MATCH_REL) { est2 = xt_rateest_lookup(par->net, info->name2); if (!est2) goto err2; } info->est1 = est1; info->est2 = est2; return 0; err2: xt_rateest_put(par->net, est1); err1: return ret; } static void xt_rateest_mt_destroy(const struct xt_mtdtor_param *par) { struct xt_rateest_match_info *info = par->matchinfo; xt_rateest_put(par->net, info->est1); if (info->est2) xt_rateest_put(par->net, info->est2); } static struct xt_match xt_rateest_mt_reg __read_mostly = { .name = "rateest", .revision = 0, .family = NFPROTO_UNSPEC, .match = xt_rateest_mt, .checkentry = xt_rateest_mt_checkentry, .destroy = xt_rateest_mt_destroy, .matchsize = sizeof(struct xt_rateest_match_info), .usersize = offsetof(struct xt_rateest_match_info, est1), .me = THIS_MODULE, }; static int __init xt_rateest_mt_init(void) { return xt_register_match(&xt_rateest_mt_reg); } static void __exit xt_rateest_mt_fini(void) { xt_unregister_match(&xt_rateest_mt_reg); } MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("xtables rate estimator match"); MODULE_ALIAS("ipt_rateest"); MODULE_ALIAS("ip6t_rateest"); module_init(xt_rateest_mt_init); module_exit(xt_rateest_mt_fini);
1 1 1 6 5 1 6 6 5 5 3 3 2 2 4 70 67 4 4 4 3 4 4 4 5 3 1 4 1 3 2 2 2 2 2 2 2 2 3 5 5 1 3 2 2 3 4 2 2 6 6 6 1 2 4 2 4 4 4 1 3 1 3 9 1 2 3 3 1 1 2 1 5 4 2 5 5 12 1 4 6 2 1 1 1 15 151 31 151 179 179 178 116 70 19 19 178 179 55 53 107 108 108 96 15 13 11 107 96 96 95 95 85 15 96 95 95 95 85 15 95 29 29 29 26 2 29 11 11 11 4 7 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk> * */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kernel.h> #include <linux/blkdev.h> #include <linux/blktrace_api.h> #include <linux/percpu.h> #include <linux/init.h> #include <linux/mutex.h> #include <linux/slab.h> #include <linux/debugfs.h> #include <linux/export.h> #include <linux/time.h> #include <linux/uaccess.h> #include <linux/list.h> #include <linux/blk-cgroup.h> #include "../../block/blk.h" #include <trace/events/block.h> #include "trace_output.h" #ifdef CONFIG_BLK_DEV_IO_TRACE static unsigned int blktrace_seq __read_mostly = 1; static struct trace_array *blk_tr; static bool blk_tracer_enabled __read_mostly; static LIST_HEAD(running_trace_list); static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(running_trace_lock); /* Select an alternative, minimalistic output than the original one */ #define TRACE_BLK_OPT_CLASSIC 0x1 #define TRACE_BLK_OPT_CGROUP 0x2 #define TRACE_BLK_OPT_CGNAME 0x4 static struct tracer_opt blk_tracer_opts[] = { /* Default disable the minimalistic output */ { TRACER_OPT(blk_classic, TRACE_BLK_OPT_CLASSIC) }, #ifdef CONFIG_BLK_CGROUP { TRACER_OPT(blk_cgroup, TRACE_BLK_OPT_CGROUP) }, { TRACER_OPT(blk_cgname, TRACE_BLK_OPT_CGNAME) }, #endif { } }; static struct tracer_flags blk_tracer_flags = { .val = 0, .opts = blk_tracer_opts, }; /* Global reference count of probes */ static DEFINE_MUTEX(blk_probe_mutex); static int blk_probes_ref; static void blk_register_tracepoints(void); static void blk_unregister_tracepoints(void); static void record_blktrace_event(struct blk_io_trace *t, pid_t pid, int cpu, sector_t sector, int bytes, u64 what, dev_t dev, int error, u64 cgid, ssize_t cgid_len, void *pdu_data, int pdu_len) { /* * These two are not needed in ftrace as they are in the * generic trace_entry, filled by tracing_generic_entry_update, * but for the trace_event->bin() synthesizer benefit we do it * here too. */ t->cpu = cpu; t->pid = pid; t->sector = sector; t->bytes = bytes; t->action = lower_32_bits(what); t->device = dev; t->error = error; t->pdu_len = pdu_len + cgid_len; if (cgid_len) memcpy((void *)t + sizeof(*t), &cgid, cgid_len); if (pdu_len) memcpy((void *)t + sizeof(*t) + cgid_len, pdu_data, pdu_len); } static void record_blktrace_event2(struct blk_io_trace2 *t2, pid_t pid, int cpu, sector_t sector, int bytes, u64 what, dev_t dev, int error, u64 cgid, ssize_t cgid_len, void *pdu_data, int pdu_len) { t2->pid = pid; t2->cpu = cpu; t2->sector = sector; t2->bytes = bytes; t2->action = what; t2->device = dev; t2->error = error; t2->pdu_len = pdu_len + cgid_len; if (cgid_len) memcpy((void *)t2 + sizeof(*t2), &cgid, cgid_len); if (pdu_len) memcpy((void *)t2 + sizeof(*t2) + cgid_len, pdu_data, pdu_len); } static void relay_blktrace_event1(struct blk_trace *bt, unsigned long sequence, pid_t pid, int cpu, sector_t sector, int bytes, u64 what, int error, u64 cgid, ssize_t cgid_len, void *pdu_data, int pdu_len) { struct blk_io_trace *t; size_t trace_len = sizeof(*t) + pdu_len + cgid_len; t = relay_reserve(bt->rchan, trace_len); if (!t) return; t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION; t->sequence = sequence; t->time = ktime_to_ns(ktime_get()); record_blktrace_event(t, pid, cpu, sector, bytes, what, bt->dev, error, cgid, cgid_len, pdu_data, pdu_len); } static void relay_blktrace_event2(struct blk_trace *bt, unsigned long sequence, pid_t pid, int cpu, sector_t sector, int bytes, u64 what, int error, u64 cgid, ssize_t cgid_len, void *pdu_data, int pdu_len) { struct blk_io_trace2 *t; size_t trace_len = sizeof(struct blk_io_trace2) + pdu_len + cgid_len; t = relay_reserve(bt->rchan, trace_len); if (!t) return; t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE2_VERSION; t->sequence = sequence; t->time = ktime_to_ns(ktime_get()); record_blktrace_event2(t, pid, cpu, sector, bytes, what, bt->dev, error, cgid, cgid_len, pdu_data, pdu_len); } static void relay_blktrace_event(struct blk_trace *bt, unsigned long sequence, pid_t pid, int cpu, sector_t sector, int bytes, u64 what, int error, u64 cgid, ssize_t cgid_len, void *pdu_data, int pdu_len) { if (bt->version == 2) return relay_blktrace_event2(bt, sequence, pid, cpu, sector, bytes, what, error, cgid, cgid_len, pdu_data, pdu_len); return relay_blktrace_event1(bt, sequence, pid, cpu, sector, bytes, what, error, cgid, cgid_len, pdu_data, pdu_len); } /* * Send out a notify message. */ static void trace_note(struct blk_trace *bt, pid_t pid, u64 action, const void *data, size_t len, u64 cgid) { struct ring_buffer_event *event = NULL; struct trace_buffer *buffer = NULL; unsigned int trace_ctx = 0; int cpu = smp_processor_id(); bool blk_tracer = blk_tracer_enabled; ssize_t cgid_len = cgid ? sizeof(cgid) : 0; action = lower_32_bits(action | (cgid ? __BLK_TN_CGROUP : 0)); if (blk_tracer) { struct blk_io_trace2 *t; size_t trace_len = sizeof(*t) + cgid_len + len; buffer = blk_tr->array_buffer.buffer; trace_ctx = tracing_gen_ctx_flags(0); event = trace_buffer_lock_reserve(buffer, TRACE_BLK, trace_len, trace_ctx); if (!event) return; t = ring_buffer_event_data(event); record_blktrace_event2(t, pid, cpu, 0, 0, action, bt->dev, 0, cgid, cgid_len, (void *)data, len); trace_buffer_unlock_commit(blk_tr, buffer, event, trace_ctx); return; } if (!bt->rchan) return; relay_blktrace_event(bt, 0, pid, cpu, 0, 0, action, 0, cgid, cgid_len, (void *)data, len); } /* * Send out a notify for this process, if we haven't done so since a trace * started */ static void trace_note_tsk(struct task_struct *tsk) { unsigned long flags; struct blk_trace *bt; tsk->btrace_seq = blktrace_seq; raw_spin_lock_irqsave(&running_trace_lock, flags); list_for_each_entry(bt, &running_trace_list, running_list) { trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm, sizeof(tsk->comm), 0); } raw_spin_unlock_irqrestore(&running_trace_lock, flags); } static void trace_note_time(struct blk_trace *bt) { struct timespec64 now; unsigned long flags; u32 words[2]; /* need to check user space to see if this breaks in y2038 or y2106 */ ktime_get_real_ts64(&now); words[0] = (u32)now.tv_sec; words[1] = now.tv_nsec; local_irq_save(flags); trace_note(bt, 0, BLK_TN_TIMESTAMP, words, sizeof(words), 0); local_irq_restore(flags); } void __blk_trace_note_message(struct blk_trace *bt, struct cgroup_subsys_state *css, const char *fmt, ...) { int n; va_list args; unsigned long flags; char *buf; u64 cgid = 0; if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer_enabled)) return; /* * If the BLK_TC_NOTIFY action mask isn't set, don't send any note * message to the trace. */ if (!(bt->act_mask & BLK_TC_NOTIFY)) return; local_irq_save(flags); buf = this_cpu_ptr(bt->msg_data); va_start(args, fmt); n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args); va_end(args); #ifdef CONFIG_BLK_CGROUP if (css && (blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP)) cgid = cgroup_id(css->cgroup); else cgid = 1; #endif trace_note(bt, current->pid, BLK_TN_MESSAGE, buf, n, cgid); local_irq_restore(flags); } EXPORT_SYMBOL_GPL(__blk_trace_note_message); static int act_log_check(struct blk_trace *bt, u64 what, sector_t sector, pid_t pid) { if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0) return 1; if (sector && (sector < bt->start_lba || sector > bt->end_lba)) return 1; if (bt->pid && pid != bt->pid) return 1; return 0; } /* * Data direction bit lookup */ static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK_TC_WRITE) }; #define BLK_TC_RAHEAD BLK_TC_AHEAD #define BLK_TC_PREFLUSH BLK_TC_FLUSH /* The ilog2() calls fall out because they're constant */ #define MASK_TC_BIT(rw, __name) ((__force u32)(rw & REQ_ ## __name) << \ (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - __REQ_ ## __name)) /* * The worker for the various blk_add_trace*() types. Fills out a * blk_io_trace structure and places it in a per-cpu subbuffer. */ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, const blk_opf_t opf, u64 what, int error, int pdu_len, void *pdu_data, u64 cgid) { struct task_struct *tsk = current; struct ring_buffer_event *event = NULL; struct trace_buffer *buffer = NULL; unsigned long flags = 0; unsigned long *sequence; unsigned int trace_ctx = 0; pid_t pid; int cpu; bool blk_tracer = blk_tracer_enabled; ssize_t cgid_len = cgid ? sizeof(cgid) : 0; const enum req_op op = opf & REQ_OP_MASK; size_t trace_len; if (unlikely(bt->trace_state != Blktrace_running && !blk_tracer)) return; what |= ddir_act[op_is_write(op) ? WRITE : READ]; what |= MASK_TC_BIT(opf, SYNC); what |= MASK_TC_BIT(opf, RAHEAD); what |= MASK_TC_BIT(opf, META); what |= MASK_TC_BIT(opf, PREFLUSH); what |= MASK_TC_BIT(opf, FUA); switch (op) { case REQ_OP_DISCARD: case REQ_OP_SECURE_ERASE: what |= BLK_TC_ACT(BLK_TC_DISCARD); break; case REQ_OP_FLUSH: what |= BLK_TC_ACT(BLK_TC_FLUSH); break; case REQ_OP_ZONE_APPEND: what |= BLK_TC_ACT(BLK_TC_ZONE_APPEND); break; case REQ_OP_ZONE_RESET: what |= BLK_TC_ACT(BLK_TC_ZONE_RESET); break; case REQ_OP_ZONE_RESET_ALL: what |= BLK_TC_ACT(BLK_TC_ZONE_RESET_ALL); break; case REQ_OP_ZONE_FINISH: what |= BLK_TC_ACT(BLK_TC_ZONE_FINISH); break; case REQ_OP_ZONE_OPEN: what |= BLK_TC_ACT(BLK_TC_ZONE_OPEN); break; case REQ_OP_ZONE_CLOSE: what |= BLK_TC_ACT(BLK_TC_ZONE_CLOSE); break; case REQ_OP_WRITE_ZEROES: what |= BLK_TC_ACT(BLK_TC_WRITE_ZEROES); break; default: break; } /* Drop trace events for zone operations with blktrace v1 */ if (bt->version == 1 && (what >> BLK_TC_SHIFT) > BLK_TC_END_V1) { pr_debug_ratelimited("blktrace v1 cannot trace zone operation 0x%llx\n", (unsigned long long)what); return; } if (cgid) what |= __BLK_TA_CGROUP; pid = tsk->pid; if (act_log_check(bt, what, sector, pid)) return; cpu = raw_smp_processor_id(); if (blk_tracer) { tracing_record_cmdline(current); buffer = blk_tr->array_buffer.buffer; trace_ctx = tracing_gen_ctx_flags(0); switch (bt->version) { case 1: trace_len = sizeof(struct blk_io_trace); break; case 2: default: /* * ftrace always uses v2 (blk_io_trace2) format. * * For sysfs-enabled tracing path (enabled via * /sys/block/DEV/trace/enable), blk_trace_setup_queue() * never initializes bt->version, leaving it 0 from * kzalloc(). We must handle version==0 safely here. * * Fall through to default to ensure we never hit the * old bug where default set trace_len=0, causing * buffer underflow and memory corruption. * * Always use v2 format for ftrace and normalize * bt->version to 2 when uninitialized. */ trace_len = sizeof(struct blk_io_trace2); if (bt->version == 0) bt->version = 2; break; } trace_len += pdu_len + cgid_len; event = trace_buffer_lock_reserve(buffer, TRACE_BLK, trace_len, trace_ctx); if (!event) return; switch (bt->version) { case 1: record_blktrace_event(ring_buffer_event_data(event), pid, cpu, sector, bytes, what, bt->dev, error, cgid, cgid_len, pdu_data, pdu_len); break; case 2: default: /* * Use v2 recording function (record_blktrace_event2) * which writes blk_io_trace2 structure with correct * field layout: * - 32-bit pid at offset 28 * - 64-bit action at offset 32 * * Fall through to default handles version==0 case * (from sysfs path), ensuring we always use correct * v2 recording function to match the v2 buffer * allocated above. */ record_blktrace_event2(ring_buffer_event_data(event), pid, cpu, sector, bytes, what, bt->dev, error, cgid, cgid_len, pdu_data, pdu_len); break; } trace_buffer_unlock_commit(blk_tr, buffer, event, trace_ctx); return; } if (unlikely(tsk->btrace_seq != blktrace_seq)) trace_note_tsk(tsk); /* * A word about the locking here - we disable interrupts to reserve * some space in the relay per-cpu buffer, to prevent an irq * from coming in and stepping on our toes. */ local_irq_save(flags); sequence = per_cpu_ptr(bt->sequence, cpu); (*sequence)++; relay_blktrace_event(bt, *sequence, pid, cpu, sector, bytes, what, error, cgid, cgid_len, pdu_data, pdu_len); local_irq_restore(flags); } static void blk_trace_free(struct request_queue *q, struct blk_trace *bt) { relay_close(bt->rchan); /* * If 'bt->dir' is not set, then both 'dropped' and 'msg' are created * under 'q->debugfs_dir', thus lookup and remove them. */ if (!bt->dir) { debugfs_lookup_and_remove("dropped", q->debugfs_dir); debugfs_lookup_and_remove("msg", q->debugfs_dir); } else { debugfs_remove(bt->dir); } free_percpu(bt->sequence); free_percpu(bt->msg_data); kfree(bt); } static void get_probe_ref(void) { mutex_lock(&blk_probe_mutex); if (++blk_probes_ref == 1) blk_register_tracepoints(); mutex_unlock(&blk_probe_mutex); } static void put_probe_ref(void) { mutex_lock(&blk_probe_mutex); if (!--blk_probes_ref) blk_unregister_tracepoints(); mutex_unlock(&blk_probe_mutex); } static int blk_trace_start(struct blk_trace *bt) { if (bt->trace_state != Blktrace_setup && bt->trace_state != Blktrace_stopped) return -EINVAL; blktrace_seq++; smp_mb(); bt->trace_state = Blktrace_running; raw_spin_lock_irq(&running_trace_lock); list_add(&bt->running_list, &running_trace_list); raw_spin_unlock_irq(&running_trace_lock); trace_note_time(bt); return 0; } static int blk_trace_stop(struct blk_trace *bt) { if (bt->trace_state != Blktrace_running) return -EINVAL; bt->trace_state = Blktrace_stopped; raw_spin_lock_irq(&running_trace_lock); list_del_init(&bt->running_list); raw_spin_unlock_irq(&running_trace_lock); relay_flush(bt->rchan); return 0; } static void blk_trace_cleanup(struct request_queue *q, struct blk_trace *bt) { blk_trace_stop(bt); synchronize_rcu(); blk_trace_free(q, bt); put_probe_ref(); } static int __blk_trace_remove(struct request_queue *q) { struct blk_trace *bt; bt = rcu_replace_pointer(q->blk_trace, NULL, lockdep_is_held(&q->debugfs_mutex)); if (!bt) return -EINVAL; blk_trace_cleanup(q, bt); return 0; } int blk_trace_remove(struct request_queue *q) { int ret; mutex_lock(&q->debugfs_mutex); ret = __blk_trace_remove(q); mutex_unlock(&q->debugfs_mutex); return ret; } EXPORT_SYMBOL_GPL(blk_trace_remove); static ssize_t blk_dropped_read(struct file *filp, char __user *buffer, size_t count, loff_t *ppos) { struct blk_trace *bt = filp->private_data; size_t dropped = relay_stats(bt->rchan, RELAY_STATS_BUF_FULL); char buf[16]; snprintf(buf, sizeof(buf), "%zu\n", dropped); return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf)); } static const struct file_operations blk_dropped_fops = { .owner = THIS_MODULE, .open = simple_open, .read = blk_dropped_read, .llseek = default_llseek, }; static ssize_t blk_msg_write(struct file *filp, const char __user *buffer, size_t count, loff_t *ppos) { char *msg; struct blk_trace *bt; if (count >= BLK_TN_MAX_MSG) return -EINVAL; msg = memdup_user_nul(buffer, count); if (IS_ERR(msg)) return PTR_ERR(msg); bt = filp->private_data; __blk_trace_note_message(bt, NULL, "%s", msg); kfree(msg); return count; } static const struct file_operations blk_msg_fops = { .owner = THIS_MODULE, .open = simple_open, .write = blk_msg_write, .llseek = noop_llseek, }; static int blk_remove_buf_file_callback(struct dentry *dentry) { debugfs_remove(dentry); return 0; } static struct dentry *blk_create_buf_file_callback(const char *filename, struct dentry *parent, umode_t mode, struct rchan_buf *buf, int *is_global) { return debugfs_create_file(filename, mode, parent, buf, &relay_file_operations); } static const struct rchan_callbacks blk_relay_callbacks = { .create_buf_file = blk_create_buf_file_callback, .remove_buf_file = blk_remove_buf_file_callback, }; static void blk_trace_setup_lba(struct blk_trace *bt, struct block_device *bdev) { if (bdev) { bt->start_lba = bdev->bd_start_sect; bt->end_lba = bdev->bd_start_sect + bdev_nr_sectors(bdev); } else { bt->start_lba = 0; bt->end_lba = -1ULL; } } /* * Setup everything required to start tracing */ static struct blk_trace *blk_trace_setup_prepare(struct request_queue *q, char *name, dev_t dev, u32 buf_size, u32 buf_nr, struct block_device *bdev) { struct blk_trace *bt = NULL; struct dentry *dir = NULL; int ret; lockdep_assert_held(&q->debugfs_mutex); /* * bdev can be NULL, as with scsi-generic, this is a helpful as * we can be. */ if (rcu_dereference_protected(q->blk_trace, lockdep_is_held(&q->debugfs_mutex))) { pr_warn("Concurrent blktraces are not allowed on %s\n", name); return ERR_PTR(-EBUSY); } bt = kzalloc(sizeof(*bt), GFP_KERNEL); if (!bt) return ERR_PTR(-ENOMEM); ret = -ENOMEM; bt->sequence = alloc_percpu(unsigned long); if (!bt->sequence) goto err; bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG, __alignof__(char)); if (!bt->msg_data) goto err; /* * When tracing the whole disk reuse the existing debugfs directory * created by the block layer on init. For partitions block devices, * and scsi-generic block devices we create a temporary new debugfs * directory that will be removed once the trace ends. */ if (bdev && !bdev_is_partition(bdev)) dir = q->debugfs_dir; else bt->dir = dir = debugfs_create_dir(name, blk_debugfs_root); /* * As blktrace relies on debugfs for its interface the debugfs directory * is required, contrary to the usual mantra of not checking for debugfs * files or directories. */ if (IS_ERR_OR_NULL(dir)) { pr_warn("debugfs_dir not present for %s so skipping\n", name); ret = -ENOENT; goto err; } bt->dev = dev; INIT_LIST_HEAD(&bt->running_list); ret = -EIO; debugfs_create_file("dropped", 0444, dir, bt, &blk_dropped_fops); debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops); bt->rchan = relay_open("trace", dir, buf_size, buf_nr, &blk_relay_callbacks, bt); if (!bt->rchan) goto err; blk_trace_setup_lba(bt, bdev); return bt; err: blk_trace_free(q, bt); return ERR_PTR(ret); } static void blk_trace_setup_finalize(struct request_queue *q, char *name, int version, struct blk_trace *bt, struct blk_user_trace_setup2 *buts) { strscpy_pad(buts->name, name, BLKTRACE_BDEV_SIZE2); /* * some device names have larger paths - convert the slashes * to underscores for this to work as expected */ strreplace(buts->name, '/', '_'); bt->version = version; bt->act_mask = buts->act_mask; if (!bt->act_mask) bt->act_mask = (u16) -1; /* overwrite with user settings */ if (buts->start_lba) bt->start_lba = buts->start_lba; if (buts->end_lba) bt->end_lba = buts->end_lba; bt->pid = buts->pid; bt->trace_state = Blktrace_setup; rcu_assign_pointer(q->blk_trace, bt); get_probe_ref(); } int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, struct block_device *bdev, char __user *arg) { struct blk_user_trace_setup2 buts2; struct blk_user_trace_setup buts; struct blk_trace *bt; int ret; ret = copy_from_user(&buts, arg, sizeof(buts)); if (ret) return -EFAULT; if (!buts.buf_size || !buts.buf_nr) return -EINVAL; buts2 = (struct blk_user_trace_setup2) { .act_mask = buts.act_mask, .buf_size = buts.buf_size, .buf_nr = buts.buf_nr, .start_lba = buts.start_lba, .end_lba = buts.end_lba, .pid = buts.pid, }; mutex_lock(&q->debugfs_mutex); bt = blk_trace_setup_prepare(q, name, dev, buts.buf_size, buts.buf_nr, bdev); if (IS_ERR(bt)) { mutex_unlock(&q->debugfs_mutex); return PTR_ERR(bt); } blk_trace_setup_finalize(q, name, 1, bt, &buts2); strcpy(buts.name, buts2.name); mutex_unlock(&q->debugfs_mutex); if (copy_to_user(arg, &buts, sizeof(buts))) { blk_trace_remove(q); return -EFAULT; } return 0; } EXPORT_SYMBOL_GPL(blk_trace_setup); static int blk_trace_setup2(struct request_queue *q, char *name, dev_t dev, struct block_device *bdev, char __user *arg) { struct blk_user_trace_setup2 buts2; struct blk_trace *bt; if (copy_from_user(&buts2, arg, sizeof(buts2))) return -EFAULT; if (!buts2.buf_size || !buts2.buf_nr) return -EINVAL; if (buts2.flags != 0) return -EINVAL; mutex_lock(&q->debugfs_mutex); bt = blk_trace_setup_prepare(q, name, dev, buts2.buf_size, buts2.buf_nr, bdev); if (IS_ERR(bt)) { mutex_unlock(&q->debugfs_mutex); return PTR_ERR(bt); } blk_trace_setup_finalize(q, name, 2, bt, &buts2); mutex_unlock(&q->debugfs_mutex); if (copy_to_user(arg, &buts2, sizeof(buts2))) { blk_trace_remove(q); return -EFAULT; } return 0; } #if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64) static int compat_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, struct block_device *bdev, char __user *arg) { struct blk_user_trace_setup2 buts2; struct compat_blk_user_trace_setup cbuts; struct blk_trace *bt; if (copy_from_user(&cbuts, arg, sizeof(cbuts))) return -EFAULT; if (!cbuts.buf_size || !cbuts.buf_nr) return -EINVAL; buts2 = (struct blk_user_trace_setup2) { .act_mask = cbuts.act_mask, .buf_size = cbuts.buf_size, .buf_nr = cbuts.buf_nr, .start_lba = cbuts.start_lba, .end_lba = cbuts.end_lba, .pid = cbuts.pid, }; mutex_lock(&q->debugfs_mutex); bt = blk_trace_setup_prepare(q, name, dev, buts2.buf_size, buts2.buf_nr, bdev); if (IS_ERR(bt)) { mutex_unlock(&q->debugfs_mutex); return PTR_ERR(bt); } blk_trace_setup_finalize(q, name, 1, bt, &buts2); mutex_unlock(&q->debugfs_mutex); if (copy_to_user(arg, &buts2.name, ARRAY_SIZE(buts2.name))) { blk_trace_remove(q); return -EFAULT; } return 0; } #endif static int __blk_trace_startstop(struct request_queue *q, int start) { struct blk_trace *bt; bt = rcu_dereference_protected(q->blk_trace, lockdep_is_held(&q->debugfs_mutex)); if (bt == NULL) return -EINVAL; if (start) return blk_trace_start(bt); else return blk_trace_stop(bt); } int blk_trace_startstop(struct request_queue *q, int start) { int ret; mutex_lock(&q->debugfs_mutex); ret = __blk_trace_startstop(q, start); mutex_unlock(&q->debugfs_mutex); return ret; } EXPORT_SYMBOL_GPL(blk_trace_startstop); /* * When reading or writing the blktrace sysfs files, the references to the * opened sysfs or device files should prevent the underlying block device * from being removed. So no further delete protection is really needed. */ /** * blk_trace_ioctl - handle the ioctls associated with tracing * @bdev: the block device * @cmd: the ioctl cmd * @arg: the argument data, if any * **/ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) { struct request_queue *q = bdev_get_queue(bdev); int ret, start = 0; char b[BDEVNAME_SIZE]; switch (cmd) { case BLKTRACESETUP2: snprintf(b, sizeof(b), "%pg", bdev); ret = blk_trace_setup2(q, b, bdev->bd_dev, bdev, arg); break; case BLKTRACESETUP: snprintf(b, sizeof(b), "%pg", bdev); ret = blk_trace_setup(q, b, bdev->bd_dev, bdev, arg); break; #if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64) case BLKTRACESETUP32: snprintf(b, sizeof(b), "%pg", bdev); ret = compat_blk_trace_setup(q, b, bdev->bd_dev, bdev, arg); break; #endif case BLKTRACESTART: start = 1; fallthrough; case BLKTRACESTOP: ret = blk_trace_startstop(q, start); break; case BLKTRACETEARDOWN: ret = blk_trace_remove(q); break; default: ret = -ENOTTY; break; } return ret; } /** * blk_trace_shutdown - stop and cleanup trace structures * @q: the request queue associated with the device * **/ void blk_trace_shutdown(struct request_queue *q) { if (rcu_dereference_protected(q->blk_trace, lockdep_is_held(&q->debugfs_mutex))) __blk_trace_remove(q); } #ifdef CONFIG_BLK_CGROUP static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio) { struct cgroup_subsys_state *blkcg_css; struct blk_trace *bt; /* We don't use the 'bt' value here except as an optimization... */ bt = rcu_dereference_protected(q->blk_trace, 1); if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP)) return 0; blkcg_css = bio_blkcg_css(bio); if (!blkcg_css) return 0; return cgroup_id(blkcg_css->cgroup); } #else static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio) { return 0; } #endif static u64 blk_trace_request_get_cgid(struct request *rq) { if (!rq->bio) return 0; /* Use the first bio */ return blk_trace_bio_get_cgid(rq->q, rq->bio); } /* * blktrace probes */ /** * blk_add_trace_rq - Add a trace for a request oriented action * @rq: the source request * @error: return status to log * @nr_bytes: number of completed bytes * @what: the action * @cgid: the cgroup info * * Description: * Records an action against a request. Will log the bio offset + size. * **/ static void blk_add_trace_rq(struct request *rq, blk_status_t error, unsigned int nr_bytes, u64 what, u64 cgid) { struct blk_trace *bt; rcu_read_lock(); bt = rcu_dereference(rq->q->blk_trace); if (likely(!bt)) { rcu_read_unlock(); return; } if (blk_rq_is_passthrough(rq)) what |= BLK_TC_ACT(BLK_TC_PC); else what |= BLK_TC_ACT(BLK_TC_FS); __blk_add_trace(bt, blk_rq_trace_sector(rq), nr_bytes, rq->cmd_flags, what, blk_status_to_errno(error), 0, NULL, cgid); rcu_read_unlock(); } static void blk_add_trace_rq_insert(void *ignore, struct request *rq) { blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_INSERT, blk_trace_request_get_cgid(rq)); } static void blk_add_trace_rq_issue(void *ignore, struct request *rq) { blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_ISSUE, blk_trace_request_get_cgid(rq)); } static void blk_add_trace_rq_merge(void *ignore, struct request *rq) { blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_BACKMERGE, blk_trace_request_get_cgid(rq)); } static void blk_add_trace_rq_requeue(void *ignore, struct request *rq) { blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_REQUEUE, blk_trace_request_get_cgid(rq)); } static void blk_add_trace_rq_complete(void *ignore, struct request *rq, blk_status_t error, unsigned int nr_bytes) { blk_add_trace_rq(rq, error, nr_bytes, BLK_TA_COMPLETE, blk_trace_request_get_cgid(rq)); } static void blk_add_trace_zone_update_request(void *ignore, struct request *rq) { struct blk_trace *bt; rcu_read_lock(); bt = rcu_dereference(rq->q->blk_trace); if (likely(!bt) || bt->version < 2) { rcu_read_unlock(); return; } rcu_read_unlock(); blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_ZONE_APPEND, blk_trace_request_get_cgid(rq)); } /** * blk_add_trace_bio - Add a trace for a bio oriented action * @q: queue the io is for * @bio: the source bio * @what: the action * @error: error, if any * * Description: * Records an action against a bio. Will log the bio offset + size. * **/ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, u64 what, int error) { struct blk_trace *bt; rcu_read_lock(); bt = rcu_dereference(q->blk_trace); if (likely(!bt)) { rcu_read_unlock(); return; } __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, bio->bi_opf, what, error, 0, NULL, blk_trace_bio_get_cgid(q, bio)); rcu_read_unlock(); } static void blk_add_trace_bio_complete(void *ignore, struct request_queue *q, struct bio *bio) { blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, blk_status_to_errno(bio->bi_status)); } static void blk_add_trace_bio_backmerge(void *ignore, struct bio *bio) { blk_add_trace_bio(bio->bi_bdev->bd_disk->queue, bio, BLK_TA_BACKMERGE, 0); } static void blk_add_trace_bio_frontmerge(void *ignore, struct bio *bio) { blk_add_trace_bio(bio->bi_bdev->bd_disk->queue, bio, BLK_TA_FRONTMERGE, 0); } static void blk_add_trace_bio_queue(void *ignore, struct bio *bio) { blk_add_trace_bio(bio->bi_bdev->bd_disk->queue, bio, BLK_TA_QUEUE, 0); } static void blk_add_trace_getrq(void *ignore, struct bio *bio) { blk_add_trace_bio(bio->bi_bdev->bd_disk->queue, bio, BLK_TA_GETRQ, 0); } static void blk_add_trace_plug(void *ignore, struct request_queue *q) { struct blk_trace *bt; rcu_read_lock(); bt = rcu_dereference(q->blk_trace); if (bt) __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, 0); rcu_read_unlock(); } static void blk_add_trace_unplug(void *ignore, struct request_queue *q, unsigned int depth, bool explicit) { struct blk_trace *bt; rcu_read_lock(); bt = rcu_dereference(q->blk_trace); if (bt) { __be64 rpdu = cpu_to_be64(depth); u64 what; if (explicit) what = BLK_TA_UNPLUG_IO; else what = BLK_TA_UNPLUG_TIMER; __blk_add_trace(bt, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, 0); } rcu_read_unlock(); } static void blk_add_trace_zone_plug(void *ignore, struct request_queue *q, unsigned int zno, sector_t sector, unsigned int sectors) { struct blk_trace *bt; rcu_read_lock(); bt = rcu_dereference(q->blk_trace); if (bt && bt->version >= 2) __blk_add_trace(bt, sector, sectors << SECTOR_SHIFT, 0, BLK_TA_ZONE_PLUG, 0, 0, NULL, 0); rcu_read_unlock(); return; } static void blk_add_trace_zone_unplug(void *ignore, struct request_queue *q, unsigned int zno, sector_t sector, unsigned int sectors) { struct blk_trace *bt; rcu_read_lock(); bt = rcu_dereference(q->blk_trace); if (bt && bt->version >= 2) __blk_add_trace(bt, sector, sectors << SECTOR_SHIFT, 0, BLK_TA_ZONE_UNPLUG, 0, 0, NULL, 0); rcu_read_unlock(); return; } static void blk_add_trace_split(void *ignore, struct bio *bio, unsigned int pdu) { struct request_queue *q = bio->bi_bdev->bd_disk->queue; struct blk_trace *bt; rcu_read_lock(); bt = rcu_dereference(q->blk_trace); if (bt) { __be64 rpdu = cpu_to_be64(pdu); __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, bio->bi_opf, BLK_TA_SPLIT, blk_status_to_errno(bio->bi_status), sizeof(rpdu), &rpdu, blk_trace_bio_get_cgid(q, bio)); } rcu_read_unlock(); } /** * blk_add_trace_bio_remap - Add a trace for a bio-remap operation * @ignore: trace callback data parameter (not used) * @bio: the source bio * @dev: source device * @from: source sector * * Called after a bio is remapped to a different device and/or sector. **/ static void blk_add_trace_bio_remap(void *ignore, struct bio *bio, dev_t dev, sector_t from) { struct request_queue *q = bio->bi_bdev->bd_disk->queue; struct blk_trace *bt; struct blk_io_trace_remap r; rcu_read_lock(); bt = rcu_dereference(q->blk_trace); if (likely(!bt)) { rcu_read_unlock(); return; } r.device_from = cpu_to_be32(dev); r.device_to = cpu_to_be32(bio_dev(bio)); r.sector_from = cpu_to_be64(from); __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, bio->bi_opf, BLK_TA_REMAP, blk_status_to_errno(bio->bi_status), sizeof(r), &r, blk_trace_bio_get_cgid(q, bio)); rcu_read_unlock(); } /** * blk_add_trace_rq_remap - Add a trace for a request-remap operation * @ignore: trace callback data parameter (not used) * @rq: the source request * @dev: target device * @from: source sector * * Description: * Device mapper remaps request to other devices. * Add a trace for that action. * **/ static void blk_add_trace_rq_remap(void *ignore, struct request *rq, dev_t dev, sector_t from) { struct blk_trace *bt; struct blk_io_trace_remap r; rcu_read_lock(); bt = rcu_dereference(rq->q->blk_trace); if (likely(!bt)) { rcu_read_unlock(); return; } r.device_from = cpu_to_be32(dev); r.device_to = cpu_to_be32(disk_devt(rq->q->disk)); r.sector_from = cpu_to_be64(from); __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), rq->cmd_flags, BLK_TA_REMAP, 0, sizeof(r), &r, blk_trace_request_get_cgid(rq)); rcu_read_unlock(); } /** * blk_add_driver_data - Add binary message with driver-specific data * @rq: io request * @data: driver-specific data * @len: length of driver-specific data * * Description: * Some drivers might want to write driver-specific data per request. * **/ void blk_add_driver_data(struct request *rq, void *data, size_t len) { struct blk_trace *bt; rcu_read_lock(); bt = rcu_dereference(rq->q->blk_trace); if (likely(!bt)) { rcu_read_unlock(); return; } __blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, BLK_TA_DRV_DATA, 0, len, data, blk_trace_request_get_cgid(rq)); rcu_read_unlock(); } EXPORT_SYMBOL_GPL(blk_add_driver_data); static void blk_register_tracepoints(void) { int ret; ret = register_trace_block_rq_insert(blk_add_trace_rq_insert, NULL); WARN_ON(ret); ret = register_trace_block_rq_issue(blk_add_trace_rq_issue, NULL); WARN_ON(ret); ret = register_trace_block_rq_merge(blk_add_trace_rq_merge, NULL); WARN_ON(ret); ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL); WARN_ON(ret); ret = register_trace_block_rq_complete(blk_add_trace_rq_complete, NULL); WARN_ON(ret); ret = register_trace_block_bio_complete(blk_add_trace_bio_complete, NULL); WARN_ON(ret); ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge, NULL); WARN_ON(ret); ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge, NULL); WARN_ON(ret); ret = register_trace_block_bio_queue(blk_add_trace_bio_queue, NULL); WARN_ON(ret); ret = register_trace_block_getrq(blk_add_trace_getrq, NULL); WARN_ON(ret); ret = register_trace_blk_zone_append_update_request_bio( blk_add_trace_zone_update_request, NULL); WARN_ON(ret); ret = register_trace_disk_zone_wplug_add_bio(blk_add_trace_zone_plug, NULL); WARN_ON(ret); ret = register_trace_blk_zone_wplug_bio(blk_add_trace_zone_unplug, NULL); WARN_ON(ret); ret = register_trace_block_plug(blk_add_trace_plug, NULL); WARN_ON(ret); ret = register_trace_block_unplug(blk_add_trace_unplug, NULL); WARN_ON(ret); ret = register_trace_block_split(blk_add_trace_split, NULL); WARN_ON(ret); ret = register_trace_block_bio_remap(blk_add_trace_bio_remap, NULL); WARN_ON(ret); ret = register_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); WARN_ON(ret); } static void blk_unregister_tracepoints(void) { unregister_trace_block_rq_remap(blk_add_trace_rq_remap, NULL); unregister_trace_block_bio_remap(blk_add_trace_bio_remap, NULL); unregister_trace_block_split(blk_add_trace_split, NULL); unregister_trace_block_unplug(blk_add_trace_unplug, NULL); unregister_trace_block_plug(blk_add_trace_plug, NULL); unregister_trace_blk_zone_wplug_bio(blk_add_trace_zone_unplug, NULL); unregister_trace_disk_zone_wplug_add_bio(blk_add_trace_zone_plug, NULL); unregister_trace_blk_zone_append_update_request_bio( blk_add_trace_zone_update_request, NULL); unregister_trace_block_getrq(blk_add_trace_getrq, NULL); unregister_trace_block_bio_queue(blk_add_trace_bio_queue, NULL); unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge, NULL); unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge, NULL); unregister_trace_block_bio_complete(blk_add_trace_bio_complete, NULL); unregister_trace_block_rq_complete(blk_add_trace_rq_complete, NULL); unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue, NULL); unregister_trace_block_rq_merge(blk_add_trace_rq_merge, NULL); unregister_trace_block_rq_issue(blk_add_trace_rq_issue, NULL); unregister_trace_block_rq_insert(blk_add_trace_rq_insert, NULL); tracepoint_synchronize_unregister(); } /* * struct blk_io_tracer formatting routines */ static void fill_rwbs(char *rwbs, const struct blk_io_trace2 *t) { int i = 0; int tc = t->action >> BLK_TC_SHIFT; if ((t->action & ~__BLK_TN_CGROUP) == BLK_TN_MESSAGE) { rwbs[i++] = 'N'; goto out; } if (tc & BLK_TC_FLUSH) rwbs[i++] = 'F'; if (tc & BLK_TC_DISCARD) rwbs[i++] = 'D'; else if (tc & BLK_TC_WRITE_ZEROES) { rwbs[i++] = 'W'; rwbs[i++] = 'Z'; } else if (tc & BLK_TC_WRITE) rwbs[i++] = 'W'; else if (t->bytes) rwbs[i++] = 'R'; else rwbs[i++] = 'N'; if (tc & BLK_TC_FUA) rwbs[i++] = 'F'; if (tc & BLK_TC_AHEAD) rwbs[i++] = 'A'; if (tc & BLK_TC_SYNC) rwbs[i++] = 'S'; if (tc & BLK_TC_META) rwbs[i++] = 'M'; out: rwbs[i] = '\0'; } static inline const struct blk_io_trace2 *te_blk_io_trace(const struct trace_entry *ent) { return (const struct blk_io_trace2 *)ent; } static inline const void *pdu_start(const struct trace_entry *ent, bool has_cg) { return (void *)(te_blk_io_trace(ent) + 1) + (has_cg ? sizeof(u64) : 0); } static inline u64 t_cgid(const struct trace_entry *ent) { return *(u64 *)(te_blk_io_trace(ent) + 1); } static inline int pdu_real_len(const struct trace_entry *ent, bool has_cg) { return te_blk_io_trace(ent)->pdu_len - (has_cg ? sizeof(u64) : 0); } static inline u32 t_action(const struct trace_entry *ent) { return te_blk_io_trace(ent)->action; } static inline u32 t_bytes(const struct trace_entry *ent) { return te_blk_io_trace(ent)->bytes; } static inline u32 t_sec(const struct trace_entry *ent) { return te_blk_io_trace(ent)->bytes >> 9; } static inline unsigned long long t_sector(const struct trace_entry *ent) { return te_blk_io_trace(ent)->sector; } static inline __u16 t_error(const struct trace_entry *ent) { return te_blk_io_trace(ent)->error; } static __u64 get_pdu_int(const struct trace_entry *ent, bool has_cg) { const __be64 *val = pdu_start(ent, has_cg); return be64_to_cpu(*val); } typedef void (blk_log_action_t) (struct trace_iterator *iter, const char *act, bool has_cg); static void blk_log_action_classic(struct trace_iterator *iter, const char *act, bool has_cg) { char rwbs[RWBS_LEN]; unsigned long long ts = iter->ts; unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC); unsigned secs = (unsigned long)ts; const struct blk_io_trace2 *t = te_blk_io_trace(iter->ent); fill_rwbs(rwbs, t); trace_seq_printf(&iter->seq, "%3d,%-3d %2d %5d.%09lu %5u %2s %3s ", MAJOR(t->device), MINOR(t->device), iter->cpu, secs, nsec_rem, iter->ent->pid, act, rwbs); } static void blk_log_action(struct trace_iterator *iter, const char *act, bool has_cg) { char rwbs[RWBS_LEN]; const struct blk_io_trace2 *t = te_blk_io_trace(iter->ent); fill_rwbs(rwbs, t); if (has_cg) { u64 id = t_cgid(iter->ent); if (blk_tracer_flags.val & TRACE_BLK_OPT_CGNAME) { char blkcg_name_buf[NAME_MAX + 1] = "<...>"; cgroup_path_from_kernfs_id(id, blkcg_name_buf, sizeof(blkcg_name_buf)); trace_seq_printf(&iter->seq, "%3d,%-3d %s %2s %3s ", MAJOR(t->device), MINOR(t->device), blkcg_name_buf, act, rwbs); } else { /* * The cgid portion used to be "INO,GEN". Userland * builds a FILEID_INO32_GEN fid out of them and * opens the cgroup using open_by_handle_at(2). * While 32bit ino setups are still the same, 64bit * ones now use the 64bit ino as the whole ID and * no longer use generation. * * Regardless of the content, always output * "LOW32,HIGH32" so that FILEID_INO32_GEN fid can * be mapped back to @id on both 64 and 32bit ino * setups. See __kernfs_fh_to_dentry(). */ trace_seq_printf(&iter->seq, "%3d,%-3d %llx,%-llx %2s %3s ", MAJOR(t->device), MINOR(t->device), id & U32_MAX, id >> 32, act, rwbs); } } else trace_seq_printf(&iter->seq, "%3d,%-3d %2s %3s ", MAJOR(t->device), MINOR(t->device), act, rwbs); } static void blk_log_dump_pdu(struct trace_seq *s, const struct trace_entry *ent, bool has_cg) { const unsigned char *pdu_buf; int pdu_len; int i, end; pdu_buf = pdu_start(ent, has_cg); pdu_len = pdu_real_len(ent, has_cg); if (!pdu_len) return; /* find the last zero that needs to be printed */ for (end = pdu_len - 1; end >= 0; end--) if (pdu_buf[end]) break; end++; trace_seq_putc(s, '('); for (i = 0; i < pdu_len; i++) { trace_seq_printf(s, "%s%02x", i == 0 ? "" : " ", pdu_buf[i]); /* * stop when the rest is just zeros and indicate so * with a ".." appended */ if (i == end && end != pdu_len - 1) { trace_seq_puts(s, " ..) "); return; } } trace_seq_puts(s, ") "); } static void blk_log_generic(struct trace_seq *s, const struct trace_entry *ent, bool has_cg) { char cmd[TASK_COMM_LEN]; trace_find_cmdline(ent->pid, cmd); if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) { trace_seq_printf(s, "%u ", t_bytes(ent)); blk_log_dump_pdu(s, ent, has_cg); trace_seq_printf(s, "[%s]\n", cmd); } else { if (t_sec(ent)) trace_seq_printf(s, "%llu + %u [%s]\n", t_sector(ent), t_sec(ent), cmd); else trace_seq_printf(s, "[%s]\n", cmd); } } static void blk_log_with_error(struct trace_seq *s, const struct trace_entry *ent, bool has_cg) { if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) { blk_log_dump_pdu(s, ent, has_cg); trace_seq_printf(s, "[%d]\n", t_error(ent)); } else { if (t_sec(ent)) trace_seq_printf(s, "%llu + %u [%d]\n", t_sector(ent), t_sec(ent), t_error(ent)); else trace_seq_printf(s, "%llu [%d]\n", t_sector(ent), t_error(ent)); } } static void blk_log_remap(struct trace_seq *s, const struct trace_entry *ent, bool has_cg) { const struct blk_io_trace_remap *__r = pdu_start(ent, has_cg); trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n", t_sector(ent), t_sec(ent), MAJOR(be32_to_cpu(__r->device_from)), MINOR(be32_to_cpu(__r->device_from)), be64_to_cpu(__r->sector_from)); } static void blk_log_plug(struct trace_seq *s, const struct trace_entry *ent, bool has_cg) { char cmd[TASK_COMM_LEN]; trace_find_cmdline(ent->pid, cmd); trace_seq_printf(s, "[%s]\n", cmd); } static void blk_log_unplug(struct trace_seq *s, const struct trace_entry *ent, bool has_cg) { char cmd[TASK_COMM_LEN]; trace_find_cmdline(ent->pid, cmd); trace_seq_printf(s, "[%s] %llu\n", cmd, get_pdu_int(ent, has_cg)); } static void blk_log_split(struct trace_seq *s, const struct trace_entry *ent, bool has_cg) { char cmd[TASK_COMM_LEN]; trace_find_cmdline(ent->pid, cmd); trace_seq_printf(s, "%llu / %llu [%s]\n", t_sector(ent), get_pdu_int(ent, has_cg), cmd); } static void blk_log_msg(struct trace_seq *s, const struct trace_entry *ent, bool has_cg) { trace_seq_putmem(s, pdu_start(ent, has_cg), pdu_real_len(ent, has_cg)); trace_seq_putc(s, '\n'); } /* * struct tracer operations */ static void blk_tracer_print_header(struct seq_file *m) { if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC)) return; seq_puts(m, "# DEV CPU TIMESTAMP PID ACT FLG\n" "# | | | | | |\n"); } static void blk_tracer_start(struct trace_array *tr) { blk_tracer_enabled = true; } static int blk_tracer_init(struct trace_array *tr) { blk_tr = tr; blk_tracer_start(tr); return 0; } static void blk_tracer_stop(struct trace_array *tr) { blk_tracer_enabled = false; } static void blk_tracer_reset(struct trace_array *tr) { blk_tracer_stop(tr); } static const struct { const char *act[2]; void (*print)(struct trace_seq *s, const struct trace_entry *ent, bool has_cg); } what2act[] = { [__BLK_TA_QUEUE] = {{ "Q", "queue" }, blk_log_generic }, [__BLK_TA_BACKMERGE] = {{ "M", "backmerge" }, blk_log_generic }, [__BLK_TA_FRONTMERGE] = {{ "F", "frontmerge" }, blk_log_generic }, [__BLK_TA_GETRQ] = {{ "G", "getrq" }, blk_log_generic }, [__BLK_TA_SLEEPRQ] = {{ "S", "sleeprq" }, blk_log_generic }, [__BLK_TA_REQUEUE] = {{ "R", "requeue" }, blk_log_with_error }, [__BLK_TA_ISSUE] = {{ "D", "issue" }, blk_log_generic }, [__BLK_TA_COMPLETE] = {{ "C", "complete" }, blk_log_with_error }, [__BLK_TA_PLUG] = {{ "P", "plug" }, blk_log_plug }, [__BLK_TA_UNPLUG_IO] = {{ "U", "unplug_io" }, blk_log_unplug }, [__BLK_TA_UNPLUG_TIMER] = {{ "UT", "unplug_timer" }, blk_log_unplug }, [__BLK_TA_INSERT] = {{ "I", "insert" }, blk_log_generic }, [__BLK_TA_SPLIT] = {{ "X", "split" }, blk_log_split }, [__BLK_TA_REMAP] = {{ "A", "remap" }, blk_log_remap }, }; static enum print_line_t print_one_line(struct trace_iterator *iter, bool classic) { struct trace_array *tr = iter->tr; struct trace_seq *s = &iter->seq; const struct blk_io_trace2 *t; u16 what; bool long_act; blk_log_action_t *log_action; bool has_cg; t = te_blk_io_trace(iter->ent); what = (t->action & ((1 << BLK_TC_SHIFT) - 1)) & ~__BLK_TA_CGROUP; long_act = !!(tr->trace_flags & TRACE_ITER(VERBOSE)); log_action = classic ? &blk_log_action_classic : &blk_log_action; has_cg = t->action & __BLK_TA_CGROUP; if ((t->action & ~__BLK_TN_CGROUP) == BLK_TN_MESSAGE) { log_action(iter, long_act ? "message" : "m", has_cg); blk_log_msg(s, iter->ent, has_cg); return trace_handle_return(s); } if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act))) trace_seq_printf(s, "Unknown action %x\n", what); else { log_action(iter, what2act[what].act[long_act], has_cg); what2act[what].print(s, iter->ent, has_cg); } return trace_handle_return(s); } static enum print_line_t blk_trace_event_print(struct trace_iterator *iter, int flags, struct trace_event *event) { return print_one_line(iter, false); } static void blk_trace_synthesize_old_trace(struct trace_iterator *iter) { struct trace_seq *s = &iter->seq; struct blk_io_trace2 *t = (struct blk_io_trace2 *)iter->ent; const int offset = offsetof(struct blk_io_trace2, sector); struct blk_io_trace old = { .magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION, .time = iter->ts, }; trace_seq_putmem(s, &old, offset); trace_seq_putmem(s, &t->sector, sizeof(old) - offset + t->pdu_len); } static enum print_line_t blk_trace_event_print_binary(struct trace_iterator *iter, int flags, struct trace_event *event) { blk_trace_synthesize_old_trace(iter); return trace_handle_return(&iter->seq); } static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter) { if ((iter->ent->type != TRACE_BLK) || !(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC)) return TRACE_TYPE_UNHANDLED; return print_one_line(iter, true); } static int blk_tracer_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set) { /* don't output context-info for blk_classic output */ if (bit == TRACE_BLK_OPT_CLASSIC) { if (set) tr->trace_flags &= ~TRACE_ITER(CONTEXT_INFO); else tr->trace_flags |= TRACE_ITER(CONTEXT_INFO); } return 0; } static struct tracer blk_tracer __read_mostly = { .name = "blk", .init = blk_tracer_init, .reset = blk_tracer_reset, .start = blk_tracer_start, .stop = blk_tracer_stop, .print_header = blk_tracer_print_header, .print_line = blk_tracer_print_line, .flags = &blk_tracer_flags, .set_flag = blk_tracer_set_flag, }; static struct trace_event_functions trace_blk_event_funcs = { .trace = blk_trace_event_print, .binary = blk_trace_event_print_binary, }; static struct trace_event trace_blk_event = { .type = TRACE_BLK, .funcs = &trace_blk_event_funcs, }; static int __init init_blk_tracer(void) { if (!register_trace_event(&trace_blk_event)) { pr_warn("Warning: could not register block events\n"); return 1; } if (register_tracer(&blk_tracer) != 0) { pr_warn("Warning: could not register the block tracer\n"); unregister_trace_event(&trace_blk_event); return 1; } BUILD_BUG_ON(__alignof__(struct blk_user_trace_setup2) % __alignof__(long)); BUILD_BUG_ON(__alignof__(struct blk_io_trace2) % __alignof__(long)); return 0; } device_initcall(init_blk_tracer); static int blk_trace_remove_queue(struct request_queue *q) { struct blk_trace *bt; bt = rcu_replace_pointer(q->blk_trace, NULL, lockdep_is_held(&q->debugfs_mutex)); if (bt == NULL) return -EINVAL; blk_trace_stop(bt); put_probe_ref(); synchronize_rcu(); blk_trace_free(q, bt); return 0; } /* * Setup everything required to start tracing */ static int blk_trace_setup_queue(struct request_queue *q, struct block_device *bdev) { struct blk_trace *bt = NULL; int ret = -ENOMEM; bt = kzalloc(sizeof(*bt), GFP_KERNEL); if (!bt) return -ENOMEM; bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG, __alignof__(char)); if (!bt->msg_data) goto free_bt; bt->dev = bdev->bd_dev; bt->act_mask = (u16)-1; blk_trace_setup_lba(bt, bdev); rcu_assign_pointer(q->blk_trace, bt); get_probe_ref(); return 0; free_bt: blk_trace_free(q, bt); return ret; } /* * sysfs interface to enable and configure tracing */ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, struct device_attribute *attr, char *buf); static ssize_t sysfs_blk_trace_attr_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count); #define BLK_TRACE_DEVICE_ATTR(_name) \ DEVICE_ATTR(_name, S_IRUGO | S_IWUSR, \ sysfs_blk_trace_attr_show, \ sysfs_blk_trace_attr_store) static BLK_TRACE_DEVICE_ATTR(enable); static BLK_TRACE_DEVICE_ATTR(act_mask); static BLK_TRACE_DEVICE_ATTR(pid); static BLK_TRACE_DEVICE_ATTR(start_lba); static BLK_TRACE_DEVICE_ATTR(end_lba); static struct attribute *blk_trace_attrs[] = { &dev_attr_enable.attr, &dev_attr_act_mask.attr, &dev_attr_pid.attr, &dev_attr_start_lba.attr, &dev_attr_end_lba.attr, NULL }; struct attribute_group blk_trace_attr_group = { .name = "trace", .attrs = blk_trace_attrs, }; static const struct { int mask; const char *str; } mask_maps[] = { { BLK_TC_READ, "read" }, { BLK_TC_WRITE, "write" }, { BLK_TC_FLUSH, "flush" }, { BLK_TC_SYNC, "sync" }, { BLK_TC_QUEUE, "queue" }, { BLK_TC_REQUEUE, "requeue" }, { BLK_TC_ISSUE, "issue" }, { BLK_TC_COMPLETE, "complete" }, { BLK_TC_FS, "fs" }, { BLK_TC_PC, "pc" }, { BLK_TC_NOTIFY, "notify" }, { BLK_TC_AHEAD, "ahead" }, { BLK_TC_META, "meta" }, { BLK_TC_DISCARD, "discard" }, { BLK_TC_DRV_DATA, "drv_data" }, { BLK_TC_FUA, "fua" }, { BLK_TC_WRITE_ZEROES, "write-zeroes" }, }; static int blk_trace_str2mask(const char *str) { int i; int mask = 0; char *buf, *s, *token; buf = kstrdup(str, GFP_KERNEL); if (buf == NULL) return -ENOMEM; s = strstrip(buf); while (1) { token = strsep(&s, ","); if (token == NULL) break; if (*token == '\0') continue; for (i = 0; i < ARRAY_SIZE(mask_maps); i++) { if (strcasecmp(token, mask_maps[i].str) == 0) { mask |= mask_maps[i].mask; break; } } if (i == ARRAY_SIZE(mask_maps)) { mask = -EINVAL; break; } } kfree(buf); return mask; } static ssize_t blk_trace_mask2str(char *buf, int mask) { int i; char *p = buf; for (i = 0; i < ARRAY_SIZE(mask_maps); i++) { if (mask & mask_maps[i].mask) { p += sprintf(p, "%s%s", (p == buf) ? "" : ",", mask_maps[i].str); } } *p++ = '\n'; return p - buf; } static ssize_t sysfs_blk_trace_attr_show(struct device *dev, struct device_attribute *attr, char *buf) { struct block_device *bdev = dev_to_bdev(dev); struct request_queue *q = bdev_get_queue(bdev); struct blk_trace *bt; ssize_t ret = -ENXIO; mutex_lock(&q->debugfs_mutex); bt = rcu_dereference_protected(q->blk_trace, lockdep_is_held(&q->debugfs_mutex)); if (attr == &dev_attr_enable) { ret = sprintf(buf, "%u\n", !!bt); goto out_unlock_bdev; } if (bt == NULL) ret = sprintf(buf, "disabled\n"); else if (attr == &dev_attr_act_mask) ret = blk_trace_mask2str(buf, bt->act_mask); else if (attr == &dev_attr_pid) ret = sprintf(buf, "%u\n", bt->pid); else if (attr == &dev_attr_start_lba) ret = sprintf(buf, "%llu\n", bt->start_lba); else if (attr == &dev_attr_end_lba) ret = sprintf(buf, "%llu\n", bt->end_lba); out_unlock_bdev: mutex_unlock(&q->debugfs_mutex); return ret; } static ssize_t sysfs_blk_trace_attr_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct block_device *bdev = dev_to_bdev(dev); struct request_queue *q = bdev_get_queue(bdev); struct blk_trace *bt; u64 value; ssize_t ret = -EINVAL; if (count == 0) goto out; if (attr == &dev_attr_act_mask) { if (kstrtoull(buf, 0, &value)) { /* Assume it is a list of trace category names */ ret = blk_trace_str2mask(buf); if (ret < 0) goto out; value = ret; } } else { if (kstrtoull(buf, 0, &value)) goto out; } mutex_lock(&q->debugfs_mutex); bt = rcu_dereference_protected(q->blk_trace, lockdep_is_held(&q->debugfs_mutex)); if (attr == &dev_attr_enable) { if (!!value == !!bt) { ret = 0; goto out_unlock_bdev; } if (value) ret = blk_trace_setup_queue(q, bdev); else ret = blk_trace_remove_queue(q); goto out_unlock_bdev; } ret = 0; if (bt == NULL) { ret = blk_trace_setup_queue(q, bdev); bt = rcu_dereference_protected(q->blk_trace, lockdep_is_held(&q->debugfs_mutex)); } if (ret == 0) { if (attr == &dev_attr_act_mask) bt->act_mask = value; else if (attr == &dev_attr_pid) bt->pid = value; else if (attr == &dev_attr_start_lba) bt->start_lba = value; else if (attr == &dev_attr_end_lba) bt->end_lba = value; } out_unlock_bdev: mutex_unlock(&q->debugfs_mutex); out: return ret ? ret : count; } #endif /* CONFIG_BLK_DEV_IO_TRACE */ #ifdef CONFIG_EVENT_TRACING /** * blk_fill_rwbs - Fill the buffer rwbs by mapping op to character string. * @rwbs: buffer to be filled * @opf: request operation type (REQ_OP_XXX) and flags for the tracepoint * * Description: * Maps each request operation and flag to a single character and fills the * buffer provided by the caller with resulting string. * **/ void blk_fill_rwbs(char *rwbs, blk_opf_t opf) { int i = 0; if (opf & REQ_PREFLUSH) rwbs[i++] = 'F'; switch (opf & REQ_OP_MASK) { case REQ_OP_WRITE: rwbs[i++] = 'W'; break; case REQ_OP_DISCARD: rwbs[i++] = 'D'; break; case REQ_OP_SECURE_ERASE: rwbs[i++] = 'D'; rwbs[i++] = 'E'; break; case REQ_OP_FLUSH: rwbs[i++] = 'F'; break; case REQ_OP_READ: rwbs[i++] = 'R'; break; case REQ_OP_ZONE_APPEND: rwbs[i++] = 'Z'; rwbs[i++] = 'A'; break; case REQ_OP_ZONE_RESET: case REQ_OP_ZONE_RESET_ALL: rwbs[i++] = 'Z'; rwbs[i++] = 'R'; if ((opf & REQ_OP_MASK) == REQ_OP_ZONE_RESET_ALL) rwbs[i++] = 'A'; break; case REQ_OP_ZONE_FINISH: rwbs[i++] = 'Z'; rwbs[i++] = 'F'; break; case REQ_OP_ZONE_OPEN: rwbs[i++] = 'Z'; rwbs[i++] = 'O'; break; case REQ_OP_ZONE_CLOSE: rwbs[i++] = 'Z'; rwbs[i++] = 'C'; break; case REQ_OP_WRITE_ZEROES: rwbs[i++] = 'W'; rwbs[i++] = 'Z'; break; default: rwbs[i++] = 'N'; } if (opf & REQ_FUA) rwbs[i++] = 'F'; if (opf & REQ_RAHEAD) rwbs[i++] = 'A'; if (opf & REQ_SYNC) rwbs[i++] = 'S'; if (opf & REQ_META) rwbs[i++] = 'M'; if (opf & REQ_ATOMIC) rwbs[i++] = 'U'; WARN_ON_ONCE(i >= RWBS_LEN); rwbs[i] = '\0'; } EXPORT_SYMBOL_GPL(blk_fill_rwbs); #endif /* CONFIG_EVENT_TRACING */
3 3 3 3 3 3 3 3 3 3 3 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000-2005 Silicon Graphics, Inc. * All Rights Reserved. */ #include "xfs_platform.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_bit.h" #include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_ialloc.h" #include "xfs_alloc.h" #include "xfs_error.h" #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_bmap_btree.h" #include "xfs_alloc_btree.h" #include "xfs_log.h" #include "xfs_rmap_btree.h" #include "xfs_refcount_btree.h" #include "xfs_da_format.h" #include "xfs_health.h" #include "xfs_ag.h" #include "xfs_rtbitmap.h" #include "xfs_exchrange.h" #include "xfs_rtgroup.h" #include "xfs_rtrmap_btree.h" #include "xfs_rtrefcount_btree.h" /* * Physical superblock buffer manipulations. Shared with libxfs in userspace. */ /* * Check that all the V4 feature bits that the V5 filesystem format requires are * correctly set. */ static bool xfs_sb_validate_v5_features( struct xfs_sb *sbp) { /* We must not have any unknown V4 feature bits set */ if (sbp->sb_versionnum & ~XFS_SB_VERSION_OKBITS) return false; /* * The CRC bit is considered an invalid V4 flag, so we have to add it * manually to the OKBITS mask. */ if (sbp->sb_features2 & ~(XFS_SB_VERSION2_OKBITS | XFS_SB_VERSION2_CRCBIT)) return false; /* Now check all the required V4 feature flags are set. */ #define V5_VERS_FLAGS (XFS_SB_VERSION_NLINKBIT | \ XFS_SB_VERSION_ALIGNBIT | \ XFS_SB_VERSION_LOGV2BIT | \ XFS_SB_VERSION_EXTFLGBIT | \ XFS_SB_VERSION_DIRV2BIT | \ XFS_SB_VERSION_MOREBITSBIT) #define V5_FEAT_FLAGS (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \ XFS_SB_VERSION2_ATTR2BIT | \ XFS_SB_VERSION2_PROJID32BIT | \ XFS_SB_VERSION2_CRCBIT) if ((sbp->sb_versionnum & V5_VERS_FLAGS) != V5_VERS_FLAGS) return false; if ((sbp->sb_features2 & V5_FEAT_FLAGS) != V5_FEAT_FLAGS) return false; return true; } /* * We current support XFS v5 formats with known features and v4 superblocks with * at least V2 directories. */ bool xfs_sb_good_version( struct xfs_sb *sbp) { /* * All v5 filesystems are supported, but we must check that all the * required v4 feature flags are enabled correctly as the code checks * those flags and not for v5 support. */ if (xfs_sb_is_v5(sbp)) return xfs_sb_validate_v5_features(sbp); /* versions prior to v4 are not supported */ if (XFS_SB_VERSION_NUM(sbp) != XFS_SB_VERSION_4) return false; /* We must not have any unknown v4 feature bits set */ if ((sbp->sb_versionnum & ~XFS_SB_VERSION_OKBITS) || ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) && (sbp->sb_features2 & ~XFS_SB_VERSION2_OKBITS))) return false; /* V4 filesystems need v2 directories and unwritten extents */ if (!(sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT)) return false; if (!(sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT)) return false; /* It's a supported v4 filesystem */ return true; } uint64_t xfs_sb_version_to_features( struct xfs_sb *sbp) { uint64_t features = 0; /* optional V4 features */ if (sbp->sb_rblocks > 0) features |= XFS_FEAT_REALTIME; if (sbp->sb_versionnum & XFS_SB_VERSION_NLINKBIT) features |= XFS_FEAT_NLINK; if (sbp->sb_versionnum & XFS_SB_VERSION_ATTRBIT) features |= XFS_FEAT_ATTR; if (sbp->sb_versionnum & XFS_SB_VERSION_QUOTABIT) features |= XFS_FEAT_QUOTA; if (sbp->sb_versionnum & XFS_SB_VERSION_ALIGNBIT) features |= XFS_FEAT_ALIGN; if (sbp->sb_versionnum & XFS_SB_VERSION_LOGV2BIT) features |= XFS_FEAT_LOGV2; if (sbp->sb_versionnum & XFS_SB_VERSION_DALIGNBIT) features |= XFS_FEAT_DALIGN; if (sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT) features |= XFS_FEAT_EXTFLG; if (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT) features |= XFS_FEAT_SECTOR; if (sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT) features |= XFS_FEAT_ASCIICI; if (sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) { if (sbp->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT) features |= XFS_FEAT_LAZYSBCOUNT; if (sbp->sb_features2 & XFS_SB_VERSION2_PROJID32BIT) features |= XFS_FEAT_PROJID32; if (sbp->sb_features2 & XFS_SB_VERSION2_FTYPE) features |= XFS_FEAT_FTYPE; } if (!xfs_sb_is_v5(sbp)) return features; /* Always on V5 features */ features |= XFS_FEAT_ALIGN | XFS_FEAT_LOGV2 | XFS_FEAT_EXTFLG | XFS_FEAT_LAZYSBCOUNT | XFS_FEAT_PROJID32 | XFS_FEAT_V3INODES | XFS_FEAT_CRC | XFS_FEAT_PQUOTINO; /* Optional V5 features */ if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT) features |= XFS_FEAT_FINOBT; if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_RMAPBT) features |= XFS_FEAT_RMAPBT; if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_REFLINK) features |= XFS_FEAT_REFLINK; if (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_INOBTCNT) features |= XFS_FEAT_INOBTCNT; if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_FTYPE) features |= XFS_FEAT_FTYPE; if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_SPINODES) features |= XFS_FEAT_SPINODES; if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_META_UUID) features |= XFS_FEAT_META_UUID; if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_BIGTIME) features |= XFS_FEAT_BIGTIME; if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR) features |= XFS_FEAT_NEEDSREPAIR; if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_NREXT64) features |= XFS_FEAT_NREXT64; if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_EXCHRANGE) features |= XFS_FEAT_EXCHANGE_RANGE; if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_PARENT) features |= XFS_FEAT_PARENT; if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR) features |= XFS_FEAT_METADIR; if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_ZONED) features |= XFS_FEAT_ZONED; return features; } /* Check all the superblock fields we care about when reading one in. */ STATIC int xfs_validate_sb_read( struct xfs_mount *mp, struct xfs_sb *sbp) { if (!xfs_sb_is_v5(sbp)) return 0; /* * Version 5 superblock feature mask validation. Reject combinations * the kernel cannot support up front before checking anything else. */ if (xfs_sb_has_compat_feature(sbp, XFS_SB_FEAT_COMPAT_UNKNOWN)) { xfs_warn(mp, "Superblock has unknown compatible features (0x%x) enabled.", (sbp->sb_features_compat & XFS_SB_FEAT_COMPAT_UNKNOWN)); xfs_warn(mp, "Using a more recent kernel is recommended."); } if (xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) { xfs_alert(mp, "Superblock has unknown read-only compatible features (0x%x) enabled.", (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_UNKNOWN)); if (!xfs_is_readonly(mp)) { xfs_warn(mp, "Attempted to mount read-only compatible filesystem read-write."); xfs_warn(mp, "Filesystem can only be safely mounted read only."); return -EINVAL; } } if (xfs_sb_has_incompat_feature(sbp, XFS_SB_FEAT_INCOMPAT_UNKNOWN)) { xfs_warn(mp, "Superblock has unknown incompatible features (0x%x) enabled.", (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_UNKNOWN)); xfs_warn(mp, "Filesystem cannot be safely mounted by this kernel."); return -EINVAL; } return 0; } /* Return the number of extents covered by a single rt bitmap file */ static xfs_rtbxlen_t xfs_extents_per_rbm( struct xfs_sb *sbp) { if (xfs_sb_is_v5(sbp) && (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR)) return sbp->sb_rgextents; return sbp->sb_rextents; } /* * Return the payload size of a single rt bitmap block (without the metadata * header if any). */ static inline unsigned int xfs_rtbmblock_size( struct xfs_sb *sbp) { if (xfs_sb_is_v5(sbp) && (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR)) return sbp->sb_blocksize - sizeof(struct xfs_rtbuf_blkinfo); return sbp->sb_blocksize; } static uint64_t xfs_expected_rbmblocks( struct xfs_sb *sbp) { if (xfs_sb_is_v5(sbp) && (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_ZONED)) return 0; return howmany_64(xfs_extents_per_rbm(sbp), NBBY * xfs_rtbmblock_size(sbp)); } /* Validate the realtime geometry */ bool xfs_validate_rt_geometry( struct xfs_sb *sbp) { if (xfs_sb_is_v5(sbp) && (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_ZONED)) { if (sbp->sb_rextsize != 1) return false; } else { if (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE || sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) return false; } if (sbp->sb_rblocks == 0) { if (sbp->sb_rextents != 0 || sbp->sb_rbmblocks != 0 || sbp->sb_rextslog != 0 || sbp->sb_frextents != 0) return false; return true; } if (sbp->sb_rextents == 0 || sbp->sb_rextents != div_u64(sbp->sb_rblocks, sbp->sb_rextsize) || sbp->sb_rextslog != xfs_compute_rextslog(sbp->sb_rextents) || sbp->sb_rbmblocks != xfs_expected_rbmblocks(sbp)) return false; if (xfs_sb_is_v5(sbp) && (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_ZONED)) { uint32_t mod; /* * Zoned RT devices must be aligned to the RT group size, * because garbage collection assumes that all zones have the * same size to avoid insane complexity if that weren't the * case. */ div_u64_rem(sbp->sb_rextents, sbp->sb_rgextents, &mod); if (mod) return false; } return true; } /* Check all the superblock fields we care about when writing one out. */ STATIC int xfs_validate_sb_write( struct xfs_mount *mp, struct xfs_buf *bp, struct xfs_sb *sbp) { /* * Carry out additional sb summary counter sanity checks when we write * the superblock. We skip this in the read validator because there * could be newer superblocks in the log and if the values are garbage * even after replay we'll recalculate them at the end of log mount. * * mkfs has traditionally written zeroed counters to inprogress and * secondary superblocks, so allow this usage to continue because * we never read counters from such superblocks. */ if (xfs_buf_daddr(bp) == XFS_SB_DADDR && !sbp->sb_inprogress && (sbp->sb_fdblocks > sbp->sb_dblocks || !xfs_verify_icount(mp, sbp->sb_icount) || sbp->sb_ifree > sbp->sb_icount)) { xfs_warn(mp, "SB summary counter sanity check failed"); return -EFSCORRUPTED; } if (!xfs_sb_is_v5(sbp)) return 0; /* * Version 5 superblock feature mask validation. Reject combinations * the kernel cannot support since we checked for unsupported bits in * the read verifier, which means that memory is corrupt. */ if (!xfs_is_readonly(mp) && xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) { xfs_alert(mp, "Corruption detected in superblock read-only compatible features (0x%x)!", (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_UNKNOWN)); return -EFSCORRUPTED; } if (xfs_sb_has_incompat_feature(sbp, XFS_SB_FEAT_INCOMPAT_UNKNOWN)) { xfs_warn(mp, "Corruption detected in superblock incompatible features (0x%x)!", (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_UNKNOWN)); return -EFSCORRUPTED; } if (xfs_sb_has_incompat_log_feature(sbp, XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN)) { xfs_warn(mp, "Corruption detected in superblock incompatible log features (0x%x)!", (sbp->sb_features_log_incompat & XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN)); return -EFSCORRUPTED; } /* * We can't read verify the sb LSN because the read verifier is called * before the log is allocated and processed. We know the log is set up * before write verifier calls, so check it here. */ if (!xfs_log_check_lsn(mp, sbp->sb_lsn)) return -EFSCORRUPTED; return 0; } int xfs_compute_rgblklog( xfs_rtxlen_t rgextents, xfs_rgblock_t rextsize) { uint64_t rgblocks = (uint64_t)rgextents * rextsize; return xfs_highbit64(rgblocks - 1) + 1; } static int xfs_validate_sb_rtgroups( struct xfs_mount *mp, struct xfs_sb *sbp) { uint64_t groups; int rgblklog; if (sbp->sb_rextsize == 0) { xfs_warn(mp, "Realtime extent size must not be zero."); return -EINVAL; } if (sbp->sb_rgextents > XFS_MAX_RGBLOCKS / sbp->sb_rextsize) { xfs_warn(mp, "Realtime group size (%u) must be less than %u rt extents.", sbp->sb_rgextents, XFS_MAX_RGBLOCKS / sbp->sb_rextsize); return -EINVAL; } if (sbp->sb_rgextents < XFS_MIN_RGEXTENTS) { xfs_warn(mp, "Realtime group size (%u) must be at least %u rt extents.", sbp->sb_rgextents, XFS_MIN_RGEXTENTS); return -EINVAL; } if (sbp->sb_rgcount > XFS_MAX_RGNUMBER) { xfs_warn(mp, "Realtime groups (%u) must be less than %u.", sbp->sb_rgcount, XFS_MAX_RGNUMBER); return -EINVAL; } groups = howmany_64(sbp->sb_rextents, sbp->sb_rgextents); if (groups != sbp->sb_rgcount) { xfs_warn(mp, "Realtime groups (%u) do not cover the entire rt section; need (%llu) groups.", sbp->sb_rgcount, groups); return -EINVAL; } /* Exchange-range is required for fsr to work on realtime files */ if (!(sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_EXCHRANGE)) { xfs_warn(mp, "Realtime groups feature requires exchange-range support."); return -EINVAL; } rgblklog = xfs_compute_rgblklog(sbp->sb_rgextents, sbp->sb_rextsize); if (sbp->sb_rgblklog != rgblklog) { xfs_warn(mp, "Realtime group log (%d) does not match expected value (%d).", sbp->sb_rgblklog, rgblklog); return -EINVAL; } return 0; } static int xfs_validate_sb_zoned( struct xfs_mount *mp, struct xfs_sb *sbp) { if (sbp->sb_frextents != 0) { xfs_warn(mp, "sb_frextents must be zero for zoned file systems."); return -EINVAL; } if (sbp->sb_rtstart && sbp->sb_rtstart < sbp->sb_dblocks) { xfs_warn(mp, "sb_rtstart (%lld) overlaps sb_dblocks (%lld).", sbp->sb_rtstart, sbp->sb_dblocks); return -EINVAL; } if (sbp->sb_rtreserved && sbp->sb_rtreserved >= sbp->sb_rblocks) { xfs_warn(mp, "sb_rtreserved (%lld) larger than sb_rblocks (%lld).", sbp->sb_rtreserved, sbp->sb_rblocks); return -EINVAL; } return 0; } /* Check the validity of the SB. */ STATIC int xfs_validate_sb_common( struct xfs_mount *mp, struct xfs_buf *bp, struct xfs_sb *sbp) { struct xfs_dsb *dsb = bp->b_addr; uint32_t agcount = 0; uint32_t rem; bool has_dalign; int error; if (!xfs_verify_magic(bp, dsb->sb_magicnum)) { xfs_warn(mp, "Superblock has bad magic number 0x%x. Not an XFS filesystem?", be32_to_cpu(dsb->sb_magicnum)); return -EWRONGFS; } if (!xfs_sb_good_version(sbp)) { xfs_warn(mp, "Superblock has unknown features enabled or corrupted feature masks."); return -EWRONGFS; } /* * Validate feature flags and state */ if (xfs_sb_is_v5(sbp)) { if (sbp->sb_blocksize < XFS_MIN_CRC_BLOCKSIZE) { xfs_notice(mp, "Block size (%u bytes) too small for Version 5 superblock (minimum %d bytes)", sbp->sb_blocksize, XFS_MIN_CRC_BLOCKSIZE); return -EFSCORRUPTED; } /* V5 has a separate project quota inode */ if (sbp->sb_qflags & (XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD)) { xfs_notice(mp, "Version 5 of Super block has XFS_OQUOTA bits."); return -EFSCORRUPTED; } /* * Full inode chunks must be aligned to inode chunk size when * sparse inodes are enabled to support the sparse chunk * allocation algorithm and prevent overlapping inode records. */ if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_SPINODES) { uint32_t align; align = XFS_INODES_PER_CHUNK * sbp->sb_inodesize >> sbp->sb_blocklog; if (sbp->sb_inoalignmt != align) { xfs_warn(mp, "Inode block alignment (%u) must match chunk size (%u) for sparse inodes.", sbp->sb_inoalignmt, align); return -EINVAL; } if (sbp->sb_spino_align && (sbp->sb_spino_align > sbp->sb_inoalignmt || (sbp->sb_inoalignmt % sbp->sb_spino_align) != 0)) { xfs_warn(mp, "Sparse inode alignment (%u) is invalid, must be integer factor of (%u).", sbp->sb_spino_align, sbp->sb_inoalignmt); return -EINVAL; } } else if (sbp->sb_spino_align) { xfs_warn(mp, "Sparse inode alignment (%u) should be zero.", sbp->sb_spino_align); return -EINVAL; } if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR) { if (memchr_inv(sbp->sb_pad, 0, sizeof(sbp->sb_pad))) { xfs_warn(mp, "Metadir superblock padding fields must be zero."); return -EINVAL; } error = xfs_validate_sb_rtgroups(mp, sbp); if (error) return error; } if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_ZONED) { error = xfs_validate_sb_zoned(mp, sbp); if (error) return error; } } else if (sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD | XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) { xfs_notice(mp, "Superblock earlier than Version 5 has XFS_{P|G}QUOTA_{ENFD|CHKD} bits."); return -EFSCORRUPTED; } if (unlikely( sbp->sb_logstart == 0 && mp->m_logdev_targp == mp->m_ddev_targp)) { xfs_warn(mp, "filesystem is marked as having an external log; " "specify logdev on the mount command line."); return -EINVAL; } if (unlikely( sbp->sb_logstart != 0 && mp->m_logdev_targp != mp->m_ddev_targp)) { xfs_warn(mp, "filesystem is marked as having an internal log; " "do not specify logdev on the mount command line."); return -EINVAL; } /* Compute agcount for this number of dblocks and agblocks */ if (sbp->sb_agblocks) { agcount = div_u64_rem(sbp->sb_dblocks, sbp->sb_agblocks, &rem); if (rem) agcount++; } /* * More sanity checking. Most of these were stolen directly from * xfs_repair. */ if (unlikely( sbp->sb_agcount <= 0 || sbp->sb_sectsize < XFS_MIN_SECTORSIZE || sbp->sb_sectsize > XFS_MAX_SECTORSIZE || sbp->sb_sectlog < XFS_MIN_SECTORSIZE_LOG || sbp->sb_sectlog > XFS_MAX_SECTORSIZE_LOG || sbp->sb_sectsize != (1 << sbp->sb_sectlog) || sbp->sb_blocksize < XFS_MIN_BLOCKSIZE || sbp->sb_blocksize > XFS_MAX_BLOCKSIZE || sbp->sb_blocklog < XFS_MIN_BLOCKSIZE_LOG || sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || sbp->sb_blocksize != (1 << sbp->sb_blocklog) || sbp->sb_dirblklog + sbp->sb_blocklog > XFS_MAX_BLOCKSIZE_LOG || sbp->sb_inodesize < XFS_DINODE_MIN_SIZE || sbp->sb_inodesize > XFS_DINODE_MAX_SIZE || sbp->sb_inodelog < XFS_DINODE_MIN_LOG || sbp->sb_inodelog > XFS_DINODE_MAX_LOG || sbp->sb_inodesize != (1 << sbp->sb_inodelog) || sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) || XFS_FSB_TO_B(mp, sbp->sb_agblocks) < XFS_MIN_AG_BYTES || XFS_FSB_TO_B(mp, sbp->sb_agblocks) > XFS_MAX_AG_BYTES || sbp->sb_agblklog != xfs_highbit32(sbp->sb_agblocks - 1) + 1 || agcount == 0 || agcount != sbp->sb_agcount || (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */) || sbp->sb_dblocks == 0 || sbp->sb_dblocks > XFS_MAX_DBLOCKS(sbp) || sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp) || sbp->sb_shared_vn != 0)) { xfs_notice(mp, "SB sanity check failed"); return -EFSCORRUPTED; } /* * Logs that are too large are not supported at all. Reject them * outright. Logs that are too small are tolerated on v4 filesystems, * but we can only check that when mounting the log. Hence we skip * those checks here. */ if (sbp->sb_logblocks > XFS_MAX_LOG_BLOCKS) { xfs_notice(mp, "Log size 0x%x blocks too large, maximum size is 0x%llx blocks", sbp->sb_logblocks, XFS_MAX_LOG_BLOCKS); return -EFSCORRUPTED; } if (XFS_FSB_TO_B(mp, sbp->sb_logblocks) > XFS_MAX_LOG_BYTES) { xfs_warn(mp, "log size 0x%llx bytes too large, maximum size is 0x%llx bytes", XFS_FSB_TO_B(mp, sbp->sb_logblocks), XFS_MAX_LOG_BYTES); return -EFSCORRUPTED; } /* * Do not allow filesystems with corrupted log sector or stripe units to * be mounted. We cannot safely size the iclogs or write to the log if * the log stripe unit is not valid. */ if (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT) { if (sbp->sb_logsectsize != (1U << sbp->sb_logsectlog)) { xfs_notice(mp, "log sector size in bytes/log2 (0x%x/0x%x) must match", sbp->sb_logsectsize, 1U << sbp->sb_logsectlog); return -EFSCORRUPTED; } } else if (sbp->sb_logsectsize || sbp->sb_logsectlog) { xfs_notice(mp, "log sector size in bytes/log2 (0x%x/0x%x) are not zero", sbp->sb_logsectsize, sbp->sb_logsectlog); return -EFSCORRUPTED; } if (sbp->sb_logsunit > 1) { if (sbp->sb_logsunit % sbp->sb_blocksize) { xfs_notice(mp, "log stripe unit 0x%x bytes must be a multiple of block size", sbp->sb_logsunit); return -EFSCORRUPTED; } if (sbp->sb_logsunit > XLOG_MAX_RECORD_BSIZE) { xfs_notice(mp, "log stripe unit 0x%x bytes over maximum size (0x%x bytes)", sbp->sb_logsunit, XLOG_MAX_RECORD_BSIZE); return -EFSCORRUPTED; } } if (!xfs_validate_rt_geometry(sbp)) { xfs_notice(mp, "realtime %sgeometry check failed", sbp->sb_rblocks ? "" : "zeroed "); return -EFSCORRUPTED; } /* * Either (sb_unit and !hasdalign) or (!sb_unit and hasdalign) * would imply the image is corrupted. */ has_dalign = sbp->sb_versionnum & XFS_SB_VERSION_DALIGNBIT; if (!!sbp->sb_unit ^ has_dalign) { xfs_notice(mp, "SB stripe alignment sanity check failed"); return -EFSCORRUPTED; } if (!xfs_validate_stripe_geometry(mp, XFS_FSB_TO_B(mp, sbp->sb_unit), XFS_FSB_TO_B(mp, sbp->sb_width), 0, xfs_buf_daddr(bp) == XFS_SB_DADDR, false)) return -EFSCORRUPTED; /* * Currently only very few inode sizes are supported. */ switch (sbp->sb_inodesize) { case 256: case 512: case 1024: case 2048: break; default: xfs_warn(mp, "inode size of %d bytes not supported", sbp->sb_inodesize); return -ENOSYS; } return 0; } void xfs_sb_quota_from_disk(struct xfs_sb *sbp) { if (xfs_sb_is_v5(sbp) && (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR)) { sbp->sb_uquotino = NULLFSINO; sbp->sb_gquotino = NULLFSINO; sbp->sb_pquotino = NULLFSINO; return; } /* * older mkfs doesn't initialize quota inodes to NULLFSINO. This * leads to in-core values having two different values for a quota * inode to be invalid: 0 and NULLFSINO. Change it to a single value * NULLFSINO. * * Note that this change affect only the in-core values. These * values are not written back to disk unless any quota information * is written to the disk. Even in that case, sb_pquotino field is * not written to disk unless the superblock supports pquotino. */ if (sbp->sb_uquotino == 0) sbp->sb_uquotino = NULLFSINO; if (sbp->sb_gquotino == 0) sbp->sb_gquotino = NULLFSINO; if (sbp->sb_pquotino == 0) sbp->sb_pquotino = NULLFSINO; /* * We need to do these manipilations only if we are working * with an older version of on-disk superblock. */ if (xfs_sb_is_v5(sbp)) return; if (sbp->sb_qflags & XFS_OQUOTA_ENFD) sbp->sb_qflags |= (sbp->sb_qflags & XFS_PQUOTA_ACCT) ? XFS_PQUOTA_ENFD : XFS_GQUOTA_ENFD; if (sbp->sb_qflags & XFS_OQUOTA_CHKD) sbp->sb_qflags |= (sbp->sb_qflags & XFS_PQUOTA_ACCT) ? XFS_PQUOTA_CHKD : XFS_GQUOTA_CHKD; sbp->sb_qflags &= ~(XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD); if (sbp->sb_qflags & XFS_PQUOTA_ACCT && sbp->sb_gquotino != NULLFSINO) { /* * In older version of superblock, on-disk superblock only * has sb_gquotino, and in-core superblock has both sb_gquotino * and sb_pquotino. But, only one of them is supported at any * point of time. So, if PQUOTA is set in disk superblock, * copy over sb_gquotino to sb_pquotino. The NULLFSINO test * above is to make sure we don't do this twice and wipe them * both out! */ sbp->sb_pquotino = sbp->sb_gquotino; sbp->sb_gquotino = NULLFSINO; } } static void __xfs_sb_from_disk( struct xfs_sb *to, struct xfs_dsb *from, bool convert_xquota) { to->sb_magicnum = be32_to_cpu(from->sb_magicnum); to->sb_blocksize = be32_to_cpu(from->sb_blocksize); to->sb_dblocks = be64_to_cpu(from->sb_dblocks); to->sb_rblocks = be64_to_cpu(from->sb_rblocks); to->sb_rextents = be64_to_cpu(from->sb_rextents); memcpy(&to->sb_uuid, &from->sb_uuid, sizeof(to->sb_uuid)); to->sb_logstart = be64_to_cpu(from->sb_logstart); to->sb_rootino = be64_to_cpu(from->sb_rootino); to->sb_rbmino = be64_to_cpu(from->sb_rbmino); to->sb_rsumino = be64_to_cpu(from->sb_rsumino); to->sb_rextsize = be32_to_cpu(from->sb_rextsize); to->sb_agblocks = be32_to_cpu(from->sb_agblocks); to->sb_agcount = be32_to_cpu(from->sb_agcount); to->sb_rbmblocks = be32_to_cpu(from->sb_rbmblocks); to->sb_logblocks = be32_to_cpu(from->sb_logblocks); to->sb_versionnum = be16_to_cpu(from->sb_versionnum); to->sb_sectsize = be16_to_cpu(from->sb_sectsize); to->sb_inodesize = be16_to_cpu(from->sb_inodesize); to->sb_inopblock = be16_to_cpu(from->sb_inopblock); memcpy(&to->sb_fname, &from->sb_fname, sizeof(to->sb_fname)); to->sb_blocklog = from->sb_blocklog; to->sb_sectlog = from->sb_sectlog; to->sb_inodelog = from->sb_inodelog; to->sb_inopblog = from->sb_inopblog; to->sb_agblklog = from->sb_agblklog; to->sb_rextslog = from->sb_rextslog; to->sb_inprogress = from->sb_inprogress; to->sb_imax_pct = from->sb_imax_pct; to->sb_icount = be64_to_cpu(from->sb_icount); to->sb_ifree = be64_to_cpu(from->sb_ifree); to->sb_fdblocks = be64_to_cpu(from->sb_fdblocks); to->sb_frextents = be64_to_cpu(from->sb_frextents); to->sb_uquotino = be64_to_cpu(from->sb_uquotino); to->sb_gquotino = be64_to_cpu(from->sb_gquotino); to->sb_qflags = be16_to_cpu(from->sb_qflags); to->sb_flags = from->sb_flags; to->sb_shared_vn = from->sb_shared_vn; to->sb_inoalignmt = be32_to_cpu(from->sb_inoalignmt); to->sb_unit = be32_to_cpu(from->sb_unit); to->sb_width = be32_to_cpu(from->sb_width); to->sb_dirblklog = from->sb_dirblklog; to->sb_logsectlog = from->sb_logsectlog; to->sb_logsectsize = be16_to_cpu(from->sb_logsectsize); to->sb_logsunit = be32_to_cpu(from->sb_logsunit); to->sb_features2 = be32_to_cpu(from->sb_features2); to->sb_bad_features2 = be32_to_cpu(from->sb_bad_features2); to->sb_features_compat = be32_to_cpu(from->sb_features_compat); to->sb_features_ro_compat = be32_to_cpu(from->sb_features_ro_compat); to->sb_features_incompat = be32_to_cpu(from->sb_features_incompat); to->sb_features_log_incompat = be32_to_cpu(from->sb_features_log_incompat); /* crc is only used on disk, not in memory; just init to 0 here. */ to->sb_crc = 0; to->sb_spino_align = be32_to_cpu(from->sb_spino_align); to->sb_pquotino = be64_to_cpu(from->sb_pquotino); to->sb_lsn = be64_to_cpu(from->sb_lsn); /* * sb_meta_uuid is only on disk if it differs from sb_uuid and the * feature flag is set; if not set we keep it only in memory. */ if (xfs_sb_is_v5(to) && (to->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_META_UUID)) uuid_copy(&to->sb_meta_uuid, &from->sb_meta_uuid); else uuid_copy(&to->sb_meta_uuid, &from->sb_uuid); /* Convert on-disk flags to in-memory flags? */ if (convert_xquota) xfs_sb_quota_from_disk(to); if (to->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR) { to->sb_metadirino = be64_to_cpu(from->sb_metadirino); to->sb_rgblklog = from->sb_rgblklog; memcpy(to->sb_pad, from->sb_pad, sizeof(to->sb_pad)); to->sb_rgcount = be32_to_cpu(from->sb_rgcount); to->sb_rgextents = be32_to_cpu(from->sb_rgextents); to->sb_rbmino = NULLFSINO; to->sb_rsumino = NULLFSINO; } else { to->sb_metadirino = NULLFSINO; to->sb_rgcount = 1; to->sb_rgextents = 0; } if (to->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_ZONED) { to->sb_rtstart = be64_to_cpu(from->sb_rtstart); to->sb_rtreserved = be64_to_cpu(from->sb_rtreserved); } else { to->sb_rtstart = 0; to->sb_rtreserved = 0; } } void xfs_sb_from_disk( struct xfs_sb *to, struct xfs_dsb *from) { __xfs_sb_from_disk(to, from, true); } static void xfs_sb_quota_to_disk( struct xfs_dsb *to, struct xfs_sb *from) { uint16_t qflags = from->sb_qflags; if (xfs_sb_is_v5(from) && (from->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR)) { to->sb_qflags = cpu_to_be16(from->sb_qflags); to->sb_uquotino = cpu_to_be64(0); to->sb_gquotino = cpu_to_be64(0); to->sb_pquotino = cpu_to_be64(0); return; } to->sb_uquotino = cpu_to_be64(from->sb_uquotino); /* * The in-memory superblock quota state matches the v5 on-disk format so * just write them out and return */ if (xfs_sb_is_v5(from)) { to->sb_qflags = cpu_to_be16(from->sb_qflags); to->sb_gquotino = cpu_to_be64(from->sb_gquotino); to->sb_pquotino = cpu_to_be64(from->sb_pquotino); return; } /* * For older superblocks (v4), the in-core version of sb_qflags do not * have XFS_OQUOTA_* flags, whereas the on-disk version does. So, * convert incore XFS_{PG}QUOTA_* flags to on-disk XFS_OQUOTA_* flags. */ qflags &= ~(XFS_PQUOTA_ENFD | XFS_PQUOTA_CHKD | XFS_GQUOTA_ENFD | XFS_GQUOTA_CHKD); if (from->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD)) qflags |= XFS_OQUOTA_ENFD; if (from->sb_qflags & (XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) qflags |= XFS_OQUOTA_CHKD; to->sb_qflags = cpu_to_be16(qflags); /* * GQUOTINO and PQUOTINO cannot be used together in versions * of superblock that do not have pquotino. from->sb_flags * tells us which quota is active and should be copied to * disk. If neither are active, we should NULL the inode. * * In all cases, the separate pquotino must remain 0 because it * is beyond the "end" of the valid non-pquotino superblock. */ if (from->sb_qflags & XFS_GQUOTA_ACCT) to->sb_gquotino = cpu_to_be64(from->sb_gquotino); else if (from->sb_qflags & XFS_PQUOTA_ACCT) to->sb_gquotino = cpu_to_be64(from->sb_pquotino); else { /* * We can't rely on just the fields being logged to tell us * that it is safe to write NULLFSINO - we should only do that * if quotas are not actually enabled. Hence only write * NULLFSINO if both in-core quota inodes are NULL. */ if (from->sb_gquotino == NULLFSINO && from->sb_pquotino == NULLFSINO) to->sb_gquotino = cpu_to_be64(NULLFSINO); } to->sb_pquotino = 0; } void xfs_sb_to_disk( struct xfs_dsb *to, struct xfs_sb *from) { xfs_sb_quota_to_disk(to, from); to->sb_magicnum = cpu_to_be32(from->sb_magicnum); to->sb_blocksize = cpu_to_be32(from->sb_blocksize); to->sb_dblocks = cpu_to_be64(from->sb_dblocks); to->sb_rblocks = cpu_to_be64(from->sb_rblocks); to->sb_rextents = cpu_to_be64(from->sb_rextents); memcpy(&to->sb_uuid, &from->sb_uuid, sizeof(to->sb_uuid)); to->sb_logstart = cpu_to_be64(from->sb_logstart); to->sb_rootino = cpu_to_be64(from->sb_rootino); to->sb_rbmino = cpu_to_be64(from->sb_rbmino); to->sb_rsumino = cpu_to_be64(from->sb_rsumino); to->sb_rextsize = cpu_to_be32(from->sb_rextsize); to->sb_agblocks = cpu_to_be32(from->sb_agblocks); to->sb_agcount = cpu_to_be32(from->sb_agcount); to->sb_rbmblocks = cpu_to_be32(from->sb_rbmblocks); to->sb_logblocks = cpu_to_be32(from->sb_logblocks); to->sb_versionnum = cpu_to_be16(from->sb_versionnum); to->sb_sectsize = cpu_to_be16(from->sb_sectsize); to->sb_inodesize = cpu_to_be16(from->sb_inodesize); to->sb_inopblock = cpu_to_be16(from->sb_inopblock); memcpy(&to->sb_fname, &from->sb_fname, sizeof(to->sb_fname)); to->sb_blocklog = from->sb_blocklog; to->sb_sectlog = from->sb_sectlog; to->sb_inodelog = from->sb_inodelog; to->sb_inopblog = from->sb_inopblog; to->sb_agblklog = from->sb_agblklog; to->sb_rextslog = from->sb_rextslog; to->sb_inprogress = from->sb_inprogress; to->sb_imax_pct = from->sb_imax_pct; to->sb_icount = cpu_to_be64(from->sb_icount); to->sb_ifree = cpu_to_be64(from->sb_ifree); to->sb_fdblocks = cpu_to_be64(from->sb_fdblocks); to->sb_frextents = cpu_to_be64(from->sb_frextents); to->sb_flags = from->sb_flags; to->sb_shared_vn = from->sb_shared_vn; to->sb_inoalignmt = cpu_to_be32(from->sb_inoalignmt); to->sb_unit = cpu_to_be32(from->sb_unit); to->sb_width = cpu_to_be32(from->sb_width); to->sb_dirblklog = from->sb_dirblklog; to->sb_logsectlog = from->sb_logsectlog; to->sb_logsectsize = cpu_to_be16(from->sb_logsectsize); to->sb_logsunit = cpu_to_be32(from->sb_logsunit); /* * We need to ensure that bad_features2 always matches features2. * Hence we enforce that here rather than having to remember to do it * everywhere else that updates features2. */ from->sb_bad_features2 = from->sb_features2; to->sb_features2 = cpu_to_be32(from->sb_features2); to->sb_bad_features2 = cpu_to_be32(from->sb_bad_features2); if (!xfs_sb_is_v5(from)) return; to->sb_features_compat = cpu_to_be32(from->sb_features_compat); to->sb_features_ro_compat = cpu_to_be32(from->sb_features_ro_compat); to->sb_features_incompat = cpu_to_be32(from->sb_features_incompat); to->sb_features_log_incompat = cpu_to_be32(from->sb_features_log_incompat); to->sb_spino_align = cpu_to_be32(from->sb_spino_align); to->sb_lsn = cpu_to_be64(from->sb_lsn); if (from->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_META_UUID) uuid_copy(&to->sb_meta_uuid, &from->sb_meta_uuid); if (from->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR) { to->sb_metadirino = cpu_to_be64(from->sb_metadirino); to->sb_rgblklog = from->sb_rgblklog; memset(to->sb_pad, 0, sizeof(to->sb_pad)); to->sb_rgcount = cpu_to_be32(from->sb_rgcount); to->sb_rgextents = cpu_to_be32(from->sb_rgextents); to->sb_rbmino = cpu_to_be64(0); to->sb_rsumino = cpu_to_be64(0); } if (from->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_ZONED) { to->sb_rtstart = cpu_to_be64(from->sb_rtstart); to->sb_rtreserved = cpu_to_be64(from->sb_rtreserved); } } /* * If the superblock has the CRC feature bit set or the CRC field is non-null, * check that the CRC is valid. We check the CRC field is non-null because a * single bit error could clear the feature bit and unused parts of the * superblock are supposed to be zero. Hence a non-null crc field indicates that * we've potentially lost a feature bit and we should check it anyway. * * However, past bugs (i.e. in growfs) left non-zeroed regions beyond the * last field in V4 secondary superblocks. So for secondary superblocks, * we are more forgiving, and ignore CRC failures if the primary doesn't * indicate that the fs version is V5. */ static void xfs_sb_read_verify( struct xfs_buf *bp) { struct xfs_sb sb; struct xfs_mount *mp = bp->b_mount; struct xfs_dsb *dsb = bp->b_addr; int error; /* * open code the version check to avoid needing to convert the entire * superblock from disk order just to check the version number */ if (dsb->sb_magicnum == cpu_to_be32(XFS_SB_MAGIC) && (((be16_to_cpu(dsb->sb_versionnum) & XFS_SB_VERSION_NUMBITS) == XFS_SB_VERSION_5) || dsb->sb_crc != 0)) { if (!xfs_buf_verify_cksum(bp, XFS_SB_CRC_OFF)) { /* Only fail bad secondaries on a known V5 filesystem */ if (xfs_buf_daddr(bp) == XFS_SB_DADDR || xfs_has_crc(mp)) { error = -EFSBADCRC; goto out_error; } } } /* * Check all the superblock fields. Don't byteswap the xquota flags * because _verify_common checks the on-disk values. */ __xfs_sb_from_disk(&sb, dsb, false); error = xfs_validate_sb_common(mp, bp, &sb); if (error) goto out_error; error = xfs_validate_sb_read(mp, &sb); out_error: if (error == -EFSCORRUPTED || error == -EFSBADCRC) xfs_verifier_error(bp, error, __this_address); else if (error) xfs_buf_ioerror(bp, error); } /* * We may be probed for a filesystem match, so we may not want to emit * messages when the superblock buffer is not actually an XFS superblock. * If we find an XFS superblock, then run a normal, noisy mount because we are * really going to mount it and want to know about errors. */ static void xfs_sb_quiet_read_verify( struct xfs_buf *bp) { struct xfs_dsb *dsb = bp->b_addr; if (dsb->sb_magicnum == cpu_to_be32(XFS_SB_MAGIC)) { /* XFS filesystem, verify noisily! */ xfs_sb_read_verify(bp); return; } /* quietly fail */ xfs_buf_ioerror(bp, -EWRONGFS); } static void xfs_sb_write_verify( struct xfs_buf *bp) { struct xfs_sb sb; struct xfs_mount *mp = bp->b_mount; struct xfs_buf_log_item *bip = bp->b_log_item; struct xfs_dsb *dsb = bp->b_addr; int error; /* * Check all the superblock fields. Don't byteswap the xquota flags * because _verify_common checks the on-disk values. */ __xfs_sb_from_disk(&sb, dsb, false); error = xfs_validate_sb_common(mp, bp, &sb); if (error) goto out_error; error = xfs_validate_sb_write(mp, bp, &sb); if (error) goto out_error; if (!xfs_sb_is_v5(&sb)) return; if (bip) dsb->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn); xfs_buf_update_cksum(bp, XFS_SB_CRC_OFF); return; out_error: xfs_verifier_error(bp, error, __this_address); } const struct xfs_buf_ops xfs_sb_buf_ops = { .name = "xfs_sb", .magic = { cpu_to_be32(XFS_SB_MAGIC), cpu_to_be32(XFS_SB_MAGIC) }, .verify_read = xfs_sb_read_verify, .verify_write = xfs_sb_write_verify, }; const struct xfs_buf_ops xfs_sb_quiet_buf_ops = { .name = "xfs_sb_quiet", .magic = { cpu_to_be32(XFS_SB_MAGIC), cpu_to_be32(XFS_SB_MAGIC) }, .verify_read = xfs_sb_quiet_read_verify, .verify_write = xfs_sb_write_verify, }; /* Compute cached rt geometry from the incore sb. */ void xfs_sb_mount_rextsize( struct xfs_mount *mp, struct xfs_sb *sbp) { struct xfs_groups *rgs = &mp->m_groups[XG_TYPE_RTG]; mp->m_rtxblklog = log2_if_power2(sbp->sb_rextsize); mp->m_rtxblkmask = mask64_if_power2(sbp->sb_rextsize); if (xfs_sb_is_v5(sbp) && (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR)) { rgs->blocks = sbp->sb_rgextents * sbp->sb_rextsize; rgs->blklog = mp->m_sb.sb_rgblklog; rgs->blkmask = xfs_mask32lo(mp->m_sb.sb_rgblklog); rgs->start_fsb = mp->m_sb.sb_rtstart; if (xfs_sb_has_incompat_feature(sbp, XFS_SB_FEAT_INCOMPAT_ZONE_GAPS)) rgs->has_daddr_gaps = true; } else { rgs->blocks = 0; rgs->blklog = 0; rgs->blkmask = (uint64_t)-1; } } /* Update incore sb rt extent size, then recompute the cached rt geometry. */ void xfs_mount_sb_set_rextsize( struct xfs_mount *mp, struct xfs_sb *sbp, xfs_agblock_t rextsize) { sbp->sb_rextsize = rextsize; if (xfs_sb_is_v5(sbp) && (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR)) sbp->sb_rgblklog = xfs_compute_rgblklog(sbp->sb_rgextents, rextsize); xfs_sb_mount_rextsize(mp, sbp); } /* * xfs_mount_common * * Mount initialization code establishing various mount * fields from the superblock associated with the given * mount structure. * * Inode geometry are calculated in xfs_ialloc_setup_geometry. */ void xfs_sb_mount_common( struct xfs_mount *mp, struct xfs_sb *sbp) { struct xfs_groups *ags = &mp->m_groups[XG_TYPE_AG]; mp->m_agfrotor = 0; atomic_set(&mp->m_agirotor, 0); mp->m_maxagi = mp->m_sb.sb_agcount; mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG; mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT; mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT; mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1; mp->m_blockmask = sbp->sb_blocksize - 1; mp->m_blockwsize = xfs_rtbmblock_size(sbp) >> XFS_WORDLOG; mp->m_rtx_per_rbmblock = mp->m_blockwsize << XFS_NBWORDLOG; ags->blocks = mp->m_sb.sb_agblocks; ags->blklog = mp->m_sb.sb_agblklog; ags->blkmask = xfs_mask32lo(mp->m_sb.sb_agblklog); xfs_sb_mount_rextsize(mp, sbp); mp->m_alloc_mxr[0] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, true); mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, false); mp->m_alloc_mnr[0] = mp->m_alloc_mxr[0] / 2; mp->m_alloc_mnr[1] = mp->m_alloc_mxr[1] / 2; mp->m_bmap_dmxr[0] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, true); mp->m_bmap_dmxr[1] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, false); mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2; mp->m_bmap_dmnr[1] = mp->m_bmap_dmxr[1] / 2; mp->m_rmap_mxr[0] = xfs_rmapbt_maxrecs(mp, sbp->sb_blocksize, true); mp->m_rmap_mxr[1] = xfs_rmapbt_maxrecs(mp, sbp->sb_blocksize, false); mp->m_rmap_mnr[0] = mp->m_rmap_mxr[0] / 2; mp->m_rmap_mnr[1] = mp->m_rmap_mxr[1] / 2; mp->m_rtrmap_mxr[0] = xfs_rtrmapbt_maxrecs(mp, sbp->sb_blocksize, true); mp->m_rtrmap_mxr[1] = xfs_rtrmapbt_maxrecs(mp, sbp->sb_blocksize, false); mp->m_rtrmap_mnr[0] = mp->m_rtrmap_mxr[0] / 2; mp->m_rtrmap_mnr[1] = mp->m_rtrmap_mxr[1] / 2; mp->m_refc_mxr[0] = xfs_refcountbt_maxrecs(mp, sbp->sb_blocksize, true); mp->m_refc_mxr[1] = xfs_refcountbt_maxrecs(mp, sbp->sb_blocksize, false); mp->m_refc_mnr[0] = mp->m_refc_mxr[0] / 2; mp->m_refc_mnr[1] = mp->m_refc_mxr[1] / 2; mp->m_rtrefc_mxr[0] = xfs_rtrefcountbt_maxrecs(mp, sbp->sb_blocksize, true); mp->m_rtrefc_mxr[1] = xfs_rtrefcountbt_maxrecs(mp, sbp->sb_blocksize, false); mp->m_rtrefc_mnr[0] = mp->m_rtrefc_mxr[0] / 2; mp->m_rtrefc_mnr[1] = mp->m_rtrefc_mxr[1] / 2; mp->m_bsize = XFS_FSB_TO_BB(mp, 1); mp->m_alloc_set_aside = xfs_alloc_set_aside(mp); mp->m_ag_max_usable = xfs_alloc_ag_max_usable(mp); } /* * xfs_log_sb() can be used to copy arbitrary changes to the in-core superblock * into the superblock buffer to be logged. It does not provide the higher * level of locking that is needed to protect the in-core superblock from * concurrent access. */ void xfs_log_sb( struct xfs_trans *tp) { struct xfs_mount *mp = tp->t_mountp; struct xfs_buf *bp = xfs_trans_getsb(tp); /* * Lazy sb counters don't update the in-core superblock so do that now. * If this is at unmount, the counters will be exactly correct, but at * any other time they will only be ballpark correct because of * reservations that have been taken out percpu counters. If we have an * unclean shutdown, this will be corrected by log recovery rebuilding * the counters from the AGF block counts. */ if (xfs_has_lazysbcount(mp)) { mp->m_sb.sb_icount = percpu_counter_sum_positive(&mp->m_icount); mp->m_sb.sb_ifree = min_t(uint64_t, percpu_counter_sum_positive(&mp->m_ifree), mp->m_sb.sb_icount); mp->m_sb.sb_fdblocks = xfs_sum_freecounter(mp, XC_FREE_BLOCKS); } /* * sb_frextents was added to the lazy sb counters when the rt groups * feature was introduced. This counter can go negative due to the way * we handle nearly-lockless reservations, so we must use the _positive * variant here to avoid writing out nonsense frextents. */ if (xfs_has_rtgroups(mp) && !xfs_has_zoned(mp)) { mp->m_sb.sb_frextents = xfs_sum_freecounter(mp, XC_FREE_RTEXTENTS); } xfs_sb_to_disk(bp->b_addr, &mp->m_sb); xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF); xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb) - 1); } /* * xfs_sync_sb * * Sync the superblock to disk. * * Note that the caller is responsible for checking the frozen state of the * filesystem. This procedure uses the non-blocking transaction allocator and * thus will allow modifications to a frozen fs. This is required because this * code can be called during the process of freezing where use of the high-level * allocator would deadlock. */ int xfs_sync_sb( struct xfs_mount *mp, bool wait) { struct xfs_trans *tp; int error; error = xfs_trans_alloc(mp, &M_RES(mp)->tr_sb, 0, 0, XFS_TRANS_NO_WRITECOUNT, &tp); if (error) return error; xfs_log_sb(tp); if (wait) xfs_trans_set_sync(tp); return xfs_trans_commit(tp); } /* * Update all the secondary superblocks to match the new state of the primary. * Because we are completely overwriting all the existing fields in the * secondary superblock buffers, there is no need to read them in from disk. * Just get a new buffer, stamp it and write it. * * The sb buffers need to be cached here so that we serialise against other * operations that access the secondary superblocks, but we don't want to keep * them in memory once it is written so we mark it as a one-shot buffer. */ int xfs_update_secondary_sbs( struct xfs_mount *mp) { struct xfs_perag *pag = NULL; int saved_error = 0; int error = 0; LIST_HEAD (buffer_list); /* update secondary superblocks. */ while ((pag = xfs_perag_next_from(mp, pag, 1))) { struct xfs_buf *bp; error = xfs_buf_get(mp->m_ddev_targp, XFS_AG_DADDR(mp, pag_agno(pag), XFS_SB_DADDR), XFS_FSS_TO_BB(mp, 1), &bp); /* * If we get an error reading or writing alternate superblocks, * continue. xfs_repair chooses the "best" superblock based * on most matches; if we break early, we'll leave more * superblocks un-updated than updated, and xfs_repair may * pick them over the properly-updated primary. */ if (error) { xfs_warn(mp, "error allocating secondary superblock for ag %d", pag_agno(pag)); if (!saved_error) saved_error = error; continue; } bp->b_ops = &xfs_sb_buf_ops; xfs_buf_oneshot(bp); xfs_buf_zero(bp, 0, BBTOB(bp->b_length)); xfs_sb_to_disk(bp->b_addr, &mp->m_sb); xfs_buf_delwri_queue(bp, &buffer_list); xfs_buf_relse(bp); /* don't hold too many buffers at once */ if (pag_agno(pag) % 16) continue; error = xfs_buf_delwri_submit(&buffer_list); if (error) { xfs_warn(mp, "write error %d updating a secondary superblock near ag %d", error, pag_agno(pag)); if (!saved_error) saved_error = error; continue; } } error = xfs_buf_delwri_submit(&buffer_list); if (error) xfs_warn(mp, "error %d writing secondary superblocks", error); return saved_error ? saved_error : error; } /* * Same behavior as xfs_sync_sb, except that it is always synchronous and it * also writes the superblock buffer to disk sector 0 immediately. */ int xfs_sync_sb_buf( struct xfs_mount *mp, bool update_rtsb) { struct xfs_trans *tp; struct xfs_buf *bp; struct xfs_buf *rtsb_bp = NULL; int error; error = xfs_trans_alloc(mp, &M_RES(mp)->tr_sb, 0, 0, 0, &tp); if (error) return error; bp = xfs_trans_getsb(tp); xfs_log_sb(tp); xfs_trans_bhold(tp, bp); if (update_rtsb) { rtsb_bp = xfs_log_rtsb(tp, bp); if (rtsb_bp) xfs_trans_bhold(tp, rtsb_bp); } xfs_trans_set_sync(tp); error = xfs_trans_commit(tp); if (error) goto out; /* * write out the sb buffer to get the changes to disk */ error = xfs_bwrite(bp); if (!error && rtsb_bp) error = xfs_bwrite(rtsb_bp); out: if (rtsb_bp) xfs_buf_relse(rtsb_bp); xfs_buf_relse(bp); return error; } void xfs_fs_geometry( struct xfs_mount *mp, struct xfs_fsop_geom *geo, int struct_version) { struct xfs_sb *sbp = &mp->m_sb; memset(geo, 0, sizeof(struct xfs_fsop_geom)); geo->blocksize = sbp->sb_blocksize; geo->rtextsize = sbp->sb_rextsize; geo->agblocks = sbp->sb_agblocks; geo->agcount = sbp->sb_agcount; geo->logblocks = sbp->sb_logblocks; geo->sectsize = sbp->sb_sectsize; geo->inodesize = sbp->sb_inodesize; geo->imaxpct = sbp->sb_imax_pct; geo->datablocks = sbp->sb_dblocks; geo->rtblocks = sbp->sb_rblocks; geo->rtextents = sbp->sb_rextents; geo->logstart = sbp->sb_logstart; BUILD_BUG_ON(sizeof(geo->uuid) != sizeof(sbp->sb_uuid)); memcpy(geo->uuid, &sbp->sb_uuid, sizeof(sbp->sb_uuid)); if (struct_version < 2) return; geo->sunit = sbp->sb_unit; geo->swidth = sbp->sb_width; if (struct_version < 3) return; geo->version = XFS_FSOP_GEOM_VERSION; geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK | XFS_FSOP_GEOM_FLAGS_DIRV2 | XFS_FSOP_GEOM_FLAGS_EXTFLG | XFS_FSOP_GEOM_FLAGS_ATTR2; if (xfs_has_attr(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_ATTR; if (xfs_has_quota(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_QUOTA; if (xfs_has_align(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_IALIGN; if (xfs_has_dalign(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_DALIGN; if (xfs_has_asciici(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_DIRV2CI; if (xfs_has_lazysbcount(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_LAZYSB; if (xfs_has_projid32(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_PROJID32; if (xfs_has_crc(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_V5SB; if (xfs_has_ftype(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_FTYPE; if (xfs_has_finobt(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_FINOBT; if (xfs_has_sparseinodes(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_SPINODES; if (xfs_has_rmapbt(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_RMAPBT; if (xfs_has_reflink(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_REFLINK; if (xfs_has_bigtime(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_BIGTIME; if (xfs_has_inobtcounts(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_INOBTCNT; if (xfs_has_parent(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_PARENT; if (xfs_has_sector(mp)) { geo->flags |= XFS_FSOP_GEOM_FLAGS_SECTOR; geo->logsectsize = sbp->sb_logsectsize; } else { geo->logsectsize = BBSIZE; } if (xfs_has_large_extent_counts(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_NREXT64; if (xfs_has_exchange_range(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_EXCHANGE_RANGE; if (xfs_has_metadir(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_METADIR; if (xfs_has_zoned(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_ZONED; geo->rtsectsize = sbp->sb_blocksize; geo->dirblocksize = xfs_dir2_dirblock_bytes(sbp); if (struct_version < 4) return; if (xfs_has_logv2(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_LOGV2; geo->logsunit = sbp->sb_logsunit; if (struct_version < 5) return; geo->version = XFS_FSOP_GEOM_VERSION_V5; if (xfs_has_rtgroups(mp)) { geo->rgcount = sbp->sb_rgcount; geo->rgextents = sbp->sb_rgextents; } if (xfs_has_zoned(mp)) { geo->rtstart = sbp->sb_rtstart; geo->rtreserved = sbp->sb_rtreserved; } } /* Read a secondary superblock. */ int xfs_sb_read_secondary( struct xfs_mount *mp, struct xfs_trans *tp, xfs_agnumber_t agno, struct xfs_buf **bpp) { struct xfs_buf *bp; int error; ASSERT(agno != 0 && agno != NULLAGNUMBER); error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, XFS_AG_DADDR(mp, agno, XFS_SB_BLOCK(mp)), XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_sb_buf_ops); if (xfs_metadata_is_sick(error)) xfs_agno_mark_sick(mp, agno, XFS_SICK_AG_SB); if (error) return error; xfs_buf_set_ref(bp, XFS_SSB_REF); *bpp = bp; return 0; } /* Get an uninitialised secondary superblock buffer. */ int xfs_sb_get_secondary( struct xfs_mount *mp, struct xfs_trans *tp, xfs_agnumber_t agno, struct xfs_buf **bpp) { struct xfs_buf *bp; int error; ASSERT(agno != 0 && agno != NULLAGNUMBER); error = xfs_trans_get_buf(tp, mp->m_ddev_targp, XFS_AG_DADDR(mp, agno, XFS_SB_BLOCK(mp)), XFS_FSS_TO_BB(mp, 1), 0, &bp); if (error) return error; bp->b_ops = &xfs_sb_buf_ops; xfs_buf_oneshot(bp); *bpp = bp; return 0; } /* * sunit, swidth, sectorsize(optional with 0) should be all in bytes, so users * won't be confused by values in error messages. This function returns false * if the stripe geometry is invalid and the caller is unable to repair the * stripe configuration later in the mount process. */ bool xfs_validate_stripe_geometry( struct xfs_mount *mp, __s64 sunit, __s64 swidth, int sectorsize, bool may_repair, bool silent) { if (swidth > INT_MAX) { if (!silent) xfs_notice(mp, "stripe width (%lld) is too large", swidth); goto check_override; } if (sunit > swidth) { if (!silent) xfs_notice(mp, "stripe unit (%lld) is larger than the stripe width (%lld)", sunit, swidth); goto check_override; } if (sectorsize && (int)sunit % sectorsize) { if (!silent) xfs_notice(mp, "stripe unit (%lld) must be a multiple of the sector size (%d)", sunit, sectorsize); goto check_override; } if (sunit && !swidth) { if (!silent) xfs_notice(mp, "invalid stripe unit (%lld) and stripe width of 0", sunit); goto check_override; } if (!sunit && swidth) { if (!silent) xfs_notice(mp, "invalid stripe width (%lld) and stripe unit of 0", swidth); goto check_override; } if (sunit && (int)swidth % (int)sunit) { if (!silent) xfs_notice(mp, "stripe width (%lld) must be a multiple of the stripe unit (%lld)", swidth, sunit); goto check_override; } return true; check_override: if (!may_repair) return false; /* * During mount, mp->m_dalign will not be set unless the sunit mount * option was set. If it was set, ignore the bad stripe alignment values * and allow the validation and overwrite later in the mount process to * attempt to overwrite the bad stripe alignment values with the values * supplied by mount options. */ if (!mp->m_dalign) return false; if (!silent) xfs_notice(mp, "Will try to correct with specified mount options sunit (%d) and swidth (%d)", BBTOB(mp->m_dalign), BBTOB(mp->m_swidth)); return true; } /* * Compute the maximum level number of the realtime summary file, as defined by * mkfs. The historic use of highbit32 on a 64-bit quantity prohibited correct * use of rt volumes with more than 2^32 extents. */ uint8_t xfs_compute_rextslog( xfs_rtbxlen_t rtextents) { if (!rtextents) return 0; return xfs_highbit64(rtextents); }
2154 2149 2136 2141 11 11 11 2137 2138 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 // SPDX-License-Identifier: GPL-2.0 /* * of.c The helpers for hcd device tree support * * Copyright (C) 2016 Freescale Semiconductor, Inc. * Author: Peter Chen <peter.chen@freescale.com> * Copyright (C) 2017 Johan Hovold <johan@kernel.org> */ #include <linux/of.h> #include <linux/of_graph.h> #include <linux/usb/of.h> /** * usb_of_get_device_node() - get a USB device node * @hub: hub to which device is connected * @port1: one-based index of port * * Look up the node of a USB device given its parent hub device and one-based * port number. * * Return: A pointer to the node with incremented refcount if found, or * %NULL otherwise. */ struct device_node *usb_of_get_device_node(struct usb_device *hub, int port1) { struct device_node *node; u32 reg; for_each_child_of_node(hub->dev.of_node, node) { if (of_property_read_u32(node, "reg", &reg)) continue; if (reg == port1) return node; } return NULL; } EXPORT_SYMBOL_GPL(usb_of_get_device_node); /** * usb_of_has_combined_node() - determine whether a device has a combined node * @udev: USB device * * Determine whether a USB device has a so called combined node which is * shared with its sole interface. This is the case if and only if the device * has a node and its descriptors report the following: * * 1) bDeviceClass is 0 or 9, and * 2) bNumConfigurations is 1, and * 3) bNumInterfaces is 1. * * Return: True iff the device has a device node and its descriptors match the * criteria for a combined node. */ bool usb_of_has_combined_node(struct usb_device *udev) { struct usb_device_descriptor *ddesc = &udev->descriptor; struct usb_config_descriptor *cdesc; if (!udev->dev.of_node) return false; switch (ddesc->bDeviceClass) { case USB_CLASS_PER_INTERFACE: case USB_CLASS_HUB: if (ddesc->bNumConfigurations == 1) { cdesc = &udev->config->desc; if (cdesc->bNumInterfaces == 1) return true; } } return false; } EXPORT_SYMBOL_GPL(usb_of_has_combined_node); static bool usb_of_has_devices_or_graph(const struct usb_device *hub) { const struct device_node *np = hub->dev.of_node; struct device_node *child; if (of_graph_is_present(np)) return true; for_each_child_of_node(np, child) { if (of_property_present(child, "reg")) { of_node_put(child); return true; } } return false; } /** * usb_of_get_connect_type() - get a USB hub's port connect_type * @hub: hub to which port is for @port1 * @port1: one-based index of port * * Get the connect_type of @port1 based on the device node for @hub. If the * port is described in the OF graph, the connect_type is "hotplug". If the * @hub has a child device has with a 'reg' property equal to @port1 the * connect_type is "hard-wired". If there isn't an OF graph or child node at * all then the connect_type is "unknown". Otherwise, the port is considered * "unused" because it isn't described at all. * * Return: A connect_type for @port1 based on the device node for @hub. */ enum usb_port_connect_type usb_of_get_connect_type(struct usb_device *hub, int port1) { struct device_node *np, *child, *ep, *remote_np; enum usb_port_connect_type connect_type; /* Only set connect_type if binding has ports/hardwired devices. */ if (!usb_of_has_devices_or_graph(hub)) return USB_PORT_CONNECT_TYPE_UNKNOWN; /* Assume port is unused if there's a graph or a child node. */ connect_type = USB_PORT_NOT_USED; np = hub->dev.of_node; /* * Hotplug ports are connected to an available remote node, e.g. * usb-a-connector compatible node, in the OF graph. */ if (of_graph_is_present(np)) { ep = of_graph_get_endpoint_by_regs(np, port1, -1); if (ep) { remote_np = of_graph_get_remote_port_parent(ep); of_node_put(ep); if (of_device_is_available(remote_np)) connect_type = USB_PORT_CONNECT_TYPE_HOT_PLUG; of_node_put(remote_np); } } /* * Hard-wired ports are child nodes with a reg property corresponding * to the port number, i.e. a usb device. */ child = usb_of_get_device_node(hub, port1); if (of_device_is_available(child)) connect_type = USB_PORT_CONNECT_TYPE_HARD_WIRED; of_node_put(child); return connect_type; } EXPORT_SYMBOL_GPL(usb_of_get_connect_type); /** * usb_of_get_interface_node() - get a USB interface node * @udev: USB device of interface * @config: configuration value * @ifnum: interface number * * Look up the node of a USB interface given its USB device, configuration * value and interface number. * * Return: A pointer to the node with incremented refcount if found, or * %NULL otherwise. */ struct device_node * usb_of_get_interface_node(struct usb_device *udev, u8 config, u8 ifnum) { struct device_node *node; u32 reg[2]; for_each_child_of_node(udev->dev.of_node, node) { if (of_property_read_u32_array(node, "reg", reg, 2)) continue; if (reg[0] == ifnum && reg[1] == config) return node; } return NULL; } EXPORT_SYMBOL_GPL(usb_of_get_interface_node);
3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 6 4 1 1 1 2 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 // SPDX-License-Identifier: GPL-2.0-or-later /* incoming call handling * * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/net.h> #include <linux/skbuff.h> #include <linux/errqueue.h> #include <linux/udp.h> #include <linux/in.h> #include <linux/in6.h> #include <linux/icmp.h> #include <linux/gfp.h> #include <linux/circ_buf.h> #include <net/sock.h> #include <net/af_rxrpc.h> #include <net/ip.h> #include "ar-internal.h" static void rxrpc_dummy_notify(struct sock *sk, struct rxrpc_call *call, unsigned long user_call_ID) { } /* * Preallocate a single service call, connection and peer and, if possible, * give them a user ID and attach the user's side of the ID to them. */ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, struct rxrpc_backlog *b, rxrpc_notify_rx_t notify_rx, unsigned long user_call_ID, gfp_t gfp, unsigned int debug_id) { struct rxrpc_call *call, *xcall; struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk)); struct rb_node *parent, **pp; int max, tmp; unsigned int size = RXRPC_BACKLOG_MAX; unsigned int head, tail, call_head, call_tail; max = rx->sk.sk_max_ack_backlog; tmp = rx->sk.sk_ack_backlog; if (tmp >= max) { _leave(" = -ENOBUFS [full %u]", max); return -ENOBUFS; } max -= tmp; /* We don't need more conns and peers than we have calls, but on the * other hand, we shouldn't ever use more peers than conns or conns * than calls. */ call_head = b->call_backlog_head; call_tail = READ_ONCE(b->call_backlog_tail); tmp = CIRC_CNT(call_head, call_tail, size); if (tmp >= max) { _leave(" = -ENOBUFS [enough %u]", tmp); return -ENOBUFS; } max = tmp + 1; head = b->peer_backlog_head; tail = READ_ONCE(b->peer_backlog_tail); if (CIRC_CNT(head, tail, size) < max) { struct rxrpc_peer *peer; peer = rxrpc_alloc_peer(rx->local, gfp, rxrpc_peer_new_prealloc); if (!peer) return -ENOMEM; b->peer_backlog[head] = peer; smp_store_release(&b->peer_backlog_head, (head + 1) & (size - 1)); } head = b->conn_backlog_head; tail = READ_ONCE(b->conn_backlog_tail); if (CIRC_CNT(head, tail, size) < max) { struct rxrpc_connection *conn; conn = rxrpc_prealloc_service_connection(rxnet, gfp); if (!conn) return -ENOMEM; b->conn_backlog[head] = conn; smp_store_release(&b->conn_backlog_head, (head + 1) & (size - 1)); } /* Now it gets complicated, because calls get registered with the * socket here, with a user ID preassigned by the user. */ call = rxrpc_alloc_call(rx, gfp, debug_id); if (!call) return -ENOMEM; call->flags |= (1 << RXRPC_CALL_IS_SERVICE); rxrpc_set_call_state(call, RXRPC_CALL_SERVER_PREALLOC); __set_bit(RXRPC_CALL_EV_INITIAL_PING, &call->events); trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), user_call_ID, rxrpc_call_new_prealloc_service); write_lock(&rx->call_lock); /* Check the user ID isn't already in use */ pp = &rx->calls.rb_node; parent = NULL; while (*pp) { parent = *pp; xcall = rb_entry(parent, struct rxrpc_call, sock_node); if (user_call_ID < xcall->user_call_ID) pp = &(*pp)->rb_left; else if (user_call_ID > xcall->user_call_ID) pp = &(*pp)->rb_right; else goto id_in_use; } call->user_call_ID = user_call_ID; call->notify_rx = notify_rx; if (rx->app_ops && rx->app_ops->user_attach_call) { rxrpc_get_call(call, rxrpc_call_get_kernel_service); rx->app_ops->user_attach_call(call, user_call_ID); } rxrpc_get_call(call, rxrpc_call_get_userid); rb_link_node(&call->sock_node, parent, pp); rb_insert_color(&call->sock_node, &rx->calls); set_bit(RXRPC_CALL_HAS_USERID, &call->flags); list_add(&call->sock_link, &rx->sock_calls); write_unlock(&rx->call_lock); rxnet = call->rxnet; spin_lock(&rxnet->call_lock); list_add_tail_rcu(&call->link, &rxnet->calls); spin_unlock(&rxnet->call_lock); b->call_backlog[call_head] = call; smp_store_release(&b->call_backlog_head, (call_head + 1) & (size - 1)); _leave(" = 0 [%d -> %lx]", call->debug_id, user_call_ID); return 0; id_in_use: write_unlock(&rx->call_lock); rxrpc_prefail_call(call, RXRPC_CALL_LOCAL_ERROR, -EBADSLT); rxrpc_cleanup_call(call); _leave(" = -EBADSLT"); return -EBADSLT; } /* * Allocate the preallocation buffers for incoming service calls. These must * be charged manually. */ int rxrpc_service_prealloc(struct rxrpc_sock *rx, gfp_t gfp) { struct rxrpc_backlog *b = rx->backlog; if (!b) { b = kzalloc(sizeof(struct rxrpc_backlog), gfp); if (!b) return -ENOMEM; rx->backlog = b; } return 0; } /* * Discard the preallocation on a service. */ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) { struct rxrpc_backlog *b = rx->backlog; struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk)); unsigned int size = RXRPC_BACKLOG_MAX, head, tail; if (!b) return; rx->backlog = NULL; /* Make sure that there aren't any incoming calls in progress before we * clear the preallocation buffers. */ spin_lock_irq(&rx->incoming_lock); spin_unlock_irq(&rx->incoming_lock); head = b->peer_backlog_head; tail = b->peer_backlog_tail; while (CIRC_CNT(head, tail, size) > 0) { struct rxrpc_peer *peer = b->peer_backlog[tail]; rxrpc_put_local(peer->local, rxrpc_local_put_prealloc_peer); kfree(peer); tail = (tail + 1) & (size - 1); } head = b->conn_backlog_head; tail = b->conn_backlog_tail; while (CIRC_CNT(head, tail, size) > 0) { struct rxrpc_connection *conn = b->conn_backlog[tail]; write_lock(&rxnet->conn_lock); list_del(&conn->link); list_del(&conn->proc_link); write_unlock(&rxnet->conn_lock); kfree(conn); if (atomic_dec_and_test(&rxnet->nr_conns)) wake_up_var(&rxnet->nr_conns); tail = (tail + 1) & (size - 1); } head = b->call_backlog_head; tail = b->call_backlog_tail; while (CIRC_CNT(head, tail, size) > 0) { struct rxrpc_call *call = b->call_backlog[tail]; rxrpc_see_call(call, rxrpc_call_see_discard); rcu_assign_pointer(call->socket, rx); if (rx->app_ops && rx->app_ops->discard_new_call) { _debug("discard %lx", call->user_call_ID); rx->app_ops->discard_new_call(call, call->user_call_ID); if (call->notify_rx) call->notify_rx = rxrpc_dummy_notify; rxrpc_put_call(call, rxrpc_call_put_kernel); } rxrpc_call_completed(call); rxrpc_release_call(rx, call); rxrpc_put_call(call, rxrpc_call_put_discard_prealloc); tail = (tail + 1) & (size - 1); } kfree(b); } /* * Allocate a new incoming call from the prealloc pool, along with a connection * and a peer as necessary. */ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, struct rxrpc_local *local, struct rxrpc_peer *peer, struct rxrpc_connection *conn, const struct rxrpc_security *sec, struct sockaddr_rxrpc *peer_srx, struct sk_buff *skb) { struct rxrpc_backlog *b = rx->backlog; struct rxrpc_call *call; unsigned short call_head, conn_head, peer_head; unsigned short call_tail, conn_tail, peer_tail; unsigned short call_count, conn_count; if (!b) return NULL; /* #calls >= #conns >= #peers must hold true. */ call_head = smp_load_acquire(&b->call_backlog_head); call_tail = b->call_backlog_tail; call_count = CIRC_CNT(call_head, call_tail, RXRPC_BACKLOG_MAX); conn_head = smp_load_acquire(&b->conn_backlog_head); conn_tail = b->conn_backlog_tail; conn_count = CIRC_CNT(conn_head, conn_tail, RXRPC_BACKLOG_MAX); ASSERTCMP(conn_count, >=, call_count); peer_head = smp_load_acquire(&b->peer_backlog_head); peer_tail = b->peer_backlog_tail; ASSERTCMP(CIRC_CNT(peer_head, peer_tail, RXRPC_BACKLOG_MAX), >=, conn_count); if (call_count == 0) return NULL; if (!conn) { if (peer && !rxrpc_get_peer_maybe(peer, rxrpc_peer_get_service_conn)) peer = NULL; if (!peer) { peer = b->peer_backlog[peer_tail]; peer->srx = *peer_srx; b->peer_backlog[peer_tail] = NULL; smp_store_release(&b->peer_backlog_tail, (peer_tail + 1) & (RXRPC_BACKLOG_MAX - 1)); rxrpc_new_incoming_peer(local, peer); } /* Now allocate and set up the connection */ conn = b->conn_backlog[conn_tail]; b->conn_backlog[conn_tail] = NULL; smp_store_release(&b->conn_backlog_tail, (conn_tail + 1) & (RXRPC_BACKLOG_MAX - 1)); conn->local = rxrpc_get_local(local, rxrpc_local_get_prealloc_conn); conn->peer = peer; rxrpc_see_connection(conn, rxrpc_conn_see_new_service_conn); rxrpc_new_incoming_connection(rx, conn, sec, skb); } else { rxrpc_get_connection(conn, rxrpc_conn_get_service_conn); atomic_inc(&conn->active); } /* And now we can allocate and set up a new call */ call = b->call_backlog[call_tail]; b->call_backlog[call_tail] = NULL; smp_store_release(&b->call_backlog_tail, (call_tail + 1) & (RXRPC_BACKLOG_MAX - 1)); rxrpc_see_call(call, rxrpc_call_see_accept); call->local = rxrpc_get_local(conn->local, rxrpc_local_get_call); call->conn = conn; call->security = conn->security; call->security_ix = conn->security_ix; call->peer = rxrpc_get_peer(conn->peer, rxrpc_peer_get_accept); call->dest_srx = peer->srx; call->cong_ssthresh = call->peer->cong_ssthresh; call->tx_last_sent = ktime_get_real(); return call; } /* * Set up a new incoming call. Called from the I/O thread. * * If this is for a kernel service, when we allocate the call, it will have * three refs on it: (1) the kernel service, (2) the user_call_ID tree, (3) the * retainer ref obtained from the backlog buffer. Prealloc calls for userspace * services only have the ref from the backlog buffer. * * If we want to report an error, we mark the skb with the packet type and * abort code and return false. */ bool rxrpc_new_incoming_call(struct rxrpc_local *local, struct rxrpc_peer *peer, struct rxrpc_connection *conn, struct sockaddr_rxrpc *peer_srx, struct sk_buff *skb) { const struct rxrpc_security *sec = NULL; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_call *call = NULL; struct rxrpc_sock *rx; _enter(""); /* Don't set up a call for anything other than a DATA packet. */ if (sp->hdr.type != RXRPC_PACKET_TYPE_DATA) return rxrpc_protocol_error(skb, rxrpc_eproto_no_service_call); read_lock_irq(&local->services_lock); /* Weed out packets to services we're not offering. Packets that would * begin a call are explicitly rejected and the rest are just * discarded. */ rx = local->service; if (!rx || (sp->hdr.serviceId != rx->srx.srx_service && sp->hdr.serviceId != rx->second_service) ) { if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && sp->hdr.seq == 1) goto unsupported_service; goto discard; } if (!conn) { sec = rxrpc_get_incoming_security(rx, skb); if (!sec) goto unsupported_security; } spin_lock(&rx->incoming_lock); if (rx->sk.sk_state == RXRPC_SERVER_LISTEN_DISABLED || rx->sk.sk_state == RXRPC_CLOSE) { rxrpc_direct_conn_abort(skb, rxrpc_abort_shut_down, RX_INVALID_OPERATION, -ESHUTDOWN); goto no_call; } call = rxrpc_alloc_incoming_call(rx, local, peer, conn, sec, peer_srx, skb); if (!call) { skb->mark = RXRPC_SKB_MARK_REJECT_BUSY; goto no_call; } trace_rxrpc_receive(call, rxrpc_receive_incoming, sp->hdr.serial, sp->hdr.seq); /* Make the call live. */ rxrpc_incoming_call(rx, call, skb); conn = call->conn; if (rx->app_ops && rx->app_ops->notify_new_call) rx->app_ops->notify_new_call(&rx->sk, call, call->user_call_ID); spin_lock(&conn->state_lock); if (conn->state == RXRPC_CONN_SERVICE_UNSECURED) { conn->state = RXRPC_CONN_SERVICE_CHALLENGING; set_bit(RXRPC_CONN_EV_CHALLENGE, &call->conn->events); rxrpc_queue_conn(call->conn, rxrpc_conn_queue_challenge); } spin_unlock(&conn->state_lock); spin_unlock(&rx->incoming_lock); read_unlock_irq(&local->services_lock); rxrpc_assess_MTU_size(local, call->peer); if (hlist_unhashed(&call->error_link)) { spin_lock_irq(&call->peer->lock); hlist_add_head(&call->error_link, &call->peer->error_targets); spin_unlock_irq(&call->peer->lock); } _leave(" = %p{%d}", call, call->debug_id); rxrpc_queue_rx_call_packet(call, skb); rxrpc_put_call(call, rxrpc_call_put_input); return true; unsupported_service: read_unlock_irq(&local->services_lock); return rxrpc_direct_conn_abort(skb, rxrpc_abort_service_not_offered, RX_INVALID_OPERATION, -EOPNOTSUPP); unsupported_security: read_unlock_irq(&local->services_lock); return rxrpc_direct_conn_abort(skb, rxrpc_abort_service_not_offered, RX_INVALID_OPERATION, -EKEYREJECTED); no_call: spin_unlock(&rx->incoming_lock); read_unlock_irq(&local->services_lock); _leave(" = f [%u]", skb->mark); return false; discard: read_unlock_irq(&local->services_lock); return true; } /* * Charge up socket with preallocated calls, attaching user call IDs. */ int rxrpc_user_charge_accept(struct rxrpc_sock *rx, unsigned long user_call_ID) { struct rxrpc_backlog *b = rx->backlog; if (rx->sk.sk_state == RXRPC_CLOSE) return -ESHUTDOWN; return rxrpc_service_prealloc_one(rx, b, NULL, user_call_ID, GFP_KERNEL, atomic_inc_return(&rxrpc_debug_id)); } /* * rxrpc_kernel_charge_accept - Charge up socket with preallocated calls * @sock: The socket on which to preallocate * @notify_rx: Event notification function for the call * @user_call_ID: The tag to attach to the preallocated call * @gfp: The allocation conditions. * @debug_id: The tracing debug ID. * * Charge up the socket with preallocated calls, each with a user ID. The * ->user_attach_call() callback function should be provided to effect the * attachment from the user's side. The user is given a ref to hold on the * call. * * Note that the call may be come connected before this function returns. */ int rxrpc_kernel_charge_accept(struct socket *sock, rxrpc_notify_rx_t notify_rx, unsigned long user_call_ID, gfp_t gfp, unsigned int debug_id) { struct rxrpc_sock *rx = rxrpc_sk(sock->sk); struct rxrpc_backlog *b = rx->backlog; if (sock->sk->sk_state == RXRPC_CLOSE) return -ESHUTDOWN; return rxrpc_service_prealloc_one(rx, b, notify_rx, user_call_ID, gfp, debug_id); } EXPORT_SYMBOL(rxrpc_kernel_charge_accept);
49 49 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 // SPDX-License-Identifier: GPL-2.0-or-later /* * NetLabel NETLINK Interface * * This file defines the NETLINK interface for the NetLabel system. The * NetLabel system manages static and dynamic label mappings for network * protocols such as CIPSO and RIPSO. * * Author: Paul Moore <paul@paul-moore.com> */ /* * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 */ #include <linux/init.h> #include <linux/types.h> #include <linux/list.h> #include <linux/socket.h> #include <linux/audit.h> #include <linux/tty.h> #include <linux/security.h> #include <linux/gfp.h> #include <net/sock.h> #include <net/netlink.h> #include <net/genetlink.h> #include <net/netlabel.h> #include <asm/bug.h> #include "netlabel_mgmt.h" #include "netlabel_unlabeled.h" #include "netlabel_cipso_v4.h" #include "netlabel_calipso.h" #include "netlabel_user.h" /* * NetLabel NETLINK Setup Functions */ /** * netlbl_netlink_init - Initialize the NETLINK communication channel * * Description: * Call out to the NetLabel components so they can register their families and * commands with the Generic NETLINK mechanism. Returns zero on success and * non-zero on failure. * */ int __init netlbl_netlink_init(void) { int ret_val; ret_val = netlbl_mgmt_genl_init(); if (ret_val != 0) return ret_val; ret_val = netlbl_cipsov4_genl_init(); if (ret_val != 0) return ret_val; ret_val = netlbl_calipso_genl_init(); if (ret_val != 0) return ret_val; return netlbl_unlabel_genl_init(); } /* * NetLabel Audit Functions */ /** * netlbl_audit_start_common - Start an audit message * @type: audit message type * @audit_info: NetLabel audit information * * Description: * Start an audit message using the type specified in @type and fill the audit * message with some fields common to all NetLabel audit messages. Returns * a pointer to the audit buffer on success, NULL on failure. * */ struct audit_buffer *netlbl_audit_start_common(int type, struct netlbl_audit *audit_info) { struct audit_buffer *audit_buf; if (audit_enabled == AUDIT_OFF) return NULL; audit_buf = audit_log_start(audit_context(), GFP_ATOMIC, type); if (audit_buf == NULL) return NULL; audit_log_format(audit_buf, "netlabel: auid=%u ses=%u", from_kuid(&init_user_ns, audit_info->loginuid), audit_info->sessionid); audit_log_subj_ctx(audit_buf, &audit_info->prop); return audit_buf; }
5 1 2 4 4 1 1 12 2 1 2 2 6 8 1 1 3 5 3 3 2 5 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 /* * Copyright (c) 2010-2011 Atheros Communications Inc. * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include "htc.h" static int htc_issue_send(struct htc_target *target, struct sk_buff* skb, u16 len, u8 flags, u8 epid) { struct htc_frame_hdr *hdr; struct htc_endpoint *endpoint = &target->endpoint[epid]; int status; hdr = skb_push(skb, sizeof(struct htc_frame_hdr)); hdr->endpoint_id = epid; hdr->flags = flags; hdr->payload_len = cpu_to_be16(len); memset(hdr->control, 0, sizeof(hdr->control)); status = target->hif->send(target->hif_dev, endpoint->ul_pipeid, skb); return status; } static struct htc_endpoint *get_next_avail_ep(struct htc_endpoint *endpoint) { enum htc_endpoint_id avail_epid; for (avail_epid = (ENDPOINT_MAX - 1); avail_epid > ENDPOINT0; avail_epid--) if (endpoint[avail_epid].service_id == 0) return &endpoint[avail_epid]; return NULL; } static u8 service_to_ulpipe(u16 service_id) { switch (service_id) { case WMI_CONTROL_SVC: return 4; case WMI_BEACON_SVC: case WMI_CAB_SVC: case WMI_UAPSD_SVC: case WMI_MGMT_SVC: case WMI_DATA_VO_SVC: case WMI_DATA_VI_SVC: case WMI_DATA_BE_SVC: case WMI_DATA_BK_SVC: return 1; default: return 0; } } static u8 service_to_dlpipe(u16 service_id) { switch (service_id) { case WMI_CONTROL_SVC: return 3; case WMI_BEACON_SVC: case WMI_CAB_SVC: case WMI_UAPSD_SVC: case WMI_MGMT_SVC: case WMI_DATA_VO_SVC: case WMI_DATA_VI_SVC: case WMI_DATA_BE_SVC: case WMI_DATA_BK_SVC: return 2; default: return 0; } } static void htc_process_target_rdy(struct htc_target *target, void *buf) { struct htc_endpoint *endpoint; struct htc_ready_msg *htc_ready_msg = buf; target->credit_size = be16_to_cpu(htc_ready_msg->credit_size); endpoint = &target->endpoint[ENDPOINT0]; endpoint->service_id = HTC_CTRL_RSVD_SVC; endpoint->max_msglen = HTC_MAX_CONTROL_MESSAGE_LENGTH; atomic_inc(&target->tgt_ready); complete(&target->target_wait); } static void htc_process_conn_rsp(struct htc_target *target, struct htc_frame_hdr *htc_hdr) { struct htc_conn_svc_rspmsg *svc_rspmsg; struct htc_endpoint *endpoint, *tmp_endpoint = NULL; u16 service_id; u16 max_msglen; enum htc_endpoint_id epid, tepid; svc_rspmsg = (struct htc_conn_svc_rspmsg *) ((void *) htc_hdr + sizeof(struct htc_frame_hdr)); if (svc_rspmsg->status == HTC_SERVICE_SUCCESS) { epid = svc_rspmsg->endpoint_id; /* Check that the received epid for the endpoint to attach * a new service is valid. ENDPOINT0 can't be used here as it * is already reserved for HTC_CTRL_RSVD_SVC service and thus * should not be modified. */ if (epid <= ENDPOINT0 || epid >= ENDPOINT_MAX) return; service_id = be16_to_cpu(svc_rspmsg->service_id); max_msglen = be16_to_cpu(svc_rspmsg->max_msg_len); endpoint = &target->endpoint[epid]; for (tepid = (ENDPOINT_MAX - 1); tepid > ENDPOINT0; tepid--) { tmp_endpoint = &target->endpoint[tepid]; if (tmp_endpoint->service_id == service_id) { tmp_endpoint->service_id = 0; break; } } if (tepid == ENDPOINT0) return; endpoint->service_id = service_id; endpoint->max_txqdepth = tmp_endpoint->max_txqdepth; endpoint->ep_callbacks = tmp_endpoint->ep_callbacks; endpoint->ul_pipeid = tmp_endpoint->ul_pipeid; endpoint->dl_pipeid = tmp_endpoint->dl_pipeid; endpoint->max_msglen = max_msglen; target->conn_rsp_epid = epid; complete(&target->cmd_wait); } else { target->conn_rsp_epid = ENDPOINT_UNUSED; } } static int htc_config_pipe_credits(struct htc_target *target) { struct sk_buff *skb; struct htc_config_pipe_msg *cp_msg; int ret; unsigned long time_left; skb = alloc_skb(50 + sizeof(struct htc_frame_hdr), GFP_ATOMIC); if (!skb) { dev_err(target->dev, "failed to allocate send buffer\n"); return -ENOMEM; } skb_reserve(skb, sizeof(struct htc_frame_hdr)); cp_msg = skb_put(skb, sizeof(struct htc_config_pipe_msg)); cp_msg->message_id = cpu_to_be16(HTC_MSG_CONFIG_PIPE_ID); cp_msg->pipe_id = USB_WLAN_TX_PIPE; cp_msg->credits = target->credits; target->htc_flags |= HTC_OP_CONFIG_PIPE_CREDITS; ret = htc_issue_send(target, skb, skb->len, 0, ENDPOINT0); if (ret) goto err; time_left = wait_for_completion_timeout(&target->cmd_wait, HZ); if (!time_left) { dev_err(target->dev, "HTC credit config timeout\n"); return -ETIMEDOUT; } return 0; err: kfree_skb(skb); return -EINVAL; } static int htc_setup_complete(struct htc_target *target) { struct sk_buff *skb; struct htc_comp_msg *comp_msg; int ret = 0; unsigned long time_left; skb = alloc_skb(50 + sizeof(struct htc_frame_hdr), GFP_ATOMIC); if (!skb) { dev_err(target->dev, "failed to allocate send buffer\n"); return -ENOMEM; } skb_reserve(skb, sizeof(struct htc_frame_hdr)); comp_msg = skb_put(skb, sizeof(struct htc_comp_msg)); comp_msg->msg_id = cpu_to_be16(HTC_MSG_SETUP_COMPLETE_ID); target->htc_flags |= HTC_OP_START_WAIT; ret = htc_issue_send(target, skb, skb->len, 0, ENDPOINT0); if (ret) goto err; time_left = wait_for_completion_timeout(&target->cmd_wait, HZ); if (!time_left) { dev_err(target->dev, "HTC start timeout\n"); return -ETIMEDOUT; } return 0; err: kfree_skb(skb); return -EINVAL; } /* HTC APIs */ int htc_init(struct htc_target *target) { int ret; ret = htc_config_pipe_credits(target); if (ret) return ret; return htc_setup_complete(target); } int htc_connect_service(struct htc_target *target, struct htc_service_connreq *service_connreq, enum htc_endpoint_id *conn_rsp_epid) { struct sk_buff *skb; struct htc_endpoint *endpoint; struct htc_conn_svc_msg *conn_msg; int ret; unsigned long time_left; /* Find an available endpoint */ endpoint = get_next_avail_ep(target->endpoint); if (!endpoint) { dev_err(target->dev, "Endpoint is not available for service %d\n", service_connreq->service_id); return -EINVAL; } endpoint->service_id = service_connreq->service_id; endpoint->max_txqdepth = service_connreq->max_send_qdepth; endpoint->ul_pipeid = service_to_ulpipe(service_connreq->service_id); endpoint->dl_pipeid = service_to_dlpipe(service_connreq->service_id); endpoint->ep_callbacks = service_connreq->ep_callbacks; skb = alloc_skb(sizeof(struct htc_conn_svc_msg) + sizeof(struct htc_frame_hdr), GFP_ATOMIC); if (!skb) { dev_err(target->dev, "Failed to allocate buf to send" "service connect req\n"); return -ENOMEM; } skb_reserve(skb, sizeof(struct htc_frame_hdr)); conn_msg = skb_put(skb, sizeof(struct htc_conn_svc_msg)); conn_msg->service_id = cpu_to_be16(service_connreq->service_id); conn_msg->msg_id = cpu_to_be16(HTC_MSG_CONNECT_SERVICE_ID); conn_msg->con_flags = cpu_to_be16(service_connreq->con_flags); conn_msg->dl_pipeid = endpoint->dl_pipeid; conn_msg->ul_pipeid = endpoint->ul_pipeid; /* To prevent infoleak */ conn_msg->svc_meta_len = 0; conn_msg->pad = 0; ret = htc_issue_send(target, skb, skb->len, 0, ENDPOINT0); if (ret) goto err; time_left = wait_for_completion_timeout(&target->cmd_wait, HZ); if (!time_left) { dev_err(target->dev, "Service connection timeout for: %d\n", service_connreq->service_id); return -ETIMEDOUT; } if (target->conn_rsp_epid < 0 || target->conn_rsp_epid >= ENDPOINT_MAX) return -EINVAL; *conn_rsp_epid = target->conn_rsp_epid; return 0; err: kfree_skb(skb); return ret; } int htc_send(struct htc_target *target, struct sk_buff *skb) { struct ath9k_htc_tx_ctl *tx_ctl; tx_ctl = HTC_SKB_CB(skb); return htc_issue_send(target, skb, skb->len, 0, tx_ctl->epid); } int htc_send_epid(struct htc_target *target, struct sk_buff *skb, enum htc_endpoint_id epid) { return htc_issue_send(target, skb, skb->len, 0, epid); } void htc_stop(struct htc_target *target) { target->hif->stop(target->hif_dev); } void htc_start(struct htc_target *target) { target->hif->start(target->hif_dev); } void htc_sta_drain(struct htc_target *target, u8 idx) { target->hif->sta_drain(target->hif_dev, idx); } void ath9k_htc_txcompletion_cb(struct htc_target *htc_handle, struct sk_buff *skb, bool txok) { struct htc_endpoint *endpoint; struct htc_frame_hdr *htc_hdr = NULL; if (htc_handle->htc_flags & HTC_OP_CONFIG_PIPE_CREDITS) { complete(&htc_handle->cmd_wait); htc_handle->htc_flags &= ~HTC_OP_CONFIG_PIPE_CREDITS; goto ret; } if (htc_handle->htc_flags & HTC_OP_START_WAIT) { complete(&htc_handle->cmd_wait); htc_handle->htc_flags &= ~HTC_OP_START_WAIT; goto ret; } if (skb) { htc_hdr = (struct htc_frame_hdr *) skb->data; if (htc_hdr->endpoint_id >= ARRAY_SIZE(htc_handle->endpoint)) goto ret; endpoint = &htc_handle->endpoint[htc_hdr->endpoint_id]; skb_pull(skb, sizeof(struct htc_frame_hdr)); if (endpoint->ep_callbacks.tx) { endpoint->ep_callbacks.tx(endpoint->ep_callbacks.priv, skb, htc_hdr->endpoint_id, txok); } else { kfree_skb(skb); } } return; ret: kfree_skb(skb); } static void ath9k_htc_fw_panic_report(struct htc_target *htc_handle, struct sk_buff *skb, u32 len) { uint32_t *pattern = (uint32_t *)skb->data; if (*pattern == 0x33221199 && len >= sizeof(struct htc_panic_bad_vaddr)) { struct htc_panic_bad_vaddr *htc_panic; htc_panic = (struct htc_panic_bad_vaddr *) skb->data; dev_err(htc_handle->dev, "ath: firmware panic! " "exccause: 0x%08x; pc: 0x%08x; badvaddr: 0x%08x.\n", htc_panic->exccause, htc_panic->pc, htc_panic->badvaddr); return; } if (*pattern == 0x33221299) { struct htc_panic_bad_epid *htc_panic; htc_panic = (struct htc_panic_bad_epid *) skb->data; dev_err(htc_handle->dev, "ath: firmware panic! " "bad epid: 0x%08x\n", htc_panic->epid); return; } dev_err(htc_handle->dev, "ath: unknown panic pattern!\n"); } /* * HTC Messages are handled directly here and the obtained SKB * is freed. * * Service messages (Data, WMI) are passed to the corresponding * endpoint RX handlers, which have to free the SKB. */ void ath9k_htc_rx_msg(struct htc_target *htc_handle, struct sk_buff *skb, u32 len, u8 pipe_id) { struct htc_frame_hdr *htc_hdr; enum htc_endpoint_id epid; struct htc_endpoint *endpoint; __be16 *msg_id; if (!htc_handle || !skb) return; /* A valid message requires len >= 8. * * sizeof(struct htc_frame_hdr) == 8 * sizeof(struct htc_ready_msg) == 8 * sizeof(struct htc_panic_bad_vaddr) == 16 * sizeof(struct htc_panic_bad_epid) == 8 */ if (unlikely(len < sizeof(struct htc_frame_hdr))) goto invalid; htc_hdr = (struct htc_frame_hdr *) skb->data; epid = htc_hdr->endpoint_id; if (epid == 0x99) { ath9k_htc_fw_panic_report(htc_handle, skb, len); kfree_skb(skb); return; } if (epid < 0 || epid >= ENDPOINT_MAX) { invalid: if (pipe_id != USB_REG_IN_PIPE) dev_kfree_skb_any(skb); else kfree_skb(skb); return; } if (epid == ENDPOINT0) { /* Handle trailer */ if (htc_hdr->flags & HTC_FLAGS_RECV_TRAILER) { if (be32_to_cpu(*(__be32 *) skb->data) == 0x00C60000) { /* Move past the Watchdog pattern */ htc_hdr = (struct htc_frame_hdr *)(skb->data + 4); len -= 4; } } /* Get the message ID */ if (unlikely(len < sizeof(struct htc_frame_hdr) + sizeof(__be16))) goto invalid; msg_id = (__be16 *) ((void *) htc_hdr + sizeof(struct htc_frame_hdr)); /* Now process HTC messages */ switch (be16_to_cpu(*msg_id)) { case HTC_MSG_READY_ID: if (unlikely(len < sizeof(struct htc_ready_msg))) goto invalid; htc_process_target_rdy(htc_handle, htc_hdr); break; case HTC_MSG_CONNECT_SERVICE_RESPONSE_ID: if (unlikely(len < sizeof(struct htc_frame_hdr) + sizeof(struct htc_conn_svc_rspmsg))) goto invalid; htc_process_conn_rsp(htc_handle, htc_hdr); break; default: break; } kfree_skb(skb); } else { if (htc_hdr->flags & HTC_FLAGS_RECV_TRAILER) skb_trim(skb, len - htc_hdr->control[0]); skb_pull(skb, sizeof(struct htc_frame_hdr)); endpoint = &htc_handle->endpoint[epid]; if (endpoint->ep_callbacks.rx) endpoint->ep_callbacks.rx(endpoint->ep_callbacks.priv, skb, epid); else goto invalid; } } struct htc_target *ath9k_htc_hw_alloc(void *hif_handle, struct ath9k_htc_hif *hif, struct device *dev) { struct htc_endpoint *endpoint; struct htc_target *target; target = kzalloc(sizeof(struct htc_target), GFP_KERNEL); if (!target) return NULL; init_completion(&target->target_wait); init_completion(&target->cmd_wait); target->hif = hif; target->hif_dev = hif_handle; target->dev = dev; /* Assign control endpoint pipe IDs */ endpoint = &target->endpoint[ENDPOINT0]; endpoint->ul_pipeid = hif->control_ul_pipe; endpoint->dl_pipeid = hif->control_dl_pipe; atomic_set(&target->tgt_ready, 0); return target; } void ath9k_htc_hw_free(struct htc_target *htc) { kfree(htc); } int ath9k_htc_hw_init(struct htc_target *target, struct device *dev, u16 devid, char *product, u32 drv_info) { if (ath9k_htc_probe_device(target, dev, devid, product, drv_info)) { pr_err("Failed to initialize the device\n"); return -ENODEV; } return 0; } void ath9k_htc_hw_deinit(struct htc_target *target, bool hot_unplug) { if (target) ath9k_htc_disconnect_device(target, hot_unplug); }
227 230 229 230 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 // SPDX-License-Identifier: GPL-2.0-or-later /* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com> */ #include <net/ip.h> #include "ipvlan.h" static unsigned int ipvlan_netid __read_mostly; struct ipvlan_netns { unsigned int ipvl_nf_hook_refcnt; }; static struct ipvl_addr *ipvlan_skb_to_addr(struct sk_buff *skb, struct net_device *dev) { struct ipvl_addr *addr = NULL; struct ipvl_port *port; int addr_type; void *lyr3h; if (!dev || !netif_is_ipvlan_port(dev)) goto out; port = ipvlan_port_get_rcu(dev); if (!port || port->mode != IPVLAN_MODE_L3S) goto out; lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type); if (!lyr3h) goto out; addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true); out: return addr; } static struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb, u16 proto) { struct ipvl_addr *addr; struct net_device *sdev; addr = ipvlan_skb_to_addr(skb, dev); if (!addr) goto out; sdev = addr->master->dev; switch (proto) { case AF_INET: { const struct iphdr *ip4h = ip_hdr(skb); int err; err = ip_route_input_noref(skb, ip4h->daddr, ip4h->saddr, ip4h_dscp(ip4h), sdev); if (unlikely(err)) goto out; break; } #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: { struct dst_entry *dst; struct ipv6hdr *ip6h = ipv6_hdr(skb); int flags = RT6_LOOKUP_F_HAS_SADDR; struct flowi6 fl6 = { .flowi6_iif = sdev->ifindex, .daddr = ip6h->daddr, .saddr = ip6h->saddr, .flowlabel = ip6_flowinfo(ip6h), .flowi6_mark = skb->mark, .flowi6_proto = ip6h->nexthdr, }; skb_dst_drop(skb); dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, skb, flags); skb_dst_set(skb, dst); break; } #endif default: break; } out: return skb; } static const struct l3mdev_ops ipvl_l3mdev_ops = { .l3mdev_l3_rcv = ipvlan_l3_rcv, }; static unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { struct ipvl_addr *addr; unsigned int len; addr = ipvlan_skb_to_addr(skb, skb->dev); if (!addr) goto out; skb->dev = addr->master->dev; skb->skb_iif = skb->dev->ifindex; #if IS_ENABLED(CONFIG_IPV6) if (addr->atype == IPVL_IPV6) IP6CB(skb)->iif = skb->dev->ifindex; #endif len = skb->len + ETH_HLEN; ipvlan_count_rx(addr->master, len, true, false); out: return NF_ACCEPT; } static const struct nf_hook_ops ipvl_nfops[] = { { .hook = ipvlan_nf_input, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = INT_MAX, }, #if IS_ENABLED(CONFIG_IPV6) { .hook = ipvlan_nf_input, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = INT_MAX, }, #endif }; static int ipvlan_register_nf_hook(struct net *net) { struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid); int err = 0; if (!vnet->ipvl_nf_hook_refcnt) { err = nf_register_net_hooks(net, ipvl_nfops, ARRAY_SIZE(ipvl_nfops)); if (!err) vnet->ipvl_nf_hook_refcnt = 1; } else { vnet->ipvl_nf_hook_refcnt++; } return err; } static void ipvlan_unregister_nf_hook(struct net *net) { struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid); if (WARN_ON(!vnet->ipvl_nf_hook_refcnt)) return; vnet->ipvl_nf_hook_refcnt--; if (!vnet->ipvl_nf_hook_refcnt) nf_unregister_net_hooks(net, ipvl_nfops, ARRAY_SIZE(ipvl_nfops)); } void ipvlan_migrate_l3s_hook(struct net *oldnet, struct net *newnet) { struct ipvlan_netns *old_vnet; ASSERT_RTNL(); old_vnet = net_generic(oldnet, ipvlan_netid); if (!old_vnet->ipvl_nf_hook_refcnt) return; ipvlan_register_nf_hook(newnet); ipvlan_unregister_nf_hook(oldnet); } static void ipvlan_ns_exit(struct net *net) { struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid); if (WARN_ON_ONCE(vnet->ipvl_nf_hook_refcnt)) { vnet->ipvl_nf_hook_refcnt = 0; nf_unregister_net_hooks(net, ipvl_nfops, ARRAY_SIZE(ipvl_nfops)); } } static struct pernet_operations ipvlan_net_ops = { .id = &ipvlan_netid, .size = sizeof(struct ipvlan_netns), .exit = ipvlan_ns_exit, }; int ipvlan_l3s_init(void) { return register_pernet_subsys(&ipvlan_net_ops); } void ipvlan_l3s_cleanup(void) { unregister_pernet_subsys(&ipvlan_net_ops); } int ipvlan_l3s_register(struct ipvl_port *port) { struct net_device *dev = port->dev; int ret; ASSERT_RTNL(); ret = ipvlan_register_nf_hook(read_pnet(&port->pnet)); if (!ret) { dev->l3mdev_ops = &ipvl_l3mdev_ops; dev->priv_flags |= IFF_L3MDEV_RX_HANDLER; } return ret; } void ipvlan_l3s_unregister(struct ipvl_port *port) { struct net_device *dev = port->dev; ASSERT_RTNL(); dev->priv_flags &= ~IFF_L3MDEV_RX_HANDLER; ipvlan_unregister_nf_hook(read_pnet(&port->pnet)); }
4 4 4 4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2017, Mellanox Technologies inc. All rights reserved. */ #ifndef _UVERBS_IOCTL_ #define _UVERBS_IOCTL_ #include <rdma/uverbs_types.h> #include <linux/uaccess.h> #include <rdma/rdma_user_ioctl.h> #include <rdma/ib_user_ioctl_verbs.h> #include <rdma/ib_user_ioctl_cmds.h> /* * ======================================= * Verbs action specifications * ======================================= */ enum uverbs_attr_type { UVERBS_ATTR_TYPE_NA, UVERBS_ATTR_TYPE_PTR_IN, UVERBS_ATTR_TYPE_PTR_OUT, UVERBS_ATTR_TYPE_IDR, UVERBS_ATTR_TYPE_FD, UVERBS_ATTR_TYPE_RAW_FD, UVERBS_ATTR_TYPE_ENUM_IN, UVERBS_ATTR_TYPE_IDRS_ARRAY, }; enum uverbs_obj_access { UVERBS_ACCESS_READ, UVERBS_ACCESS_WRITE, UVERBS_ACCESS_NEW, UVERBS_ACCESS_DESTROY }; /* Specification of a single attribute inside the ioctl message */ /* good size 16 */ struct uverbs_attr_spec { u8 type; /* * Support extending attributes by length. Allow the user to provide * more bytes than ptr.len, but check that everything after is zero'd * by the user. */ u8 zero_trailing:1; /* * Valid only for PTR_IN. Allocate and copy the data inside * the parser */ u8 alloc_and_copy:1; u8 mandatory:1; /* True if this is from UVERBS_ATTR_UHW */ u8 is_udata:1; union { struct { /* Current known size to kernel */ u16 len; /* User isn't allowed to provide something < min_len */ u16 min_len; } ptr; struct { /* * higher bits mean the namespace and lower bits mean * the type id within the namespace. */ u16 obj_type; u8 access; } obj; struct { u8 num_elems; } enum_def; } u; /* This weird split lets us remove some padding */ union { struct { /* * The enum attribute can select one of the attributes * contained in the ids array. Currently only PTR_IN * attributes are supported in the ids array. */ const struct uverbs_attr_spec *ids; } enum_def; struct { /* * higher bits mean the namespace and lower bits mean * the type id within the namespace. */ u16 obj_type; u16 min_len; u16 max_len; u8 access; } objs_arr; } u2; }; /* * Information about the API is loaded into a radix tree. For IOCTL we start * with a tuple of: * object_id, attr_id, method_id * * Which is a 48 bit value, with most of the bits guaranteed to be zero. Based * on the current kernel support this is compressed into 16 bit key for the * radix tree. Since this compression is entirely internal to the kernel the * below limits can be revised if the kernel gains additional data. * * With 64 leafs per node this is a 3 level radix tree. * * The tree encodes multiple types, and uses a scheme where OBJ_ID,0,0 returns * the object slot, and OBJ_ID,METH_ID,0 and returns the method slot. * * This also encodes the tables for the write() and write() extended commands * using the coding * OBJ_ID,UVERBS_API_METHOD_IS_WRITE,command # * OBJ_ID,UVERBS_API_METHOD_IS_WRITE_EX,command_ex # * ie the WRITE path is treated as a special method type in the ioctl * framework. */ enum uapi_radix_data { UVERBS_API_NS_FLAG = 1U << UVERBS_ID_NS_SHIFT, UVERBS_API_ATTR_KEY_BITS = 6, UVERBS_API_ATTR_KEY_MASK = GENMASK(UVERBS_API_ATTR_KEY_BITS - 1, 0), UVERBS_API_ATTR_BKEY_LEN = (1 << UVERBS_API_ATTR_KEY_BITS) - 1, UVERBS_API_WRITE_KEY_NUM = 1 << UVERBS_API_ATTR_KEY_BITS, UVERBS_API_METHOD_KEY_BITS = 5, UVERBS_API_METHOD_KEY_SHIFT = UVERBS_API_ATTR_KEY_BITS, UVERBS_API_METHOD_KEY_NUM_CORE = 22, UVERBS_API_METHOD_IS_WRITE = 30 << UVERBS_API_METHOD_KEY_SHIFT, UVERBS_API_METHOD_IS_WRITE_EX = 31 << UVERBS_API_METHOD_KEY_SHIFT, UVERBS_API_METHOD_KEY_NUM_DRIVER = (UVERBS_API_METHOD_IS_WRITE >> UVERBS_API_METHOD_KEY_SHIFT) - UVERBS_API_METHOD_KEY_NUM_CORE, UVERBS_API_METHOD_KEY_MASK = GENMASK( UVERBS_API_METHOD_KEY_BITS + UVERBS_API_METHOD_KEY_SHIFT - 1, UVERBS_API_METHOD_KEY_SHIFT), UVERBS_API_OBJ_KEY_BITS = 5, UVERBS_API_OBJ_KEY_SHIFT = UVERBS_API_METHOD_KEY_BITS + UVERBS_API_METHOD_KEY_SHIFT, UVERBS_API_OBJ_KEY_NUM_CORE = 20, UVERBS_API_OBJ_KEY_NUM_DRIVER = (1 << UVERBS_API_OBJ_KEY_BITS) - UVERBS_API_OBJ_KEY_NUM_CORE, UVERBS_API_OBJ_KEY_MASK = GENMASK(31, UVERBS_API_OBJ_KEY_SHIFT), /* This id guaranteed to not exist in the radix tree */ UVERBS_API_KEY_ERR = 0xFFFFFFFF, }; static inline __attribute_const__ u32 uapi_key_obj(u32 id) { if (id & UVERBS_API_NS_FLAG) { id &= ~UVERBS_API_NS_FLAG; if (id >= UVERBS_API_OBJ_KEY_NUM_DRIVER) return UVERBS_API_KEY_ERR; id = id + UVERBS_API_OBJ_KEY_NUM_CORE; } else { if (id >= UVERBS_API_OBJ_KEY_NUM_CORE) return UVERBS_API_KEY_ERR; } return id << UVERBS_API_OBJ_KEY_SHIFT; } static inline __attribute_const__ bool uapi_key_is_object(u32 key) { return (key & ~UVERBS_API_OBJ_KEY_MASK) == 0; } static inline __attribute_const__ u32 uapi_key_ioctl_method(u32 id) { if (id & UVERBS_API_NS_FLAG) { id &= ~UVERBS_API_NS_FLAG; if (id >= UVERBS_API_METHOD_KEY_NUM_DRIVER) return UVERBS_API_KEY_ERR; id = id + UVERBS_API_METHOD_KEY_NUM_CORE; } else { id++; if (id >= UVERBS_API_METHOD_KEY_NUM_CORE) return UVERBS_API_KEY_ERR; } return id << UVERBS_API_METHOD_KEY_SHIFT; } static inline __attribute_const__ u32 uapi_key_write_method(u32 id) { if (id >= UVERBS_API_WRITE_KEY_NUM) return UVERBS_API_KEY_ERR; return UVERBS_API_METHOD_IS_WRITE | id; } static inline __attribute_const__ u32 uapi_key_write_ex_method(u32 id) { if (id >= UVERBS_API_WRITE_KEY_NUM) return UVERBS_API_KEY_ERR; return UVERBS_API_METHOD_IS_WRITE_EX | id; } static inline __attribute_const__ u32 uapi_key_attr_to_ioctl_method(u32 attr_key) { return attr_key & (UVERBS_API_OBJ_KEY_MASK | UVERBS_API_METHOD_KEY_MASK); } static inline __attribute_const__ bool uapi_key_is_ioctl_method(u32 key) { unsigned int method = key & UVERBS_API_METHOD_KEY_MASK; return method != 0 && method < UVERBS_API_METHOD_IS_WRITE && (key & UVERBS_API_ATTR_KEY_MASK) == 0; } static inline __attribute_const__ bool uapi_key_is_write_method(u32 key) { return (key & UVERBS_API_METHOD_KEY_MASK) == UVERBS_API_METHOD_IS_WRITE; } static inline __attribute_const__ bool uapi_key_is_write_ex_method(u32 key) { return (key & UVERBS_API_METHOD_KEY_MASK) == UVERBS_API_METHOD_IS_WRITE_EX; } static inline __attribute_const__ u32 uapi_key_attrs_start(u32 ioctl_method_key) { /* 0 is the method slot itself */ return ioctl_method_key + 1; } static inline __attribute_const__ u32 uapi_key_attr(u32 id) { /* * The attr is designed to fit in the typical single radix tree node * of 64 entries. Since allmost all methods have driver attributes we * organize things so that the driver and core attributes interleave to * reduce the length of the attributes array in typical cases. */ if (id & UVERBS_API_NS_FLAG) { id &= ~UVERBS_API_NS_FLAG; id++; if (id >= 1 << (UVERBS_API_ATTR_KEY_BITS - 1)) return UVERBS_API_KEY_ERR; id = (id << 1) | 0; } else { if (id >= 1 << (UVERBS_API_ATTR_KEY_BITS - 1)) return UVERBS_API_KEY_ERR; id = (id << 1) | 1; } return id; } /* Only true for ioctl methods */ static inline __attribute_const__ bool uapi_key_is_attr(u32 key) { unsigned int method = key & UVERBS_API_METHOD_KEY_MASK; return method != 0 && method < UVERBS_API_METHOD_IS_WRITE && (key & UVERBS_API_ATTR_KEY_MASK) != 0; } /* * This returns a value in the range [0 to UVERBS_API_ATTR_BKEY_LEN), * basically it undoes the reservation of 0 in the ID numbering. attr_key * must already be masked with UVERBS_API_ATTR_KEY_MASK, or be the output of * uapi_key_attr(). */ static inline __attribute_const__ u32 uapi_bkey_attr(u32 attr_key) { return attr_key - 1; } static inline __attribute_const__ u32 uapi_bkey_to_key_attr(u32 attr_bkey) { return attr_bkey + 1; } /* * ======================================= * Verbs definitions * ======================================= */ struct uverbs_attr_def { u16 id; struct uverbs_attr_spec attr; }; struct uverbs_method_def { u16 id; /* Combination of bits from enum UVERBS_ACTION_FLAG_XXXX */ u32 flags; size_t num_attrs; const struct uverbs_attr_def * const (*attrs)[]; int (*handler)(struct uverbs_attr_bundle *attrs); }; struct uverbs_object_def { u16 id; const struct uverbs_obj_type *type_attrs; size_t num_methods; const struct uverbs_method_def * const (*methods)[]; }; enum uapi_definition_kind { UAPI_DEF_END = 0, UAPI_DEF_OBJECT_START, UAPI_DEF_WRITE, UAPI_DEF_CHAIN_OBJ_TREE, UAPI_DEF_CHAIN, UAPI_DEF_IS_SUPPORTED_FUNC, UAPI_DEF_IS_SUPPORTED_DEV_FN, }; enum uapi_definition_scope { UAPI_SCOPE_OBJECT = 1, UAPI_SCOPE_METHOD = 2, }; struct uapi_definition { u8 kind; u8 scope; union { struct { u16 object_id; } object_start; struct { u16 command_num; u8 is_ex:1; u8 has_udata:1; u8 has_resp:1; u8 req_size; u8 resp_size; } write; }; union { bool (*func_is_supported)(struct ib_device *device); int (*func_write)(struct uverbs_attr_bundle *attrs); const struct uapi_definition *chain; const struct uverbs_object_def *chain_obj_tree; size_t needs_fn_offset; }; }; /* Define things connected to object_id */ #define DECLARE_UVERBS_OBJECT(_object_id, ...) \ { \ .kind = UAPI_DEF_OBJECT_START, \ .object_start = { .object_id = _object_id }, \ }, \ ##__VA_ARGS__ /* Use in a var_args of DECLARE_UVERBS_OBJECT */ #define DECLARE_UVERBS_WRITE(_command_num, _func, _cmd_desc, ...) \ { \ .kind = UAPI_DEF_WRITE, \ .scope = UAPI_SCOPE_OBJECT, \ .write = { .is_ex = 0, .command_num = _command_num }, \ .func_write = _func, \ _cmd_desc, \ }, \ ##__VA_ARGS__ /* Use in a var_args of DECLARE_UVERBS_OBJECT */ #define DECLARE_UVERBS_WRITE_EX(_command_num, _func, _cmd_desc, ...) \ { \ .kind = UAPI_DEF_WRITE, \ .scope = UAPI_SCOPE_OBJECT, \ .write = { .is_ex = 1, .command_num = _command_num }, \ .func_write = _func, \ _cmd_desc, \ }, \ ##__VA_ARGS__ /* * Object is only supported if the function pointer named ibdev_fn in struct * ib_device is not NULL. */ #define UAPI_DEF_OBJ_NEEDS_FN(ibdev_fn) \ { \ .kind = UAPI_DEF_IS_SUPPORTED_DEV_FN, \ .scope = UAPI_SCOPE_OBJECT, \ .needs_fn_offset = \ offsetof(struct ib_device_ops, ibdev_fn) + \ BUILD_BUG_ON_ZERO(sizeof_field(struct ib_device_ops, \ ibdev_fn) != \ sizeof(void *)), \ } /* * Method is only supported if the function pointer named ibdev_fn in struct * ib_device is not NULL. */ #define UAPI_DEF_METHOD_NEEDS_FN(ibdev_fn) \ { \ .kind = UAPI_DEF_IS_SUPPORTED_DEV_FN, \ .scope = UAPI_SCOPE_METHOD, \ .needs_fn_offset = \ offsetof(struct ib_device_ops, ibdev_fn) + \ BUILD_BUG_ON_ZERO(sizeof_field(struct ib_device_ops, \ ibdev_fn) != \ sizeof(void *)), \ } /* Call a function to determine if the entire object is supported or not */ #define UAPI_DEF_IS_OBJ_SUPPORTED(_func) \ { \ .kind = UAPI_DEF_IS_SUPPORTED_FUNC, \ .scope = UAPI_SCOPE_OBJECT, .func_is_supported = _func, \ } /* Include another struct uapi_definition in this one */ #define UAPI_DEF_CHAIN(_def_var) \ { \ .kind = UAPI_DEF_CHAIN, .chain = _def_var, \ } /* Temporary until the tree base description is replaced */ #define UAPI_DEF_CHAIN_OBJ_TREE(_object_enum, _object_ptr, ...) \ { \ .kind = UAPI_DEF_CHAIN_OBJ_TREE, \ .object_start = { .object_id = _object_enum }, \ .chain_obj_tree = _object_ptr, \ }, \ ##__VA_ARGS__ #define UAPI_DEF_CHAIN_OBJ_TREE_NAMED(_object_enum, ...) \ UAPI_DEF_CHAIN_OBJ_TREE(_object_enum, \ PTR_IF(IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS), \ &UVERBS_OBJECT(_object_enum)), \ ##__VA_ARGS__) /* * ======================================= * Attribute Specifications * ======================================= */ #define UVERBS_ATTR_SIZE(_min_len, _len) \ .u.ptr.min_len = _min_len, .u.ptr.len = _len #define UVERBS_ATTR_NO_DATA() UVERBS_ATTR_SIZE(0, 0) /* * Specifies a uapi structure that cannot be extended. The user must always * supply the whole structure and nothing more. The structure must be declared * in a header under include/uapi/rdma. */ #define UVERBS_ATTR_TYPE(_type) \ .u.ptr.min_len = sizeof(_type), .u.ptr.len = sizeof(_type) /* * Specifies a uapi structure where the user must provide at least up to * member 'last'. Anything after last and up until the end of the structure * can be non-zero, anything longer than the end of the structure must be * zero. The structure must be declared in a header under include/uapi/rdma. */ #define UVERBS_ATTR_STRUCT(_type, _last) \ .zero_trailing = 1, \ UVERBS_ATTR_SIZE(offsetofend(_type, _last), sizeof(_type)) /* * Specifies at least min_len bytes must be passed in, but the amount can be * larger, up to the protocol maximum size. No check for zeroing is done. */ #define UVERBS_ATTR_MIN_SIZE(_min_len) UVERBS_ATTR_SIZE(_min_len, USHRT_MAX) /* Must be used in the '...' of any UVERBS_ATTR */ #define UA_ALLOC_AND_COPY .alloc_and_copy = 1 #define UA_MANDATORY .mandatory = 1 #define UA_OPTIONAL .mandatory = 0 /* * min_len must be bigger than 0 and _max_len must be smaller than 4095. Only * READ\WRITE accesses are supported. */ #define UVERBS_ATTR_IDRS_ARR(_attr_id, _idr_type, _access, _min_len, _max_len, \ ...) \ (&(const struct uverbs_attr_def){ \ .id = (_attr_id) + \ BUILD_BUG_ON_ZERO((_min_len) == 0 || \ (_max_len) > \ PAGE_SIZE / sizeof(void *) || \ (_min_len) > (_max_len) || \ (_access) == UVERBS_ACCESS_NEW || \ (_access) == UVERBS_ACCESS_DESTROY), \ .attr = { .type = UVERBS_ATTR_TYPE_IDRS_ARRAY, \ .u2.objs_arr.obj_type = _idr_type, \ .u2.objs_arr.access = _access, \ .u2.objs_arr.min_len = _min_len, \ .u2.objs_arr.max_len = _max_len, \ __VA_ARGS__ } }) /* * Only for use with UVERBS_ATTR_IDR, allows any uobject type to be accepted, * the user must validate the type of the uobject instead. */ #define UVERBS_IDR_ANY_OBJECT 0xFFFF #define UVERBS_ATTR_IDR(_attr_id, _idr_type, _access, ...) \ (&(const struct uverbs_attr_def){ \ .id = _attr_id, \ .attr = { .type = UVERBS_ATTR_TYPE_IDR, \ .u.obj.obj_type = _idr_type, \ .u.obj.access = _access, \ __VA_ARGS__ } }) #define UVERBS_ATTR_FD(_attr_id, _fd_type, _access, ...) \ (&(const struct uverbs_attr_def){ \ .id = (_attr_id) + \ BUILD_BUG_ON_ZERO((_access) != UVERBS_ACCESS_NEW && \ (_access) != UVERBS_ACCESS_READ), \ .attr = { .type = UVERBS_ATTR_TYPE_FD, \ .u.obj.obj_type = _fd_type, \ .u.obj.access = _access, \ __VA_ARGS__ } }) #define UVERBS_ATTR_RAW_FD(_attr_id, ...) \ (&(const struct uverbs_attr_def){ \ .id = (_attr_id), \ .attr = { .type = UVERBS_ATTR_TYPE_RAW_FD, __VA_ARGS__ } }) #define UVERBS_ATTR_PTR_IN(_attr_id, _type, ...) \ (&(const struct uverbs_attr_def){ \ .id = _attr_id, \ .attr = { .type = UVERBS_ATTR_TYPE_PTR_IN, \ _type, \ __VA_ARGS__ } }) #define UVERBS_ATTR_PTR_OUT(_attr_id, _type, ...) \ (&(const struct uverbs_attr_def){ \ .id = _attr_id, \ .attr = { .type = UVERBS_ATTR_TYPE_PTR_OUT, \ _type, \ __VA_ARGS__ } }) /* _enum_arry should be a 'static const union uverbs_attr_spec[]' */ #define UVERBS_ATTR_ENUM_IN(_attr_id, _enum_arr, ...) \ (&(const struct uverbs_attr_def){ \ .id = _attr_id, \ .attr = { .type = UVERBS_ATTR_TYPE_ENUM_IN, \ .u2.enum_def.ids = _enum_arr, \ .u.enum_def.num_elems = ARRAY_SIZE(_enum_arr), \ __VA_ARGS__ }, \ }) /* An input value that is a member in the enum _enum_type. */ #define UVERBS_ATTR_CONST_IN(_attr_id, _enum_type, ...) \ UVERBS_ATTR_PTR_IN( \ _attr_id, \ UVERBS_ATTR_SIZE( \ sizeof(u64) + BUILD_BUG_ON_ZERO(!sizeof(_enum_type)), \ sizeof(u64)), \ __VA_ARGS__) /* * An input value that is a bitwise combination of values of _enum_type. * This permits the flag value to be passed as either a u32 or u64, it must * be retrieved via uverbs_get_flag(). */ #define UVERBS_ATTR_FLAGS_IN(_attr_id, _enum_type, ...) \ UVERBS_ATTR_PTR_IN( \ _attr_id, \ UVERBS_ATTR_SIZE(sizeof(u32) + BUILD_BUG_ON_ZERO( \ !sizeof(_enum_type *)), \ sizeof(u64)), \ __VA_ARGS__) /* * This spec is used in order to pass information to the hardware driver in a * legacy way. Every verb that could get driver specific data should get this * spec. */ #define UVERBS_ATTR_UHW() \ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_UHW_IN, \ UVERBS_ATTR_MIN_SIZE(0), \ UA_OPTIONAL, \ .is_udata = 1), \ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_UHW_OUT, \ UVERBS_ATTR_MIN_SIZE(0), \ UA_OPTIONAL, \ .is_udata = 1) /* ================================================= * Parsing infrastructure * ================================================= */ struct uverbs_ptr_attr { /* * If UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY is set then the 'ptr' is * used. */ union { void *ptr; u64 data; }; u16 len; u16 uattr_idx; u8 enum_id; }; struct uverbs_obj_attr { struct ib_uobject *uobject; const struct uverbs_api_attr *attr_elm; }; struct uverbs_objs_arr_attr { struct ib_uobject **uobjects; u16 len; }; struct uverbs_attr { union { struct uverbs_ptr_attr ptr_attr; struct uverbs_obj_attr obj_attr; struct uverbs_objs_arr_attr objs_arr_attr; }; }; struct uverbs_attr_bundle { struct_group_tagged(uverbs_attr_bundle_hdr, hdr, struct ib_udata driver_udata; struct ib_udata ucore; struct ib_uverbs_file *ufile; struct ib_ucontext *context; struct ib_uobject *uobject; DECLARE_BITMAP(attr_present, UVERBS_API_ATTR_BKEY_LEN); ); struct uverbs_attr attrs[]; }; static inline bool uverbs_attr_is_valid(const struct uverbs_attr_bundle *attrs_bundle, unsigned int idx) { return test_bit(uapi_bkey_attr(uapi_key_attr(idx)), attrs_bundle->attr_present); } /** * rdma_udata_to_drv_context - Helper macro to get the driver's context out of * ib_udata which is embedded in uverbs_attr_bundle. * * If udata is not NULL this cannot fail. Otherwise a NULL udata will result * in a NULL ucontext pointer, as a safety precaution. Callers should be using * 'udata' to determine if the driver call is in user or kernel mode, not * 'ucontext'. * */ static inline struct uverbs_attr_bundle * rdma_udata_to_uverbs_attr_bundle(struct ib_udata *udata) { return container_of(udata, struct uverbs_attr_bundle, driver_udata); } #define rdma_udata_to_drv_context(udata, drv_dev_struct, member) \ (udata ? container_of(rdma_udata_to_uverbs_attr_bundle(udata)->context, \ drv_dev_struct, member) : (drv_dev_struct *)NULL) #define IS_UVERBS_COPY_ERR(_ret) ((_ret) && (_ret) != -ENOENT) static inline const struct uverbs_attr *uverbs_attr_get(const struct uverbs_attr_bundle *attrs_bundle, u16 idx) { if (!uverbs_attr_is_valid(attrs_bundle, idx)) return ERR_PTR(-ENOENT); return &attrs_bundle->attrs[uapi_bkey_attr(uapi_key_attr(idx))]; } static inline int uverbs_attr_get_enum_id(const struct uverbs_attr_bundle *attrs_bundle, u16 idx) { const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx); if (IS_ERR(attr)) return PTR_ERR(attr); return attr->ptr_attr.enum_id; } static inline void *uverbs_attr_get_obj(const struct uverbs_attr_bundle *attrs_bundle, u16 idx) { const struct uverbs_attr *attr; attr = uverbs_attr_get(attrs_bundle, idx); if (IS_ERR(attr)) return ERR_CAST(attr); return attr->obj_attr.uobject->object; } static inline struct ib_uobject *uverbs_attr_get_uobject(const struct uverbs_attr_bundle *attrs_bundle, u16 idx) { const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx); if (IS_ERR(attr)) return ERR_CAST(attr); return attr->obj_attr.uobject; } static inline int uverbs_attr_get_len(const struct uverbs_attr_bundle *attrs_bundle, u16 idx) { const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx); if (IS_ERR(attr)) return PTR_ERR(attr); return attr->ptr_attr.len; } void uverbs_finalize_uobj_create(const struct uverbs_attr_bundle *attrs_bundle, u16 idx); /* * uverbs_attr_ptr_get_array_size() - Get array size pointer by a ptr * attribute. * @attrs: The attribute bundle * @idx: The ID of the attribute * @elem_size: The size of the element in the array */ static inline int uverbs_attr_ptr_get_array_size(struct uverbs_attr_bundle *attrs, u16 idx, size_t elem_size) { int size = uverbs_attr_get_len(attrs, idx); if (size < 0) return size; if (size % elem_size) return -EINVAL; return size / elem_size; } /** * uverbs_attr_get_uobjs_arr() - Provides array's properties for attribute for * UVERBS_ATTR_TYPE_IDRS_ARRAY. * @arr: Returned pointer to array of pointers for uobjects or NULL if * the attribute isn't provided. * * Return: The array length or 0 if no attribute was provided. */ static inline int uverbs_attr_get_uobjs_arr( const struct uverbs_attr_bundle *attrs_bundle, u16 attr_idx, struct ib_uobject ***arr) { const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, attr_idx); if (IS_ERR(attr)) { *arr = NULL; return 0; } *arr = attr->objs_arr_attr.uobjects; return attr->objs_arr_attr.len; } static inline bool uverbs_attr_ptr_is_inline(const struct uverbs_attr *attr) { return attr->ptr_attr.len <= sizeof(attr->ptr_attr.data); } static inline void *uverbs_attr_get_alloced_ptr( const struct uverbs_attr_bundle *attrs_bundle, u16 idx) { const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx); if (IS_ERR(attr)) return (void *)attr; return uverbs_attr_ptr_is_inline(attr) ? (void *)&attr->ptr_attr.data : attr->ptr_attr.ptr; } static inline int _uverbs_copy_from(void *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx, size_t size) { const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx); if (IS_ERR(attr)) return PTR_ERR(attr); /* * Validation ensures attr->ptr_attr.len >= size. If the caller is * using UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO then it must call * uverbs_copy_from_or_zero. */ if (unlikely(size < attr->ptr_attr.len)) return -EINVAL; if (uverbs_attr_ptr_is_inline(attr)) memcpy(to, &attr->ptr_attr.data, attr->ptr_attr.len); else if (copy_from_user(to, u64_to_user_ptr(attr->ptr_attr.data), attr->ptr_attr.len)) return -EFAULT; return 0; } static inline int _uverbs_copy_from_or_zero(void *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx, size_t size) { const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx); size_t min_size; if (IS_ERR(attr)) return PTR_ERR(attr); min_size = min_t(size_t, size, attr->ptr_attr.len); if (uverbs_attr_ptr_is_inline(attr)) memcpy(to, &attr->ptr_attr.data, min_size); else if (copy_from_user(to, u64_to_user_ptr(attr->ptr_attr.data), min_size)) return -EFAULT; if (size > min_size) memset(to + min_size, 0, size - min_size); return 0; } #define uverbs_copy_from(to, attrs_bundle, idx) \ _uverbs_copy_from(to, attrs_bundle, idx, sizeof(*to)) #define uverbs_copy_from_or_zero(to, attrs_bundle, idx) \ _uverbs_copy_from_or_zero(to, attrs_bundle, idx, sizeof(*to)) static inline struct ib_ucontext * ib_uverbs_get_ucontext(const struct uverbs_attr_bundle *attrs) { return ib_uverbs_get_ucontext_file(attrs->ufile); } #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx, u64 allowed_bits); int uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx, u64 allowed_bits); int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle, size_t idx, const void *from, size_t size); __malloc void *_uverbs_alloc(struct uverbs_attr_bundle *bundle, size_t size, gfp_t flags); static inline __malloc void *uverbs_alloc(struct uverbs_attr_bundle *bundle, size_t size) { return _uverbs_alloc(bundle, size, GFP_KERNEL); } static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle, size_t size) { return _uverbs_alloc(bundle, size, GFP_KERNEL | __GFP_ZERO); } static inline __malloc void *uverbs_kcalloc(struct uverbs_attr_bundle *bundle, size_t n, size_t size) { size_t bytes; if (unlikely(check_mul_overflow(n, size, &bytes))) return ERR_PTR(-EOVERFLOW); return uverbs_zalloc(bundle, bytes); } int _uverbs_get_const_signed(s64 *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx, s64 lower_bound, u64 upper_bound, s64 *def_val); int _uverbs_get_const_unsigned(u64 *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx, u64 upper_bound, u64 *def_val); int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle, size_t idx, const void *from, size_t size); #else static inline int uverbs_get_flags64(u64 *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx, u64 allowed_bits) { return -EINVAL; } static inline int uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx, u64 allowed_bits) { return -EINVAL; } static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle, size_t idx, const void *from, size_t size) { return -EINVAL; } static inline __malloc void *uverbs_alloc(struct uverbs_attr_bundle *bundle, size_t size) { return ERR_PTR(-EINVAL); } static inline __malloc void *uverbs_zalloc(struct uverbs_attr_bundle *bundle, size_t size) { return ERR_PTR(-EINVAL); } static inline int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx, s64 lower_bound, u64 upper_bound, s64 *def_val) { return -EINVAL; } static inline int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle, size_t idx, const void *from, size_t size) { return -EINVAL; } static inline int _uverbs_get_const_signed(s64 *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx, s64 lower_bound, u64 upper_bound, s64 *def_val) { return -EINVAL; } static inline int _uverbs_get_const_unsigned(u64 *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx, u64 upper_bound, u64 *def_val) { return -EINVAL; } #endif #define uverbs_get_const_signed(_to, _attrs_bundle, _idx) \ ({ \ s64 _val; \ int _ret = \ _uverbs_get_const_signed(&_val, _attrs_bundle, _idx, \ type_min(typeof(*(_to))), \ type_max(typeof(*(_to))), NULL); \ (*(_to)) = _val; \ _ret; \ }) #define uverbs_get_const_unsigned(_to, _attrs_bundle, _idx) \ ({ \ u64 _val; \ int _ret = \ _uverbs_get_const_unsigned(&_val, _attrs_bundle, _idx, \ type_max(typeof(*(_to))), NULL); \ (*(_to)) = _val; \ _ret; \ }) #define uverbs_get_const_default_signed(_to, _attrs_bundle, _idx, _default) \ ({ \ s64 _val; \ s64 _def_val = _default; \ int _ret = \ _uverbs_get_const_signed(&_val, _attrs_bundle, _idx, \ type_min(typeof(*(_to))), \ type_max(typeof(*(_to))), &_def_val); \ (*(_to)) = _val; \ _ret; \ }) #define uverbs_get_const_default_unsigned(_to, _attrs_bundle, _idx, _default) \ ({ \ u64 _val; \ u64 _def_val = _default; \ int _ret = \ _uverbs_get_const_unsigned(&_val, _attrs_bundle, _idx, \ type_max(typeof(*(_to))), &_def_val); \ (*(_to)) = _val; \ _ret; \ }) #define uverbs_get_const(_to, _attrs_bundle, _idx) \ (is_signed_type(typeof(*(_to))) ? \ uverbs_get_const_signed(_to, _attrs_bundle, _idx) : \ uverbs_get_const_unsigned(_to, _attrs_bundle, _idx)) \ #define uverbs_get_const_default(_to, _attrs_bundle, _idx, _default) \ (is_signed_type(typeof(*(_to))) ? \ uverbs_get_const_default_signed(_to, _attrs_bundle, _idx, \ _default) : \ uverbs_get_const_default_unsigned(_to, _attrs_bundle, _idx, \ _default)) static inline int uverbs_get_raw_fd(int *to, const struct uverbs_attr_bundle *attrs_bundle, size_t idx) { return uverbs_get_const_signed(to, attrs_bundle, idx); } #endif
7 6 1 6 9 8 9 9 5 8 8 8 8 1 1 1 1 4 4 4 1 4 4 4 4 4 4 1 1 4 4 3 4 4 4 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 // SPDX-License-Identifier: GPL-2.0-or-later /* * Core registration and callback routines for MTD * drivers and users. * * Copyright © 1999-2010 David Woodhouse <dwmw2@infradead.org> * Copyright © 2006 Red Hat UK Limited */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/ptrace.h> #include <linux/seq_file.h> #include <linux/string.h> #include <linux/timer.h> #include <linux/major.h> #include <linux/fs.h> #include <linux/err.h> #include <linux/ioctl.h> #include <linux/init.h> #include <linux/of.h> #include <linux/proc_fs.h> #include <linux/idr.h> #include <linux/backing-dev.h> #include <linux/gfp.h> #include <linux/random.h> #include <linux/slab.h> #include <linux/reboot.h> #include <linux/leds.h> #include <linux/debugfs.h> #include <linux/nvmem-provider.h> #include <linux/root_dev.h> #include <linux/error-injection.h> #include <linux/mtd/mtd.h> #include <linux/mtd/partitions.h> #include "mtdcore.h" struct backing_dev_info *mtd_bdi; #ifdef CONFIG_PM_SLEEP static int mtd_cls_suspend(struct device *dev) { struct mtd_info *mtd = dev_get_drvdata(dev); return mtd ? mtd_suspend(mtd) : 0; } static int mtd_cls_resume(struct device *dev) { struct mtd_info *mtd = dev_get_drvdata(dev); if (mtd) mtd_resume(mtd); return 0; } static SIMPLE_DEV_PM_OPS(mtd_cls_pm_ops, mtd_cls_suspend, mtd_cls_resume); #define MTD_CLS_PM_OPS (&mtd_cls_pm_ops) #else #define MTD_CLS_PM_OPS NULL #endif static struct class mtd_class = { .name = "mtd", .pm = MTD_CLS_PM_OPS, }; static DEFINE_IDR(mtd_idr); /* These are exported solely for the purpose of mtd_blkdevs.c. You should not use them for _anything_ else */ DEFINE_MUTEX(mtd_table_mutex); EXPORT_SYMBOL_GPL(mtd_table_mutex); struct mtd_info *__mtd_next_device(int i) { return idr_get_next(&mtd_idr, &i); } EXPORT_SYMBOL_GPL(__mtd_next_device); static LIST_HEAD(mtd_notifiers); #define MTD_DEVT(index) MKDEV(MTD_CHAR_MAJOR, (index)*2) /* REVISIT once MTD uses the driver model better, whoever allocates * the mtd_info will probably want to use the release() hook... */ static void mtd_release(struct device *dev) { struct mtd_info *mtd = dev_get_drvdata(dev); dev_t index = MTD_DEVT(mtd->index); idr_remove(&mtd_idr, mtd->index); of_node_put(mtd_get_of_node(mtd)); if (mtd_is_partition(mtd)) release_mtd_partition(mtd); /* remove /dev/mtdXro node */ device_destroy(&mtd_class, index + 1); } static void mtd_device_release(struct kref *kref) { struct mtd_info *mtd = container_of(kref, struct mtd_info, refcnt); bool is_partition = mtd_is_partition(mtd); debugfs_remove_recursive(mtd->dbg.dfs_dir); /* Try to remove the NVMEM provider */ nvmem_unregister(mtd->nvmem); device_unregister(&mtd->dev); /* * Clear dev so mtd can be safely re-registered later if desired. * Should not be done for partition, * as it was already destroyed in device_unregister(). */ if (!is_partition) memset(&mtd->dev, 0, sizeof(mtd->dev)); module_put(THIS_MODULE); } #define MTD_DEVICE_ATTR_RO(name) \ static DEVICE_ATTR(name, 0444, mtd_##name##_show, NULL) #define MTD_DEVICE_ATTR_RW(name) \ static DEVICE_ATTR(name, 0644, mtd_##name##_show, mtd_##name##_store) static ssize_t mtd_type_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); char *type; switch (mtd->type) { case MTD_ABSENT: type = "absent"; break; case MTD_RAM: type = "ram"; break; case MTD_ROM: type = "rom"; break; case MTD_NORFLASH: type = "nor"; break; case MTD_NANDFLASH: type = "nand"; break; case MTD_DATAFLASH: type = "dataflash"; break; case MTD_UBIVOLUME: type = "ubi"; break; case MTD_MLCNANDFLASH: type = "mlc-nand"; break; default: type = "unknown"; } return sysfs_emit(buf, "%s\n", type); } MTD_DEVICE_ATTR_RO(type); static ssize_t mtd_flags_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); return sysfs_emit(buf, "0x%lx\n", (unsigned long)mtd->flags); } MTD_DEVICE_ATTR_RO(flags); static ssize_t mtd_size_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); return sysfs_emit(buf, "%llu\n", (unsigned long long)mtd->size); } MTD_DEVICE_ATTR_RO(size); static ssize_t mtd_erasesize_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); return sysfs_emit(buf, "%lu\n", (unsigned long)mtd->erasesize); } MTD_DEVICE_ATTR_RO(erasesize); static ssize_t mtd_writesize_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); return sysfs_emit(buf, "%lu\n", (unsigned long)mtd->writesize); } MTD_DEVICE_ATTR_RO(writesize); static ssize_t mtd_subpagesize_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); unsigned int subpagesize = mtd->writesize >> mtd->subpage_sft; return sysfs_emit(buf, "%u\n", subpagesize); } MTD_DEVICE_ATTR_RO(subpagesize); static ssize_t mtd_oobsize_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); return sysfs_emit(buf, "%lu\n", (unsigned long)mtd->oobsize); } MTD_DEVICE_ATTR_RO(oobsize); static ssize_t mtd_oobavail_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); return sysfs_emit(buf, "%u\n", mtd->oobavail); } MTD_DEVICE_ATTR_RO(oobavail); static ssize_t mtd_numeraseregions_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); return sysfs_emit(buf, "%u\n", mtd->numeraseregions); } MTD_DEVICE_ATTR_RO(numeraseregions); static ssize_t mtd_name_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); return sysfs_emit(buf, "%s\n", mtd->name); } MTD_DEVICE_ATTR_RO(name); static ssize_t mtd_ecc_strength_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); return sysfs_emit(buf, "%u\n", mtd->ecc_strength); } MTD_DEVICE_ATTR_RO(ecc_strength); static ssize_t mtd_bitflip_threshold_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); return sysfs_emit(buf, "%u\n", mtd->bitflip_threshold); } static ssize_t mtd_bitflip_threshold_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct mtd_info *mtd = dev_get_drvdata(dev); unsigned int bitflip_threshold; int retval; retval = kstrtouint(buf, 0, &bitflip_threshold); if (retval) return retval; mtd->bitflip_threshold = bitflip_threshold; return count; } MTD_DEVICE_ATTR_RW(bitflip_threshold); static ssize_t mtd_ecc_step_size_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); return sysfs_emit(buf, "%u\n", mtd->ecc_step_size); } MTD_DEVICE_ATTR_RO(ecc_step_size); static ssize_t mtd_corrected_bits_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); struct mtd_ecc_stats *ecc_stats = &mtd->ecc_stats; return sysfs_emit(buf, "%u\n", ecc_stats->corrected); } MTD_DEVICE_ATTR_RO(corrected_bits); /* ecc stats corrected */ static ssize_t mtd_ecc_failures_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); struct mtd_ecc_stats *ecc_stats = &mtd->ecc_stats; return sysfs_emit(buf, "%u\n", ecc_stats->failed); } MTD_DEVICE_ATTR_RO(ecc_failures); /* ecc stats errors */ static ssize_t mtd_bad_blocks_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); struct mtd_ecc_stats *ecc_stats = &mtd->ecc_stats; return sysfs_emit(buf, "%u\n", ecc_stats->badblocks); } MTD_DEVICE_ATTR_RO(bad_blocks); static ssize_t mtd_bbt_blocks_show(struct device *dev, struct device_attribute *attr, char *buf) { struct mtd_info *mtd = dev_get_drvdata(dev); struct mtd_ecc_stats *ecc_stats = &mtd->ecc_stats; return sysfs_emit(buf, "%u\n", ecc_stats->bbtblocks); } MTD_DEVICE_ATTR_RO(bbt_blocks); static struct attribute *mtd_attrs[] = { &dev_attr_type.attr, &dev_attr_flags.attr, &dev_attr_size.attr, &dev_attr_erasesize.attr, &dev_attr_writesize.attr, &dev_attr_subpagesize.attr, &dev_attr_oobsize.attr, &dev_attr_oobavail.attr, &dev_attr_numeraseregions.attr, &dev_attr_name.attr, &dev_attr_ecc_strength.attr, &dev_attr_ecc_step_size.attr, &dev_attr_corrected_bits.attr, &dev_attr_ecc_failures.attr, &dev_attr_bad_blocks.attr, &dev_attr_bbt_blocks.attr, &dev_attr_bitflip_threshold.attr, NULL, }; ATTRIBUTE_GROUPS(mtd); static const struct device_type mtd_devtype = { .name = "mtd", .groups = mtd_groups, .release = mtd_release, }; static bool mtd_expert_analysis_mode; #ifdef CONFIG_DEBUG_FS bool mtd_check_expert_analysis_mode(void) { const char *mtd_expert_analysis_warning = "Bad block checks have been entirely disabled.\n" "This is only reserved for post-mortem forensics and debug purposes.\n" "Never enable this mode if you do not know what you are doing!\n"; return WARN_ONCE(mtd_expert_analysis_mode, mtd_expert_analysis_warning); } EXPORT_SYMBOL_GPL(mtd_check_expert_analysis_mode); #endif static struct dentry *dfs_dir_mtd; static int mtd_ooblayout_show(struct seq_file *s, void *p, int (*iter)(struct mtd_info *, int section, struct mtd_oob_region *region)) { struct mtd_info *mtd = s->private; int section; for (section = 0;; section++) { struct mtd_oob_region region; int err; err = iter(mtd, section, &region); if (err) { if (err == -ERANGE) break; return err; } seq_printf(s, "%-3d %4u %4u\n", section, region.offset, region.length); } return 0; } static int mtd_ooblayout_ecc_show(struct seq_file *s, void *p) { return mtd_ooblayout_show(s, p, mtd_ooblayout_ecc); } DEFINE_SHOW_ATTRIBUTE(mtd_ooblayout_ecc); static int mtd_ooblayout_free_show(struct seq_file *s, void *p) { return mtd_ooblayout_show(s, p, mtd_ooblayout_free); } DEFINE_SHOW_ATTRIBUTE(mtd_ooblayout_free); static void mtd_debugfs_populate(struct mtd_info *mtd) { struct device *dev = &mtd->dev; struct mtd_oob_region region; if (IS_ERR_OR_NULL(dfs_dir_mtd)) return; mtd->dbg.dfs_dir = debugfs_create_dir(dev_name(dev), dfs_dir_mtd); if (IS_ERR_OR_NULL(mtd->dbg.dfs_dir)) return; /* Create ooblayout files only if at least one region is present. */ if (mtd_ooblayout_ecc(mtd, 0, &region) == 0) debugfs_create_file("ooblayout_ecc", 0444, mtd->dbg.dfs_dir, mtd, &mtd_ooblayout_ecc_fops); if (mtd_ooblayout_free(mtd, 0, &region) == 0) debugfs_create_file("ooblayout_free", 0444, mtd->dbg.dfs_dir, mtd, &mtd_ooblayout_free_fops); } #ifndef CONFIG_MMU unsigned mtd_mmap_capabilities(struct mtd_info *mtd) { switch (mtd->type) { case MTD_RAM: return NOMMU_MAP_COPY | NOMMU_MAP_DIRECT | NOMMU_MAP_EXEC | NOMMU_MAP_READ | NOMMU_MAP_WRITE; case MTD_ROM: return NOMMU_MAP_COPY | NOMMU_MAP_DIRECT | NOMMU_MAP_EXEC | NOMMU_MAP_READ; default: return NOMMU_MAP_COPY; } } EXPORT_SYMBOL_GPL(mtd_mmap_capabilities); #endif static int mtd_reboot_notifier(struct notifier_block *n, unsigned long state, void *cmd) { struct mtd_info *mtd; mtd = container_of(n, struct mtd_info, reboot_notifier); mtd->_reboot(mtd); return NOTIFY_DONE; } /** * mtd_wunit_to_pairing_info - get pairing information of a wunit * @mtd: pointer to new MTD device info structure * @wunit: write unit we are interested in * @info: returned pairing information * * Retrieve pairing information associated to the wunit. * This is mainly useful when dealing with MLC/TLC NANDs where pages can be * paired together, and where programming a page may influence the page it is * paired with. * The notion of page is replaced by the term wunit (write-unit) to stay * consistent with the ->writesize field. * * The @wunit argument can be extracted from an absolute offset using * mtd_offset_to_wunit(). @info is filled with the pairing information attached * to @wunit. * * From the pairing info the MTD user can find all the wunits paired with * @wunit using the following loop: * * for (i = 0; i < mtd_pairing_groups(mtd); i++) { * info.pair = i; * mtd_pairing_info_to_wunit(mtd, &info); * ... * } */ int mtd_wunit_to_pairing_info(struct mtd_info *mtd, int wunit, struct mtd_pairing_info *info) { struct mtd_info *master = mtd_get_master(mtd); int npairs = mtd_wunit_per_eb(master) / mtd_pairing_groups(master); if (wunit < 0 || wunit >= npairs) return -EINVAL; if (master->pairing && master->pairing->get_info) return master->pairing->get_info(master, wunit, info); info->group = 0; info->pair = wunit; return 0; } EXPORT_SYMBOL_GPL(mtd_wunit_to_pairing_info); /** * mtd_pairing_info_to_wunit - get wunit from pairing information * @mtd: pointer to new MTD device info structure * @info: pairing information struct * * Returns a positive number representing the wunit associated to the info * struct, or a negative error code. * * This is the reverse of mtd_wunit_to_pairing_info(), and can help one to * iterate over all wunits of a given pair (see mtd_wunit_to_pairing_info() * doc). * * It can also be used to only program the first page of each pair (i.e. * page attached to group 0), which allows one to use an MLC NAND in * software-emulated SLC mode: * * info.group = 0; * npairs = mtd_wunit_per_eb(mtd) / mtd_pairing_groups(mtd); * for (info.pair = 0; info.pair < npairs; info.pair++) { * wunit = mtd_pairing_info_to_wunit(mtd, &info); * mtd_write(mtd, mtd_wunit_to_offset(mtd, blkoffs, wunit), * mtd->writesize, &retlen, buf + (i * mtd->writesize)); * } */ int mtd_pairing_info_to_wunit(struct mtd_info *mtd, const struct mtd_pairing_info *info) { struct mtd_info *master = mtd_get_master(mtd); int ngroups = mtd_pairing_groups(master); int npairs = mtd_wunit_per_eb(master) / ngroups; if (!info || info->pair < 0 || info->pair >= npairs || info->group < 0 || info->group >= ngroups) return -EINVAL; if (master->pairing && master->pairing->get_wunit) return mtd->pairing->get_wunit(master, info); return info->pair; } EXPORT_SYMBOL_GPL(mtd_pairing_info_to_wunit); /** * mtd_pairing_groups - get the number of pairing groups * @mtd: pointer to new MTD device info structure * * Returns the number of pairing groups. * * This number is usually equal to the number of bits exposed by a single * cell, and can be used in conjunction with mtd_pairing_info_to_wunit() * to iterate over all pages of a given pair. */ int mtd_pairing_groups(struct mtd_info *mtd) { struct mtd_info *master = mtd_get_master(mtd); if (!master->pairing || !master->pairing->ngroups) return 1; return master->pairing->ngroups; } EXPORT_SYMBOL_GPL(mtd_pairing_groups); static int mtd_nvmem_reg_read(void *priv, unsigned int offset, void *val, size_t bytes) { struct mtd_info *mtd = priv; size_t retlen; int err; err = mtd_read(mtd, offset, bytes, &retlen, val); if (err && err != -EUCLEAN) return err; return retlen == bytes ? 0 : -EIO; } static int mtd_nvmem_add(struct mtd_info *mtd) { struct device_node *node = mtd_get_of_node(mtd); struct nvmem_config config = {}; config.id = NVMEM_DEVID_NONE; config.dev = &mtd->dev; config.name = dev_name(&mtd->dev); config.owner = THIS_MODULE; config.add_legacy_fixed_of_cells = of_device_is_compatible(node, "nvmem-cells"); config.reg_read = mtd_nvmem_reg_read; config.size = mtd->size; config.word_size = 1; config.stride = 1; config.read_only = true; config.root_only = true; config.ignore_wp = true; config.priv = mtd; mtd->nvmem = nvmem_register(&config); if (IS_ERR(mtd->nvmem)) { /* Just ignore if there is no NVMEM support in the kernel */ if (PTR_ERR(mtd->nvmem) == -EOPNOTSUPP) mtd->nvmem = NULL; else return dev_err_probe(&mtd->dev, PTR_ERR(mtd->nvmem), "Failed to register NVMEM device\n"); } return 0; } static void mtd_check_of_node(struct mtd_info *mtd) { struct device_node *partitions, *parent_dn, *mtd_dn = NULL; const char *pname, *prefix = "partition-"; int plen, mtd_name_len, offset, prefix_len; /* Check if MTD already has a device node */ if (mtd_get_of_node(mtd)) return; if (!mtd_is_partition(mtd)) return; parent_dn = of_node_get(mtd_get_of_node(mtd->parent)); if (!parent_dn) return; if (mtd_is_partition(mtd->parent)) partitions = of_node_get(parent_dn); else partitions = of_get_child_by_name(parent_dn, "partitions"); if (!partitions) goto exit_parent; prefix_len = strlen(prefix); mtd_name_len = strlen(mtd->name); /* Search if a partition is defined with the same name */ for_each_child_of_node(partitions, mtd_dn) { /* Skip partition with no/wrong prefix */ if (!of_node_name_prefix(mtd_dn, prefix)) continue; /* Label have priority. Check that first */ if (!of_property_read_string(mtd_dn, "label", &pname)) { offset = 0; } else { pname = mtd_dn->name; offset = prefix_len; } plen = strlen(pname) - offset; if (plen == mtd_name_len && !strncmp(mtd->name, pname + offset, plen)) { mtd_set_of_node(mtd, mtd_dn); of_node_put(mtd_dn); break; } } of_node_put(partitions); exit_parent: of_node_put(parent_dn); } /** * add_mtd_device - register an MTD device * @mtd: pointer to new MTD device info structure * * Add a device to the list of MTD devices present in the system, and * notify each currently active MTD 'user' of its arrival. Returns * zero on success or non-zero on failure. */ int add_mtd_device(struct mtd_info *mtd) { struct device_node *np = mtd_get_of_node(mtd); struct mtd_info *master = mtd_get_master(mtd); struct mtd_notifier *not; int i, error, ofidx; /* * May occur, for instance, on buggy drivers which call * mtd_device_parse_register() multiple times on the same master MTD, * especially with CONFIG_MTD_PARTITIONED_MASTER=y. */ if (WARN_ONCE(mtd->dev.type, "MTD already registered\n")) return -EEXIST; BUG_ON(mtd->writesize == 0); /* * MTD drivers should implement ->_{write,read}() or * ->_{write,read}_oob(), but not both. */ if (WARN_ON((mtd->_write && mtd->_write_oob) || (mtd->_read && mtd->_read_oob))) return -EINVAL; if (WARN_ON((!mtd->erasesize || !master->_erase) && !(mtd->flags & MTD_NO_ERASE))) return -EINVAL; /* * MTD_SLC_ON_MLC_EMULATION can only be set on partitions, when the * master is an MLC NAND and has a proper pairing scheme defined. * We also reject masters that implement ->_writev() for now, because * NAND controller drivers don't implement this hook, and adding the * SLC -> MLC address/length conversion to this path is useless if we * don't have a user. */ if (mtd->flags & MTD_SLC_ON_MLC_EMULATION && (!mtd_is_partition(mtd) || master->type != MTD_MLCNANDFLASH || !master->pairing || master->_writev)) return -EINVAL; mutex_lock(&mtd_table_mutex); ofidx = -1; if (np) ofidx = of_alias_get_id(np, "mtd"); if (ofidx >= 0) i = idr_alloc(&mtd_idr, mtd, ofidx, ofidx + 1, GFP_KERNEL); else i = idr_alloc(&mtd_idr, mtd, 0, 0, GFP_KERNEL); if (i < 0) { error = i; goto fail_locked; } mtd->index = i; kref_init(&mtd->refcnt); /* default value if not set by driver */ if (mtd->bitflip_threshold == 0) mtd->bitflip_threshold = mtd->ecc_strength; if (mtd->flags & MTD_SLC_ON_MLC_EMULATION) { int ngroups = mtd_pairing_groups(master); mtd->erasesize /= ngroups; mtd->size = (u64)mtd_div_by_eb(mtd->size, master) * mtd->erasesize; } if (is_power_of_2(mtd->erasesize)) mtd->erasesize_shift = ffs(mtd->erasesize) - 1; else mtd->erasesize_shift = 0; if (is_power_of_2(mtd->writesize)) mtd->writesize_shift = ffs(mtd->writesize) - 1; else mtd->writesize_shift = 0; mtd->erasesize_mask = (1 << mtd->erasesize_shift) - 1; mtd->writesize_mask = (1 << mtd->writesize_shift) - 1; /* Some chips always power up locked. Unlock them now */ if ((mtd->flags & MTD_WRITEABLE) && (mtd->flags & MTD_POWERUP_LOCK)) { error = mtd_unlock(mtd, 0, mtd->size); if (error && error != -EOPNOTSUPP) printk(KERN_WARNING "%s: unlock failed, writes may not work\n", mtd->name); /* Ignore unlock failures? */ error = 0; } /* Caller should have set dev.parent to match the * physical device, if appropriate. */ mtd->dev.type = &mtd_devtype; mtd->dev.class = &mtd_class; mtd->dev.devt = MTD_DEVT(i); error = dev_set_name(&mtd->dev, "mtd%d", i); if (error) goto fail_devname; dev_set_drvdata(&mtd->dev, mtd); mtd_check_of_node(mtd); of_node_get(mtd_get_of_node(mtd)); error = device_register(&mtd->dev); if (error) { put_device(&mtd->dev); goto fail_added; } /* Add the nvmem provider */ error = mtd_nvmem_add(mtd); if (error) goto fail_nvmem_add; mtd_debugfs_populate(mtd); device_create(&mtd_class, mtd->dev.parent, MTD_DEVT(i) + 1, NULL, "mtd%dro", i); pr_debug("mtd: Giving out device %d to %s\n", i, mtd->name); /* No need to get a refcount on the module containing the notifier, since we hold the mtd_table_mutex */ list_for_each_entry(not, &mtd_notifiers, list) not->add(mtd); mutex_unlock(&mtd_table_mutex); if (of_property_read_bool(mtd_get_of_node(mtd), "linux,rootfs")) { if (IS_BUILTIN(CONFIG_MTD)) { pr_info("mtd: setting mtd%d (%s) as root device\n", mtd->index, mtd->name); ROOT_DEV = MKDEV(MTD_BLOCK_MAJOR, mtd->index); } else { pr_warn("mtd: can't set mtd%d (%s) as root device - mtd must be builtin\n", mtd->index, mtd->name); } } /* We _know_ we aren't being removed, because our caller is still holding us here. So none of this try_ nonsense, and no bitching about it either. :) */ __module_get(THIS_MODULE); return 0; fail_nvmem_add: device_unregister(&mtd->dev); fail_added: of_node_put(mtd_get_of_node(mtd)); fail_devname: idr_remove(&mtd_idr, i); fail_locked: mutex_unlock(&mtd_table_mutex); return error; } /** * del_mtd_device - unregister an MTD device * @mtd: pointer to MTD device info structure * * Remove a device from the list of MTD devices present in the system, * and notify each currently active MTD 'user' of its departure. * Returns zero on success or 1 on failure, which currently will happen * if the requested device does not appear to be present in the list. */ int del_mtd_device(struct mtd_info *mtd) { int ret; struct mtd_notifier *not; mutex_lock(&mtd_table_mutex); if (idr_find(&mtd_idr, mtd->index) != mtd) { ret = -ENODEV; goto out_error; } /* No need to get a refcount on the module containing the notifier, since we hold the mtd_table_mutex */ list_for_each_entry(not, &mtd_notifiers, list) not->remove(mtd); kref_put(&mtd->refcnt, mtd_device_release); ret = 0; out_error: mutex_unlock(&mtd_table_mutex); return ret; } /* * Set a few defaults based on the parent devices, if not provided by the * driver */ static void mtd_set_dev_defaults(struct mtd_info *mtd) { if (mtd->dev.parent) { if (!mtd->owner && mtd->dev.parent->driver) mtd->owner = mtd->dev.parent->driver->owner; if (!mtd->name) mtd->name = dev_name(mtd->dev.parent); } else { pr_debug("mtd device won't show a device symlink in sysfs\n"); } INIT_LIST_HEAD(&mtd->partitions); mutex_init(&mtd->master.partitions_lock); mutex_init(&mtd->master.chrdev_lock); } static ssize_t mtd_otp_size(struct mtd_info *mtd, bool is_user) { struct otp_info *info; ssize_t size = 0; unsigned int i; size_t retlen; int ret; info = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!info) return -ENOMEM; if (is_user) ret = mtd_get_user_prot_info(mtd, PAGE_SIZE, &retlen, info); else ret = mtd_get_fact_prot_info(mtd, PAGE_SIZE, &retlen, info); if (ret) goto err; for (i = 0; i < retlen / sizeof(*info); i++) size += info[i].length; kfree(info); return size; err: kfree(info); /* ENODATA means there is no OTP region. */ return ret == -ENODATA ? 0 : ret; } static struct nvmem_device *mtd_otp_nvmem_register(struct mtd_info *mtd, const char *compatible, int size, nvmem_reg_read_t reg_read) { struct nvmem_device *nvmem = NULL; struct nvmem_config config = {}; struct device_node *np; /* DT binding is optional */ np = of_get_compatible_child(mtd->dev.of_node, compatible); /* OTP nvmem will be registered on the physical device */ config.dev = mtd->dev.parent; config.name = compatible; config.id = NVMEM_DEVID_AUTO; config.owner = THIS_MODULE; config.add_legacy_fixed_of_cells = !mtd_type_is_nand(mtd); config.type = NVMEM_TYPE_OTP; config.root_only = true; config.ignore_wp = true; config.reg_read = reg_read; config.size = size; config.of_node = np; config.priv = mtd; nvmem = nvmem_register(&config); /* Just ignore if there is no NVMEM support in the kernel */ if (IS_ERR(nvmem) && PTR_ERR(nvmem) == -EOPNOTSUPP) nvmem = NULL; of_node_put(np); return nvmem; } static int mtd_nvmem_user_otp_reg_read(void *priv, unsigned int offset, void *val, size_t bytes) { struct mtd_info *mtd = priv; size_t retlen; int ret; ret = mtd_read_user_prot_reg(mtd, offset, bytes, &retlen, val); if (ret) return ret; return retlen == bytes ? 0 : -EIO; } static int mtd_nvmem_fact_otp_reg_read(void *priv, unsigned int offset, void *val, size_t bytes) { struct mtd_info *mtd = priv; size_t retlen; int ret; ret = mtd_read_fact_prot_reg(mtd, offset, bytes, &retlen, val); if (ret) return ret; return retlen == bytes ? 0 : -EIO; } static int mtd_otp_nvmem_add(struct mtd_info *mtd) { struct device *dev = mtd->dev.parent; struct nvmem_device *nvmem; ssize_t size; int err; if (mtd->_get_user_prot_info && mtd->_read_user_prot_reg) { size = mtd_otp_size(mtd, true); if (size < 0) { err = size; goto err; } if (size > 0) { nvmem = mtd_otp_nvmem_register(mtd, "user-otp", size, mtd_nvmem_user_otp_reg_read); if (IS_ERR(nvmem)) { err = PTR_ERR(nvmem); goto err; } mtd->otp_user_nvmem = nvmem; } } if (mtd->_get_fact_prot_info && mtd->_read_fact_prot_reg) { size = mtd_otp_size(mtd, false); if (size < 0) { err = size; goto err; } if (size > 0) { /* * The factory OTP contains thing such as a unique serial * number and is small, so let's read it out and put it * into the entropy pool. */ void *otp; otp = kmalloc(size, GFP_KERNEL); if (!otp) { err = -ENOMEM; goto err; } err = mtd_nvmem_fact_otp_reg_read(mtd, 0, otp, size); if (err < 0) { kfree(otp); goto err; } add_device_randomness(otp, err); kfree(otp); nvmem = mtd_otp_nvmem_register(mtd, "factory-otp", size, mtd_nvmem_fact_otp_reg_read); if (IS_ERR(nvmem)) { err = PTR_ERR(nvmem); goto err; } mtd->otp_factory_nvmem = nvmem; } } return 0; err: nvmem_unregister(mtd->otp_user_nvmem); /* Don't report error if OTP is not supported. */ if (err == -EOPNOTSUPP) return 0; return dev_err_probe(dev, err, "Failed to register OTP NVMEM device\n"); } /** * mtd_device_parse_register - parse partitions and register an MTD device. * * @mtd: the MTD device to register * @types: the list of MTD partition probes to try, see * 'parse_mtd_partitions()' for more information * @parser_data: MTD partition parser-specific data * @parts: fallback partition information to register, if parsing fails; * only valid if %nr_parts > %0 * @nr_parts: the number of partitions in parts, if zero then the full * MTD device is registered if no partition info is found * * This function aggregates MTD partitions parsing (done by * 'parse_mtd_partitions()') and MTD device and partitions registering. It * basically follows the most common pattern found in many MTD drivers: * * * If the MTD_PARTITIONED_MASTER option is set, then the device as a whole is * registered first. * * Then It tries to probe partitions on MTD device @mtd using parsers * specified in @types (if @types is %NULL, then the default list of parsers * is used, see 'parse_mtd_partitions()' for more information). If none are * found this functions tries to fallback to information specified in * @parts/@nr_parts. * * If no partitions were found this function just registers the MTD device * @mtd and exits. * * Returns zero in case of success and a negative error code in case of failure. */ int mtd_device_parse_register(struct mtd_info *mtd, const char * const *types, struct mtd_part_parser_data *parser_data, const struct mtd_partition *parts, int nr_parts) { int ret, err; mtd_set_dev_defaults(mtd); ret = mtd_otp_nvmem_add(mtd); if (ret) goto out; if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER)) { ret = add_mtd_device(mtd); if (ret) goto out; } /* Prefer parsed partitions over driver-provided fallback */ ret = parse_mtd_partitions(mtd, types, parser_data); if (ret == -EPROBE_DEFER) goto out; if (ret > 0) ret = 0; else if (nr_parts) ret = add_mtd_partitions(mtd, parts, nr_parts); else if (!device_is_registered(&mtd->dev)) ret = add_mtd_device(mtd); else ret = 0; if (ret) goto out; /* * FIXME: some drivers unfortunately call this function more than once. * So we have to check if we've already assigned the reboot notifier. * * Generally, we can make multiple calls work for most cases, but it * does cause problems with parse_mtd_partitions() above (e.g., * cmdlineparts will register partitions more than once). */ WARN_ONCE(mtd->_reboot && mtd->reboot_notifier.notifier_call, "MTD already registered\n"); if (mtd->_reboot && !mtd->reboot_notifier.notifier_call) { mtd->reboot_notifier.notifier_call = mtd_reboot_notifier; register_reboot_notifier(&mtd->reboot_notifier); } out: if (ret) { nvmem_unregister(mtd->otp_user_nvmem); nvmem_unregister(mtd->otp_factory_nvmem); } if (ret && device_is_registered(&mtd->dev)) { err = del_mtd_device(mtd); if (err) pr_err("Error when deleting MTD device (%d)\n", err); } return ret; } EXPORT_SYMBOL_GPL(mtd_device_parse_register); /** * mtd_device_unregister - unregister an existing MTD device. * * @master: the MTD device to unregister. This will unregister both the master * and any partitions if registered. */ int mtd_device_unregister(struct mtd_info *master) { int err; if (master->_reboot) { unregister_reboot_notifier(&master->reboot_notifier); memset(&master->reboot_notifier, 0, sizeof(master->reboot_notifier)); } nvmem_unregister(master->otp_user_nvmem); nvmem_unregister(master->otp_factory_nvmem); err = del_mtd_partitions(master); if (err) return err; if (!device_is_registered(&master->dev)) return 0; return del_mtd_device(master); } EXPORT_SYMBOL_GPL(mtd_device_unregister); /** * register_mtd_user - register a 'user' of MTD devices. * @new: pointer to notifier info structure * * Registers a pair of callbacks function to be called upon addition * or removal of MTD devices. Causes the 'add' callback to be immediately * invoked for each MTD device currently present in the system. */ void register_mtd_user (struct mtd_notifier *new) { struct mtd_info *mtd; mutex_lock(&mtd_table_mutex); list_add(&new->list, &mtd_notifiers); __module_get(THIS_MODULE); mtd_for_each_device(mtd) new->add(mtd); mutex_unlock(&mtd_table_mutex); } EXPORT_SYMBOL_GPL(register_mtd_user); /** * unregister_mtd_user - unregister a 'user' of MTD devices. * @old: pointer to notifier info structure * * Removes a callback function pair from the list of 'users' to be * notified upon addition or removal of MTD devices. Causes the * 'remove' callback to be immediately invoked for each MTD device * currently present in the system. */ int unregister_mtd_user (struct mtd_notifier *old) { struct mtd_info *mtd; mutex_lock(&mtd_table_mutex); module_put(THIS_MODULE); mtd_for_each_device(mtd) old->remove(mtd); list_del(&old->list); mutex_unlock(&mtd_table_mutex); return 0; } EXPORT_SYMBOL_GPL(unregister_mtd_user); /** * get_mtd_device - obtain a validated handle for an MTD device * @mtd: last known address of the required MTD device * @num: internal device number of the required MTD device * * Given a number and NULL address, return the num'th entry in the device * table, if any. Given an address and num == -1, search the device table * for a device with that address and return if it's still present. Given * both, return the num'th driver only if its address matches. Return * error code if not. */ struct mtd_info *get_mtd_device(struct mtd_info *mtd, int num) { struct mtd_info *ret = NULL, *other; int err = -ENODEV; mutex_lock(&mtd_table_mutex); if (num == -1) { mtd_for_each_device(other) { if (other == mtd) { ret = mtd; break; } } } else if (num >= 0) { ret = idr_find(&mtd_idr, num); if (mtd && mtd != ret) ret = NULL; } if (!ret) { ret = ERR_PTR(err); goto out; } err = __get_mtd_device(ret); if (err) ret = ERR_PTR(err); out: mutex_unlock(&mtd_table_mutex); return ret; } EXPORT_SYMBOL_GPL(get_mtd_device); int __get_mtd_device(struct mtd_info *mtd) { struct mtd_info *master = mtd_get_master(mtd); int err; if (master->_get_device) { err = master->_get_device(mtd); if (err) return err; } if (!try_module_get(master->owner)) { if (master->_put_device) master->_put_device(master); return -ENODEV; } while (mtd) { if (mtd != master) kref_get(&mtd->refcnt); mtd = mtd->parent; } if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER)) kref_get(&master->refcnt); return 0; } EXPORT_SYMBOL_GPL(__get_mtd_device); /** * of_get_mtd_device_by_node - obtain an MTD device associated with a given node * * @np: device tree node */ struct mtd_info *of_get_mtd_device_by_node(struct device_node *np) { struct mtd_info *mtd = NULL; struct mtd_info *tmp; int err; mutex_lock(&mtd_table_mutex); err = -EPROBE_DEFER; mtd_for_each_device(tmp) { if (mtd_get_of_node(tmp) == np) { mtd = tmp; err = __get_mtd_device(mtd); break; } } mutex_unlock(&mtd_table_mutex); return err ? ERR_PTR(err) : mtd; } EXPORT_SYMBOL_GPL(of_get_mtd_device_by_node); /** * get_mtd_device_nm - obtain a validated handle for an MTD device by * device name * @name: MTD device name to open * * This function returns MTD device description structure in case of * success and an error code in case of failure. */ struct mtd_info *get_mtd_device_nm(const char *name) { int err = -ENODEV; struct mtd_info *mtd = NULL, *other; mutex_lock(&mtd_table_mutex); mtd_for_each_device(other) { if (!strcmp(name, other->name)) { mtd = other; break; } } if (!mtd) goto out_unlock; err = __get_mtd_device(mtd); if (err) goto out_unlock; mutex_unlock(&mtd_table_mutex); return mtd; out_unlock: mutex_unlock(&mtd_table_mutex); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(get_mtd_device_nm); void put_mtd_device(struct mtd_info *mtd) { mutex_lock(&mtd_table_mutex); __put_mtd_device(mtd); mutex_unlock(&mtd_table_mutex); } EXPORT_SYMBOL_GPL(put_mtd_device); void __put_mtd_device(struct mtd_info *mtd) { struct mtd_info *master = mtd_get_master(mtd); while (mtd) { /* kref_put() can relese mtd, so keep a reference mtd->parent */ struct mtd_info *parent = mtd->parent; if (mtd != master) kref_put(&mtd->refcnt, mtd_device_release); mtd = parent; } if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER)) kref_put(&master->refcnt, mtd_device_release); module_put(master->owner); /* must be the last as master can be freed in the _put_device */ if (master->_put_device) master->_put_device(master); } EXPORT_SYMBOL_GPL(__put_mtd_device); /* * Erase is an synchronous operation. Device drivers are epected to return a * negative error code if the operation failed and update instr->fail_addr * to point the portion that was not properly erased. */ int mtd_erase(struct mtd_info *mtd, struct erase_info *instr) { struct mtd_info *master = mtd_get_master(mtd); u64 mst_ofs = mtd_get_master_ofs(mtd, 0); struct erase_info adjinstr; int ret; instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN; adjinstr = *instr; if (!mtd->erasesize || !master->_erase) return -ENOTSUPP; if (instr->addr >= mtd->size || instr->len > mtd->size - instr->addr) return -EINVAL; if (!(mtd->flags & MTD_WRITEABLE)) return -EROFS; if (!instr->len) return 0; ledtrig_mtd_activity(); if (mtd->flags & MTD_SLC_ON_MLC_EMULATION) { adjinstr.addr = (loff_t)mtd_div_by_eb(instr->addr, mtd) * master->erasesize; adjinstr.len = ((u64)mtd_div_by_eb(instr->addr + instr->len, mtd) * master->erasesize) - adjinstr.addr; } adjinstr.addr += mst_ofs; ret = master->_erase(master, &adjinstr); if (adjinstr.fail_addr != MTD_FAIL_ADDR_UNKNOWN) { instr->fail_addr = adjinstr.fail_addr - mst_ofs; if (mtd->flags & MTD_SLC_ON_MLC_EMULATION) { instr->fail_addr = mtd_div_by_eb(instr->fail_addr, master); instr->fail_addr *= mtd->erasesize; } } return ret; } EXPORT_SYMBOL_GPL(mtd_erase); ALLOW_ERROR_INJECTION(mtd_erase, ERRNO); /* * This stuff for eXecute-In-Place. phys is optional and may be set to NULL. */ int mtd_point(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, void **virt, resource_size_t *phys) { struct mtd_info *master = mtd_get_master(mtd); *retlen = 0; *virt = NULL; if (phys) *phys = 0; if (!master->_point) return -EOPNOTSUPP; if (from < 0 || from >= mtd->size || len > mtd->size - from) return -EINVAL; if (!len) return 0; from = mtd_get_master_ofs(mtd, from); return master->_point(master, from, len, retlen, virt, phys); } EXPORT_SYMBOL_GPL(mtd_point); /* We probably shouldn't allow XIP if the unpoint isn't a NULL */ int mtd_unpoint(struct mtd_info *mtd, loff_t from, size_t len) { struct mtd_info *master = mtd_get_master(mtd); if (!master->_unpoint) return -EOPNOTSUPP; if (from < 0 || from >= mtd->size || len > mtd->size - from) return -EINVAL; if (!len) return 0; return master->_unpoint(master, mtd_get_master_ofs(mtd, from), len); } EXPORT_SYMBOL_GPL(mtd_unpoint); /* * Allow NOMMU mmap() to directly map the device (if not NULL) * - return the address to which the offset maps * - return -ENOSYS to indicate refusal to do the mapping */ unsigned long mtd_get_unmapped_area(struct mtd_info *mtd, unsigned long len, unsigned long offset, unsigned long flags) { size_t retlen; void *virt; int ret; ret = mtd_point(mtd, offset, len, &retlen, &virt, NULL); if (ret) return ret; if (retlen != len) { mtd_unpoint(mtd, offset, retlen); return -ENOSYS; } return (unsigned long)virt; } EXPORT_SYMBOL_GPL(mtd_get_unmapped_area); static void mtd_update_ecc_stats(struct mtd_info *mtd, struct mtd_info *master, const struct mtd_ecc_stats *old_stats) { struct mtd_ecc_stats diff; if (master == mtd) return; diff = master->ecc_stats; diff.failed -= old_stats->failed; diff.corrected -= old_stats->corrected; while (mtd->parent) { mtd->ecc_stats.failed += diff.failed; mtd->ecc_stats.corrected += diff.corrected; mtd = mtd->parent; } } int mtd_read(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf) { struct mtd_oob_ops ops = { .len = len, .datbuf = buf, }; int ret; ret = mtd_read_oob(mtd, from, &ops); *retlen = ops.retlen; WARN_ON_ONCE(*retlen != len && mtd_is_bitflip_or_eccerr(ret)); return ret; } EXPORT_SYMBOL_GPL(mtd_read); ALLOW_ERROR_INJECTION(mtd_read, ERRNO); int mtd_write(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf) { struct mtd_oob_ops ops = { .len = len, .datbuf = (u8 *)buf, }; int ret; ret = mtd_write_oob(mtd, to, &ops); *retlen = ops.retlen; return ret; } EXPORT_SYMBOL_GPL(mtd_write); ALLOW_ERROR_INJECTION(mtd_write, ERRNO); /* * In blackbox flight recorder like scenarios we want to make successful writes * in interrupt context. panic_write() is only intended to be called when its * known the kernel is about to panic and we need the write to succeed. Since * the kernel is not going to be running for much longer, this function can * break locks and delay to ensure the write succeeds (but not sleep). */ int mtd_panic_write(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf) { struct mtd_info *master = mtd_get_master(mtd); *retlen = 0; if (!master->_panic_write) return -EOPNOTSUPP; if (to < 0 || to >= mtd->size || len > mtd->size - to) return -EINVAL; if (!(mtd->flags & MTD_WRITEABLE)) return -EROFS; if (!len) return 0; if (!master->oops_panic_write) master->oops_panic_write = true; return master->_panic_write(master, mtd_get_master_ofs(mtd, to), len, retlen, buf); } EXPORT_SYMBOL_GPL(mtd_panic_write); static int mtd_check_oob_ops(struct mtd_info *mtd, loff_t offs, struct mtd_oob_ops *ops) { /* * Some users are setting ->datbuf or ->oobbuf to NULL, but are leaving * ->len or ->ooblen uninitialized. Force ->len and ->ooblen to 0 in * this case. */ if (!ops->datbuf) ops->len = 0; if (!ops->oobbuf) ops->ooblen = 0; if (offs < 0 || offs + ops->len > mtd->size) return -EINVAL; if (ops->ooblen) { size_t maxooblen; if (ops->ooboffs >= mtd_oobavail(mtd, ops)) return -EINVAL; maxooblen = ((size_t)(mtd_div_by_ws(mtd->size, mtd) - mtd_div_by_ws(offs, mtd)) * mtd_oobavail(mtd, ops)) - ops->ooboffs; if (ops->ooblen > maxooblen) return -EINVAL; } return 0; } static int mtd_read_oob_std(struct mtd_info *mtd, loff_t from, struct mtd_oob_ops *ops) { struct mtd_info *master = mtd_get_master(mtd); int ret; from = mtd_get_master_ofs(mtd, from); if (master->_read_oob) ret = master->_read_oob(master, from, ops); else ret = master->_read(master, from, ops->len, &ops->retlen, ops->datbuf); return ret; } static int mtd_write_oob_std(struct mtd_info *mtd, loff_t to, struct mtd_oob_ops *ops) { struct mtd_info *master = mtd_get_master(mtd); int ret; to = mtd_get_master_ofs(mtd, to); if (master->_write_oob) ret = master->_write_oob(master, to, ops); else ret = master->_write(master, to, ops->len, &ops->retlen, ops->datbuf); return ret; } static int mtd_io_emulated_slc(struct mtd_info *mtd, loff_t start, bool read, struct mtd_oob_ops *ops) { struct mtd_info *master = mtd_get_master(mtd); int ngroups = mtd_pairing_groups(master); int npairs = mtd_wunit_per_eb(master) / ngroups; struct mtd_oob_ops adjops = *ops; unsigned int wunit, oobavail; struct mtd_pairing_info info; int max_bitflips = 0; u32 ebofs, pageofs; loff_t base, pos; ebofs = mtd_mod_by_eb(start, mtd); base = (loff_t)mtd_div_by_eb(start, mtd) * master->erasesize; info.group = 0; info.pair = mtd_div_by_ws(ebofs, mtd); pageofs = mtd_mod_by_ws(ebofs, mtd); oobavail = mtd_oobavail(mtd, ops); while (ops->retlen < ops->len || ops->oobretlen < ops->ooblen) { int ret; if (info.pair >= npairs) { info.pair = 0; base += master->erasesize; } wunit = mtd_pairing_info_to_wunit(master, &info); pos = mtd_wunit_to_offset(mtd, base, wunit); adjops.len = ops->len - ops->retlen; if (adjops.len > mtd->writesize - pageofs) adjops.len = mtd->writesize - pageofs; adjops.ooblen = ops->ooblen - ops->oobretlen; if (adjops.ooblen > oobavail - adjops.ooboffs) adjops.ooblen = oobavail - adjops.ooboffs; if (read) { ret = mtd_read_oob_std(mtd, pos + pageofs, &adjops); if (ret > 0) max_bitflips = max(max_bitflips, ret); } else { ret = mtd_write_oob_std(mtd, pos + pageofs, &adjops); } if (ret < 0) return ret; max_bitflips = max(max_bitflips, ret); ops->retlen += adjops.retlen; ops->oobretlen += adjops.oobretlen; adjops.datbuf += adjops.retlen; adjops.oobbuf += adjops.oobretlen; adjops.ooboffs = 0; pageofs = 0; info.pair++; } return max_bitflips; } int mtd_read_oob(struct mtd_info *mtd, loff_t from, struct mtd_oob_ops *ops) { struct mtd_info *master = mtd_get_master(mtd); struct mtd_ecc_stats old_stats = master->ecc_stats; int ret_code; ops->retlen = ops->oobretlen = 0; ret_code = mtd_check_oob_ops(mtd, from, ops); if (ret_code) return ret_code; ledtrig_mtd_activity(); /* Check the validity of a potential fallback on mtd->_read */ if (!master->_read_oob && (!master->_read || ops->oobbuf)) return -EOPNOTSUPP; if (ops->stats) memset(ops->stats, 0, sizeof(*ops->stats)); if (mtd->flags & MTD_SLC_ON_MLC_EMULATION) ret_code = mtd_io_emulated_slc(mtd, from, true, ops); else ret_code = mtd_read_oob_std(mtd, from, ops); mtd_update_ecc_stats(mtd, master, &old_stats); /* * In cases where ops->datbuf != NULL, mtd->_read_oob() has semantics * similar to mtd->_read(), returning a non-negative integer * representing max bitflips. In other cases, mtd->_read_oob() may * return -EUCLEAN. In all cases, perform similar logic to mtd_read(). */ if (unlikely(ret_code < 0)) return ret_code; if (mtd->ecc_strength == 0) return 0; /* device lacks ecc */ if (ops->stats) ops->stats->max_bitflips = ret_code; return ret_code >= mtd->bitflip_threshold ? -EUCLEAN : 0; } EXPORT_SYMBOL_GPL(mtd_read_oob); int mtd_write_oob(struct mtd_info *mtd, loff_t to, struct mtd_oob_ops *ops) { struct mtd_info *master = mtd_get_master(mtd); int ret; ops->retlen = ops->oobretlen = 0; if (!(mtd->flags & MTD_WRITEABLE)) return -EROFS; ret = mtd_check_oob_ops(mtd, to, ops); if (ret) return ret; ledtrig_mtd_activity(); /* Check the validity of a potential fallback on mtd->_write */ if (!master->_write_oob && (!master->_write || ops->oobbuf)) return -EOPNOTSUPP; if (mtd->flags & MTD_SLC_ON_MLC_EMULATION) return mtd_io_emulated_slc(mtd, to, false, ops); return mtd_write_oob_std(mtd, to, ops); } EXPORT_SYMBOL_GPL(mtd_write_oob); /** * mtd_ooblayout_ecc - Get the OOB region definition of a specific ECC section * @mtd: MTD device structure * @section: ECC section. Depending on the layout you may have all the ECC * bytes stored in a single contiguous section, or one section * per ECC chunk (and sometime several sections for a single ECC * ECC chunk) * @oobecc: OOB region struct filled with the appropriate ECC position * information * * This function returns ECC section information in the OOB area. If you want * to get all the ECC bytes information, then you should call * mtd_ooblayout_ecc(mtd, section++, oobecc) until it returns -ERANGE. * * Returns zero on success, a negative error code otherwise. */ int mtd_ooblayout_ecc(struct mtd_info *mtd, int section, struct mtd_oob_region *oobecc) { struct mtd_info *master = mtd_get_master(mtd); memset(oobecc, 0, sizeof(*oobecc)); if (!master || section < 0) return -EINVAL; if (!master->ooblayout || !master->ooblayout->ecc) return -ENOTSUPP; return master->ooblayout->ecc(master, section, oobecc); } EXPORT_SYMBOL_GPL(mtd_ooblayout_ecc); /** * mtd_ooblayout_free - Get the OOB region definition of a specific free * section * @mtd: MTD device structure * @section: Free section you are interested in. Depending on the layout * you may have all the free bytes stored in a single contiguous * section, or one section per ECC chunk plus an extra section * for the remaining bytes (or other funky layout). * @oobfree: OOB region struct filled with the appropriate free position * information * * This function returns free bytes position in the OOB area. If you want * to get all the free bytes information, then you should call * mtd_ooblayout_free(mtd, section++, oobfree) until it returns -ERANGE. * * Returns zero on success, a negative error code otherwise. */ int mtd_ooblayout_free(struct mtd_info *mtd, int section, struct mtd_oob_region *oobfree) { struct mtd_info *master = mtd_get_master(mtd); memset(oobfree, 0, sizeof(*oobfree)); if (!master || section < 0) return -EINVAL; if (!master->ooblayout || !master->ooblayout->free) return -ENOTSUPP; return master->ooblayout->free(master, section, oobfree); } EXPORT_SYMBOL_GPL(mtd_ooblayout_free); /** * mtd_ooblayout_find_region - Find the region attached to a specific byte * @mtd: mtd info structure * @byte: the byte we are searching for * @sectionp: pointer where the section id will be stored * @oobregion: used to retrieve the ECC position * @iter: iterator function. Should be either mtd_ooblayout_free or * mtd_ooblayout_ecc depending on the region type you're searching for * * This function returns the section id and oobregion information of a * specific byte. For example, say you want to know where the 4th ECC byte is * stored, you'll use: * * mtd_ooblayout_find_region(mtd, 3, &section, &oobregion, mtd_ooblayout_ecc); * * Returns zero on success, a negative error code otherwise. */ static int mtd_ooblayout_find_region(struct mtd_info *mtd, int byte, int *sectionp, struct mtd_oob_region *oobregion, int (*iter)(struct mtd_info *, int section, struct mtd_oob_region *oobregion)) { int pos = 0, ret, section = 0; memset(oobregion, 0, sizeof(*oobregion)); while (1) { ret = iter(mtd, section, oobregion); if (ret) return ret; if (pos + oobregion->length > byte) break; pos += oobregion->length; section++; } /* * Adjust region info to make it start at the beginning at the * 'start' ECC byte. */ oobregion->offset += byte - pos; oobregion->length -= byte - pos; *sectionp = section; return 0; } /** * mtd_ooblayout_find_eccregion - Find the ECC region attached to a specific * ECC byte * @mtd: mtd info structure * @eccbyte: the byte we are searching for * @section: pointer where the section id will be stored * @oobregion: OOB region information * * Works like mtd_ooblayout_find_region() except it searches for a specific ECC * byte. * * Returns zero on success, a negative error code otherwise. */ int mtd_ooblayout_find_eccregion(struct mtd_info *mtd, int eccbyte, int *section, struct mtd_oob_region *oobregion) { return mtd_ooblayout_find_region(mtd, eccbyte, section, oobregion, mtd_ooblayout_ecc); } EXPORT_SYMBOL_GPL(mtd_ooblayout_find_eccregion); /** * mtd_ooblayout_get_bytes - Extract OOB bytes from the oob buffer * @mtd: mtd info structure * @buf: destination buffer to store OOB bytes * @oobbuf: OOB buffer * @start: first byte to retrieve * @nbytes: number of bytes to retrieve * @iter: section iterator * * Extract bytes attached to a specific category (ECC or free) * from the OOB buffer and copy them into buf. * * Returns zero on success, a negative error code otherwise. */ static int mtd_ooblayout_get_bytes(struct mtd_info *mtd, u8 *buf, const u8 *oobbuf, int start, int nbytes, int (*iter)(struct mtd_info *, int section, struct mtd_oob_region *oobregion)) { struct mtd_oob_region oobregion; int section, ret; ret = mtd_ooblayout_find_region(mtd, start, &section, &oobregion, iter); while (!ret) { int cnt; cnt = min_t(int, nbytes, oobregion.length); memcpy(buf, oobbuf + oobregion.offset, cnt); buf += cnt; nbytes -= cnt; if (!nbytes) break; ret = iter(mtd, ++section, &oobregion); } return ret; } /** * mtd_ooblayout_set_bytes - put OOB bytes into the oob buffer * @mtd: mtd info structure * @buf: source buffer to get OOB bytes from * @oobbuf: OOB buffer * @start: first OOB byte to set * @nbytes: number of OOB bytes to set * @iter: section iterator * * Fill the OOB buffer with data provided in buf. The category (ECC or free) * is selected by passing the appropriate iterator. * * Returns zero on success, a negative error code otherwise. */ static int mtd_ooblayout_set_bytes(struct mtd_info *mtd, const u8 *buf, u8 *oobbuf, int start, int nbytes, int (*iter)(struct mtd_info *, int section, struct mtd_oob_region *oobregion)) { struct mtd_oob_region oobregion; int section, ret; ret = mtd_ooblayout_find_region(mtd, start, &section, &oobregion, iter); while (!ret) { int cnt; cnt = min_t(int, nbytes, oobregion.length); memcpy(oobbuf + oobregion.offset, buf, cnt); buf += cnt; nbytes -= cnt; if (!nbytes) break; ret = iter(mtd, ++section, &oobregion); } return ret; } /** * mtd_ooblayout_count_bytes - count the number of bytes in a OOB category * @mtd: mtd info structure * @iter: category iterator * * Count the number of bytes in a given category. * * Returns a positive value on success, a negative error code otherwise. */ static int mtd_ooblayout_count_bytes(struct mtd_info *mtd, int (*iter)(struct mtd_info *, int section, struct mtd_oob_region *oobregion)) { struct mtd_oob_region oobregion; int section = 0, ret, nbytes = 0; while (1) { ret = iter(mtd, section++, &oobregion); if (ret) { if (ret == -ERANGE) ret = nbytes; break; } nbytes += oobregion.length; } return ret; } /** * mtd_ooblayout_get_eccbytes - extract ECC bytes from the oob buffer * @mtd: mtd info structure * @eccbuf: destination buffer to store ECC bytes * @oobbuf: OOB buffer * @start: first ECC byte to retrieve * @nbytes: number of ECC bytes to retrieve * * Works like mtd_ooblayout_get_bytes(), except it acts on ECC bytes. * * Returns zero on success, a negative error code otherwise. */ int mtd_ooblayout_get_eccbytes(struct mtd_info *mtd, u8 *eccbuf, const u8 *oobbuf, int start, int nbytes) { return mtd_ooblayout_get_bytes(mtd, eccbuf, oobbuf, start, nbytes, mtd_ooblayout_ecc); } EXPORT_SYMBOL_GPL(mtd_ooblayout_get_eccbytes); /** * mtd_ooblayout_set_eccbytes - set ECC bytes into the oob buffer * @mtd: mtd info structure * @eccbuf: source buffer to get ECC bytes from * @oobbuf: OOB buffer * @start: first ECC byte to set * @nbytes: number of ECC bytes to set * * Works like mtd_ooblayout_set_bytes(), except it acts on ECC bytes. * * Returns zero on success, a negative error code otherwise. */ int mtd_ooblayout_set_eccbytes(struct mtd_info *mtd, const u8 *eccbuf, u8 *oobbuf, int start, int nbytes) { return mtd_ooblayout_set_bytes(mtd, eccbuf, oobbuf, start, nbytes, mtd_ooblayout_ecc); } EXPORT_SYMBOL_GPL(mtd_ooblayout_set_eccbytes); /** * mtd_ooblayout_get_databytes - extract data bytes from the oob buffer * @mtd: mtd info structure * @databuf: destination buffer to store ECC bytes * @oobbuf: OOB buffer * @start: first ECC byte to retrieve * @nbytes: number of ECC bytes to retrieve * * Works like mtd_ooblayout_get_bytes(), except it acts on free bytes. * * Returns zero on success, a negative error code otherwise. */ int mtd_ooblayout_get_databytes(struct mtd_info *mtd, u8 *databuf, const u8 *oobbuf, int start, int nbytes) { return mtd_ooblayout_get_bytes(mtd, databuf, oobbuf, start, nbytes, mtd_ooblayout_free); } EXPORT_SYMBOL_GPL(mtd_ooblayout_get_databytes); /** * mtd_ooblayout_set_databytes - set data bytes into the oob buffer * @mtd: mtd info structure * @databuf: source buffer to get data bytes from * @oobbuf: OOB buffer * @start: first ECC byte to set * @nbytes: number of ECC bytes to set * * Works like mtd_ooblayout_set_bytes(), except it acts on free bytes. * * Returns zero on success, a negative error code otherwise. */ int mtd_ooblayout_set_databytes(struct mtd_info *mtd, const u8 *databuf, u8 *oobbuf, int start, int nbytes) { return mtd_ooblayout_set_bytes(mtd, databuf, oobbuf, start, nbytes, mtd_ooblayout_free); } EXPORT_SYMBOL_GPL(mtd_ooblayout_set_databytes); /** * mtd_ooblayout_count_freebytes - count the number of free bytes in OOB * @mtd: mtd info structure * * Works like mtd_ooblayout_count_bytes(), except it count free bytes. * * Returns zero on success, a negative error code otherwise. */ int mtd_ooblayout_count_freebytes(struct mtd_info *mtd) { return mtd_ooblayout_count_bytes(mtd, mtd_ooblayout_free); } EXPORT_SYMBOL_GPL(mtd_ooblayout_count_freebytes); /** * mtd_ooblayout_count_eccbytes - count the number of ECC bytes in OOB * @mtd: mtd info structure * * Works like mtd_ooblayout_count_bytes(), except it count ECC bytes. * * Returns zero on success, a negative error code otherwise. */ int mtd_ooblayout_count_eccbytes(struct mtd_info *mtd) { return mtd_ooblayout_count_bytes(mtd, mtd_ooblayout_ecc); } EXPORT_SYMBOL_GPL(mtd_ooblayout_count_eccbytes); /* * Method to access the protection register area, present in some flash * devices. The user data is one time programmable but the factory data is read * only. */ int mtd_get_fact_prot_info(struct mtd_info *mtd, size_t len, size_t *retlen, struct otp_info *buf) { struct mtd_info *master = mtd_get_master(mtd); if (!master->_get_fact_prot_info) return -EOPNOTSUPP; if (!len) return 0; return master->_get_fact_prot_info(master, len, retlen, buf); } EXPORT_SYMBOL_GPL(mtd_get_fact_prot_info); int mtd_read_fact_prot_reg(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf) { struct mtd_info *master = mtd_get_master(mtd); *retlen = 0; if (!master->_read_fact_prot_reg) return -EOPNOTSUPP; if (!len) return 0; return master->_read_fact_prot_reg(master, from, len, retlen, buf); } EXPORT_SYMBOL_GPL(mtd_read_fact_prot_reg); int mtd_get_user_prot_info(struct mtd_info *mtd, size_t len, size_t *retlen, struct otp_info *buf) { struct mtd_info *master = mtd_get_master(mtd); if (!master->_get_user_prot_info) return -EOPNOTSUPP; if (!len) return 0; return master->_get_user_prot_info(master, len, retlen, buf); } EXPORT_SYMBOL_GPL(mtd_get_user_prot_info); int mtd_read_user_prot_reg(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf) { struct mtd_info *master = mtd_get_master(mtd); *retlen = 0; if (!master->_read_user_prot_reg) return -EOPNOTSUPP; if (!len) return 0; return master->_read_user_prot_reg(master, from, len, retlen, buf); } EXPORT_SYMBOL_GPL(mtd_read_user_prot_reg); int mtd_write_user_prot_reg(struct mtd_info *mtd, loff_t to, size_t len, size_t *retlen, const u_char *buf) { struct mtd_info *master = mtd_get_master(mtd); int ret; *retlen = 0; if (!master->_write_user_prot_reg) return -EOPNOTSUPP; if (!len) return 0; ret = master->_write_user_prot_reg(master, to, len, retlen, buf); if (ret) return ret; /* * If no data could be written at all, we are out of memory and * must return -ENOSPC. */ return (*retlen) ? 0 : -ENOSPC; } EXPORT_SYMBOL_GPL(mtd_write_user_prot_reg); int mtd_lock_user_prot_reg(struct mtd_info *mtd, loff_t from, size_t len) { struct mtd_info *master = mtd_get_master(mtd); if (!master->_lock_user_prot_reg) return -EOPNOTSUPP; if (!len) return 0; return master->_lock_user_prot_reg(master, from, len); } EXPORT_SYMBOL_GPL(mtd_lock_user_prot_reg); int mtd_erase_user_prot_reg(struct mtd_info *mtd, loff_t from, size_t len) { struct mtd_info *master = mtd_get_master(mtd); if (!master->_erase_user_prot_reg) return -EOPNOTSUPP; if (!len) return 0; return master->_erase_user_prot_reg(master, from, len); } EXPORT_SYMBOL_GPL(mtd_erase_user_prot_reg); /* Chip-supported device locking */ int mtd_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len) { struct mtd_info *master = mtd_get_master(mtd); if (!master->_lock) return -EOPNOTSUPP; if (ofs < 0 || ofs >= mtd->size || len > mtd->size - ofs) return -EINVAL; if (!len) return 0; if (mtd->flags & MTD_SLC_ON_MLC_EMULATION) { ofs = (loff_t)mtd_div_by_eb(ofs, mtd) * master->erasesize; len = (u64)mtd_div_by_eb(len, mtd) * master->erasesize; } return master->_lock(master, mtd_get_master_ofs(mtd, ofs), len); } EXPORT_SYMBOL_GPL(mtd_lock); int mtd_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len) { struct mtd_info *master = mtd_get_master(mtd); if (!master->_unlock) return -EOPNOTSUPP; if (ofs < 0 || ofs >= mtd->size || len > mtd->size - ofs) return -EINVAL; if (!len) return 0; if (mtd->flags & MTD_SLC_ON_MLC_EMULATION) { ofs = (loff_t)mtd_div_by_eb(ofs, mtd) * master->erasesize; len = (u64)mtd_div_by_eb(len, mtd) * master->erasesize; } return master->_unlock(master, mtd_get_master_ofs(mtd, ofs), len); } EXPORT_SYMBOL_GPL(mtd_unlock); int mtd_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len) { struct mtd_info *master = mtd_get_master(mtd); if (!master->_is_locked) return -EOPNOTSUPP; if (ofs < 0 || ofs >= mtd->size || len > mtd->size - ofs) return -EINVAL; if (!len) return 0; if (mtd->flags & MTD_SLC_ON_MLC_EMULATION) { ofs = (loff_t)mtd_div_by_eb(ofs, mtd) * master->erasesize; len = (u64)mtd_div_by_eb(len, mtd) * master->erasesize; } return master->_is_locked(master, mtd_get_master_ofs(mtd, ofs), len); } EXPORT_SYMBOL_GPL(mtd_is_locked); int mtd_block_isreserved(struct mtd_info *mtd, loff_t ofs) { struct mtd_info *master = mtd_get_master(mtd); if (ofs < 0 || ofs >= mtd->size) return -EINVAL; if (!master->_block_isreserved) return 0; if (mtd->flags & MTD_SLC_ON_MLC_EMULATION) ofs = (loff_t)mtd_div_by_eb(ofs, mtd) * master->erasesize; return master->_block_isreserved(master, mtd_get_master_ofs(mtd, ofs)); } EXPORT_SYMBOL_GPL(mtd_block_isreserved); int mtd_block_isbad(struct mtd_info *mtd, loff_t ofs) { struct mtd_info *master = mtd_get_master(mtd); if (ofs < 0 || ofs >= mtd->size) return -EINVAL; if (!master->_block_isbad) return 0; if (mtd->flags & MTD_SLC_ON_MLC_EMULATION) ofs = (loff_t)mtd_div_by_eb(ofs, mtd) * master->erasesize; return master->_block_isbad(master, mtd_get_master_ofs(mtd, ofs)); } EXPORT_SYMBOL_GPL(mtd_block_isbad); int mtd_block_markbad(struct mtd_info *mtd, loff_t ofs) { struct mtd_info *master = mtd_get_master(mtd); loff_t moffs; int ret; if (!master->_block_markbad) return -EOPNOTSUPP; if (ofs < 0 || ofs >= mtd->size) return -EINVAL; if (!(mtd->flags & MTD_WRITEABLE)) return -EROFS; if (mtd->flags & MTD_SLC_ON_MLC_EMULATION) ofs = (loff_t)mtd_div_by_eb(ofs, mtd) * master->erasesize; moffs = mtd_get_master_ofs(mtd, ofs); if (master->_block_isbad) { ret = master->_block_isbad(master, moffs); if (ret > 0) return 0; } ret = master->_block_markbad(master, moffs); if (ret) return ret; while (mtd->parent) { mtd->ecc_stats.badblocks++; mtd = mtd->parent; } return 0; } EXPORT_SYMBOL_GPL(mtd_block_markbad); ALLOW_ERROR_INJECTION(mtd_block_markbad, ERRNO); /* * default_mtd_writev - the default writev method * @mtd: mtd device description object pointer * @vecs: the vectors to write * @count: count of vectors in @vecs * @to: the MTD device offset to write to * @retlen: on exit contains the count of bytes written to the MTD device. * * This function returns zero in case of success and a negative error code in * case of failure. */ static int default_mtd_writev(struct mtd_info *mtd, const struct kvec *vecs, unsigned long count, loff_t to, size_t *retlen) { unsigned long i; size_t totlen = 0, thislen; int ret = 0; for (i = 0; i < count; i++) { if (!vecs[i].iov_len) continue; ret = mtd_write(mtd, to, vecs[i].iov_len, &thislen, vecs[i].iov_base); totlen += thislen; if (ret || thislen != vecs[i].iov_len) break; to += vecs[i].iov_len; } *retlen = totlen; return ret; } /* * mtd_writev - the vector-based MTD write method * @mtd: mtd device description object pointer * @vecs: the vectors to write * @count: count of vectors in @vecs * @to: the MTD device offset to write to * @retlen: on exit contains the count of bytes written to the MTD device. * * This function returns zero in case of success and a negative error code in * case of failure. */ int mtd_writev(struct mtd_info *mtd, const struct kvec *vecs, unsigned long count, loff_t to, size_t *retlen) { struct mtd_info *master = mtd_get_master(mtd); *retlen = 0; if (!(mtd->flags & MTD_WRITEABLE)) return -EROFS; if (!master->_writev) return default_mtd_writev(mtd, vecs, count, to, retlen); return master->_writev(master, vecs, count, mtd_get_master_ofs(mtd, to), retlen); } EXPORT_SYMBOL_GPL(mtd_writev); /** * mtd_kmalloc_up_to - allocate a contiguous buffer up to the specified size * @mtd: mtd device description object pointer * @size: a pointer to the ideal or maximum size of the allocation, points * to the actual allocation size on success. * * This routine attempts to allocate a contiguous kernel buffer up to * the specified size, backing off the size of the request exponentially * until the request succeeds or until the allocation size falls below * the system page size. This attempts to make sure it does not adversely * impact system performance, so when allocating more than one page, we * ask the memory allocator to avoid re-trying, swapping, writing back * or performing I/O. * * Note, this function also makes sure that the allocated buffer is aligned to * the MTD device's min. I/O unit, i.e. the "mtd->writesize" value. * * This is called, for example by mtd_{read,write} and jffs2_scan_medium, * to handle smaller (i.e. degraded) buffer allocations under low- or * fragmented-memory situations where such reduced allocations, from a * requested ideal, are allowed. * * Returns a pointer to the allocated buffer on success; otherwise, NULL. */ void *mtd_kmalloc_up_to(const struct mtd_info *mtd, size_t *size) { gfp_t flags = __GFP_NOWARN | __GFP_DIRECT_RECLAIM | __GFP_NORETRY; size_t min_alloc = max_t(size_t, mtd->writesize, PAGE_SIZE); void *kbuf; *size = min_t(size_t, *size, KMALLOC_MAX_SIZE); while (*size > min_alloc) { kbuf = kmalloc(*size, flags); if (kbuf) return kbuf; *size >>= 1; *size = ALIGN(*size, mtd->writesize); } /* * For the last resort allocation allow 'kmalloc()' to do all sorts of * things (write-back, dropping caches, etc) by using GFP_KERNEL. */ return kmalloc(*size, GFP_KERNEL); } EXPORT_SYMBOL_GPL(mtd_kmalloc_up_to); #ifdef CONFIG_PROC_FS /*====================================================================*/ /* Support for /proc/mtd */ static int mtd_proc_show(struct seq_file *m, void *v) { struct mtd_info *mtd; seq_puts(m, "dev: size erasesize name\n"); mutex_lock(&mtd_table_mutex); mtd_for_each_device(mtd) { seq_printf(m, "mtd%d: %8.8llx %8.8x \"%s\"\n", mtd->index, (unsigned long long)mtd->size, mtd->erasesize, mtd->name); } mutex_unlock(&mtd_table_mutex); return 0; } #endif /* CONFIG_PROC_FS */ /*====================================================================*/ /* Init code */ static struct backing_dev_info * __init mtd_bdi_init(const char *name) { struct backing_dev_info *bdi; int ret; bdi = bdi_alloc(NUMA_NO_NODE); if (!bdi) return ERR_PTR(-ENOMEM); bdi->ra_pages = 0; bdi->io_pages = 0; /* * We put '-0' suffix to the name to get the same name format as we * used to get. Since this is called only once, we get a unique name. */ ret = bdi_register(bdi, "%.28s-0", name); if (ret) bdi_put(bdi); return ret ? ERR_PTR(ret) : bdi; } static struct proc_dir_entry *proc_mtd; static int __init init_mtd(void) { int ret; ret = class_register(&mtd_class); if (ret) goto err_reg; mtd_bdi = mtd_bdi_init("mtd"); if (IS_ERR(mtd_bdi)) { ret = PTR_ERR(mtd_bdi); goto err_bdi; } proc_mtd = proc_create_single("mtd", 0, NULL, mtd_proc_show); ret = init_mtdchar(); if (ret) goto out_procfs; dfs_dir_mtd = debugfs_create_dir("mtd", NULL); debugfs_create_bool("expert_analysis_mode", 0600, dfs_dir_mtd, &mtd_expert_analysis_mode); return 0; out_procfs: if (proc_mtd) remove_proc_entry("mtd", NULL); bdi_unregister(mtd_bdi); bdi_put(mtd_bdi); err_bdi: class_unregister(&mtd_class); err_reg: pr_err("Error registering mtd class or bdi: %d\n", ret); return ret; } static void __exit cleanup_mtd(void) { debugfs_remove_recursive(dfs_dir_mtd); cleanup_mtdchar(); if (proc_mtd) remove_proc_entry("mtd", NULL); class_unregister(&mtd_class); bdi_unregister(mtd_bdi); bdi_put(mtd_bdi); idr_destroy(&mtd_idr); } module_init(init_mtd); module_exit(cleanup_mtd); MODULE_LICENSE("GPL"); MODULE_AUTHOR("David Woodhouse <dwmw2@infradead.org>"); MODULE_DESCRIPTION("Core MTD registration and access routines");
6 44 45 175 218 49 8 42 49 32 492 2 2 330 330 4 23 4 19 23 259 126 133 773 772 382 404 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * include/net/l3mdev.h - L3 master device API * Copyright (c) 2015 Cumulus Networks * Copyright (c) 2015 David Ahern <dsa@cumulusnetworks.com> */ #ifndef _NET_L3MDEV_H_ #define _NET_L3MDEV_H_ #include <net/dst.h> #include <net/fib_rules.h> enum l3mdev_type { L3MDEV_TYPE_UNSPEC, L3MDEV_TYPE_VRF, __L3MDEV_TYPE_MAX }; #define L3MDEV_TYPE_MAX (__L3MDEV_TYPE_MAX - 1) typedef int (*lookup_by_table_id_t)(struct net *net, u32 table_d); /** * struct l3mdev_ops - l3mdev operations * * @l3mdev_fib_table: Get FIB table id to use for lookups * * @l3mdev_l3_rcv: Hook in L3 receive path * * @l3mdev_l3_out: Hook in L3 output path * * @l3mdev_link_scope_lookup: IPv6 lookup for linklocal and mcast destinations */ struct l3mdev_ops { u32 (*l3mdev_fib_table)(const struct net_device *dev); struct sk_buff * (*l3mdev_l3_rcv)(struct net_device *dev, struct sk_buff *skb, u16 proto); struct sk_buff * (*l3mdev_l3_out)(struct net_device *dev, struct sock *sk, struct sk_buff *skb, u16 proto); /* IPv6 ops */ struct dst_entry * (*l3mdev_link_scope_lookup)(const struct net_device *dev, struct flowi6 *fl6); }; #ifdef CONFIG_NET_L3_MASTER_DEV int l3mdev_table_lookup_register(enum l3mdev_type l3type, lookup_by_table_id_t fn); void l3mdev_table_lookup_unregister(enum l3mdev_type l3type, lookup_by_table_id_t fn); int l3mdev_ifindex_lookup_by_table_id(enum l3mdev_type l3type, struct net *net, u32 table_id); int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, struct fib_lookup_arg *arg); static inline bool l3mdev_fib_rule_iif_match(const struct flowi *fl, int iifindex) { return !(fl->flowi_flags & FLOWI_FLAG_L3MDEV_OIF) && fl->flowi_l3mdev == iifindex; } static inline bool l3mdev_fib_rule_oif_match(const struct flowi *fl, int oifindex) { return fl->flowi_flags & FLOWI_FLAG_L3MDEV_OIF && fl->flowi_l3mdev == oifindex; } void l3mdev_update_flow(struct net *net, struct flowi *fl); int l3mdev_master_ifindex_rcu(const struct net_device *dev); static inline int l3mdev_master_ifindex(struct net_device *dev) { int ifindex; rcu_read_lock(); ifindex = l3mdev_master_ifindex_rcu(dev); rcu_read_unlock(); return ifindex; } static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex) { struct net_device *dev; int rc = 0; if (ifindex) { rcu_read_lock(); dev = dev_get_by_index_rcu(net, ifindex); if (dev) rc = l3mdev_master_ifindex_rcu(dev); rcu_read_unlock(); } return rc; } static inline struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev) { /* netdev_master_upper_dev_get_rcu calls * list_first_or_null_rcu to walk the upper dev list. * list_first_or_null_rcu does not handle a const arg. We aren't * making changes, just want the master device from that list so * typecast to remove the const */ struct net_device *dev = (struct net_device *)_dev; struct net_device *master; if (!dev) return NULL; if (netif_is_l3_master(dev)) master = dev; else if (netif_is_l3_slave(dev)) master = netdev_master_upper_dev_get_rcu(dev); else master = NULL; return master; } int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex); static inline int l3mdev_master_upper_ifindex_by_index(struct net *net, int ifindex) { rcu_read_lock(); ifindex = l3mdev_master_upper_ifindex_by_index_rcu(net, ifindex); rcu_read_unlock(); return ifindex; } u32 l3mdev_fib_table_rcu(const struct net_device *dev); u32 l3mdev_fib_table_by_index(struct net *net, int ifindex); static inline u32 l3mdev_fib_table(const struct net_device *dev) { u32 tb_id; rcu_read_lock(); tb_id = l3mdev_fib_table_rcu(dev); rcu_read_unlock(); return tb_id; } static inline bool netif_index_is_l3_master(struct net *net, int ifindex) { struct net_device *dev; bool rc = false; if (ifindex == 0) return false; rcu_read_lock(); dev = dev_get_by_index_rcu(net, ifindex); if (dev) rc = netif_is_l3_master(dev); rcu_read_unlock(); return rc; } struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6); static inline struct sk_buff *l3mdev_l3_rcv(struct sk_buff *skb, u16 proto) { struct net_device *master = NULL; if (netif_is_l3_slave(skb->dev)) master = netdev_master_upper_dev_get_rcu(skb->dev); else if (netif_is_l3_master(skb->dev) || netif_has_l3_rx_handler(skb->dev)) master = skb->dev; if (master && master->l3mdev_ops->l3mdev_l3_rcv) skb = master->l3mdev_ops->l3mdev_l3_rcv(master, skb, proto); return skb; } static inline struct sk_buff *l3mdev_ip_rcv(struct sk_buff *skb) { return l3mdev_l3_rcv(skb, AF_INET); } static inline struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb) { return l3mdev_l3_rcv(skb, AF_INET6); } static inline struct sk_buff *l3mdev_l3_out(struct sock *sk, struct sk_buff *skb, u16 proto) { struct net_device *dev = skb_dst(skb)->dev; if (netif_is_l3_slave(dev)) { struct net_device *master; rcu_read_lock(); master = netdev_master_upper_dev_get_rcu(dev); if (master && master->l3mdev_ops->l3mdev_l3_out) skb = master->l3mdev_ops->l3mdev_l3_out(master, sk, skb, proto); rcu_read_unlock(); } return skb; } static inline struct sk_buff *l3mdev_ip_out(struct sock *sk, struct sk_buff *skb) { return l3mdev_l3_out(sk, skb, AF_INET); } static inline struct sk_buff *l3mdev_ip6_out(struct sock *sk, struct sk_buff *skb) { return l3mdev_l3_out(sk, skb, AF_INET6); } #else static inline int l3mdev_master_ifindex_rcu(const struct net_device *dev) { return 0; } static inline int l3mdev_master_ifindex(struct net_device *dev) { return 0; } static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex) { return 0; } static inline int l3mdev_master_upper_ifindex_by_index_rcu(struct net *net, int ifindex) { return 0; } static inline int l3mdev_master_upper_ifindex_by_index(struct net *net, int ifindex) { return 0; } static inline struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev) { return NULL; } static inline u32 l3mdev_fib_table_rcu(const struct net_device *dev) { return 0; } static inline u32 l3mdev_fib_table(const struct net_device *dev) { return 0; } static inline u32 l3mdev_fib_table_by_index(struct net *net, int ifindex) { return 0; } static inline bool netif_index_is_l3_master(struct net *net, int ifindex) { return false; } static inline struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6) { return NULL; } static inline struct sk_buff *l3mdev_ip_rcv(struct sk_buff *skb) { return skb; } static inline struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb) { return skb; } static inline struct sk_buff *l3mdev_ip_out(struct sock *sk, struct sk_buff *skb) { return skb; } static inline struct sk_buff *l3mdev_ip6_out(struct sock *sk, struct sk_buff *skb) { return skb; } static inline int l3mdev_table_lookup_register(enum l3mdev_type l3type, lookup_by_table_id_t fn) { return -EOPNOTSUPP; } static inline void l3mdev_table_lookup_unregister(enum l3mdev_type l3type, lookup_by_table_id_t fn) { } static inline int l3mdev_ifindex_lookup_by_table_id(enum l3mdev_type l3type, struct net *net, u32 table_id) { return -ENODEV; } static inline int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, struct fib_lookup_arg *arg) { return 1; } static inline bool l3mdev_fib_rule_iif_match(const struct flowi *fl, int iifindex) { return false; } static inline bool l3mdev_fib_rule_oif_match(const struct flowi *fl, int oifindex) { return false; } static inline void l3mdev_update_flow(struct net *net, struct flowi *fl) { } #endif #endif /* _NET_L3MDEV_H_ */
11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com * Written by Alex Tomas <alex@clusterfs.com> */ #ifndef _EXT4_EXTENTS #define _EXT4_EXTENTS #include "ext4.h" /* * With AGGRESSIVE_TEST defined, the capacity of index/leaf blocks * becomes very small, so index split, in-depth growing and * other hard changes happen much more often. * This is for debug purposes only. */ #define AGGRESSIVE_TEST_ /* * With EXTENTS_STATS defined, the number of blocks and extents * are collected in the truncate path. They'll be shown at * umount time. */ #define EXTENTS_STATS__ /* * If CHECK_BINSEARCH is defined, then the results of the binary search * will also be checked by linear search. */ #define CHECK_BINSEARCH__ /* * ext4_inode has i_block array (60 bytes total). * The first 12 bytes store ext4_extent_header; * the remainder stores an array of ext4_extent. * For non-inode extent blocks, ext4_extent_tail * follows the array. */ /* * This is the extent tail on-disk structure. * All other extent structures are 12 bytes long. It turns out that * block_size % 12 >= 4 for at least all powers of 2 greater than 512, which * covers all valid ext4 block sizes. Therefore, this tail structure can be * crammed into the end of the block without having to rebalance the tree. */ struct ext4_extent_tail { __le32 et_checksum; /* crc32c(uuid+inum+extent_block) */ }; /* * This is the extent on-disk structure. * It's used at the bottom of the tree. */ struct ext4_extent { __le32 ee_block; /* first logical block extent covers */ __le16 ee_len; /* number of blocks covered by extent */ __le16 ee_start_hi; /* high 16 bits of physical block */ __le32 ee_start_lo; /* low 32 bits of physical block */ }; /* * This is index on-disk structure. * It's used at all the levels except the bottom. */ struct ext4_extent_idx { __le32 ei_block; /* index covers logical blocks from 'block' */ __le32 ei_leaf_lo; /* pointer to the physical block of the next * * level. leaf or next index could be there */ __le16 ei_leaf_hi; /* high 16 bits of physical block */ __u16 ei_unused; }; /* * Each block (leaves and indexes), even inode-stored has header. */ struct ext4_extent_header { __le16 eh_magic; /* probably will support different formats */ __le16 eh_entries; /* number of valid entries */ __le16 eh_max; /* capacity of store in entries */ __le16 eh_depth; /* has tree real underlying blocks? */ __le32 eh_generation; /* generation of the tree */ }; #define EXT4_EXT_MAGIC cpu_to_le16(0xf30a) #define EXT4_MAX_EXTENT_DEPTH 5 #define EXT4_EXTENT_TAIL_OFFSET(hdr) \ (sizeof(struct ext4_extent_header) + \ (sizeof(struct ext4_extent) * le16_to_cpu((hdr)->eh_max))) static inline struct ext4_extent_tail * find_ext4_extent_tail(struct ext4_extent_header *eh) { return (struct ext4_extent_tail *)(((void *)eh) + EXT4_EXTENT_TAIL_OFFSET(eh)); } /* * Array of ext4_ext_path contains path to some extent. * Creation/lookup routines use it for traversal/splitting/etc. * Truncate uses it to simulate recursive walking. */ struct ext4_ext_path { ext4_fsblk_t p_block; __u16 p_depth; __u16 p_maxdepth; struct ext4_extent *p_ext; struct ext4_extent_idx *p_idx; struct ext4_extent_header *p_hdr; struct buffer_head *p_bh; }; /* * Used to record a portion of a cluster found at the beginning or end * of an extent while traversing the extent tree during space removal. * A partial cluster may be removed if it does not contain blocks shared * with extents that aren't being deleted (tofree state). Otherwise, * it cannot be removed (nofree state). */ struct partial_cluster { ext4_fsblk_t pclu; /* physical cluster number */ ext4_lblk_t lblk; /* logical block number within logical cluster */ enum {initial, tofree, nofree} state; }; /* * structure for external API */ /* * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an * initialized extent. This is 2^15 and not (2^16 - 1), since we use the * MSB of ee_len field in the extent datastructure to signify if this * particular extent is an initialized extent or an unwritten (i.e. * preallocated). * EXT_UNWRITTEN_MAX_LEN is the maximum number of blocks we can have in an * unwritten extent. * If ee_len is <= 0x8000, it is an initialized extent. Otherwise, it is an * unwritten one. In other words, if MSB of ee_len is set, it is an * unwritten extent with only one special scenario when ee_len = 0x8000. * In this case we can not have an unwritten extent of zero length and * thus we make it as a special case of initialized extent with 0x8000 length. * This way we get better extent-to-group alignment for initialized extents. * Hence, the maximum number of blocks we can have in an *initialized* * extent is 2^15 (32768) and in an *unwritten* extent is 2^15-1 (32767). */ #define EXT_INIT_MAX_LEN (1UL << 15) #define EXT_UNWRITTEN_MAX_LEN (EXT_INIT_MAX_LEN - 1) #define EXT_FIRST_EXTENT(__hdr__) \ ((struct ext4_extent *) (((char *) (__hdr__)) + \ sizeof(struct ext4_extent_header))) #define EXT_FIRST_INDEX(__hdr__) \ ((struct ext4_extent_idx *) (((char *) (__hdr__)) + \ sizeof(struct ext4_extent_header))) #define EXT_HAS_FREE_INDEX(__path__) \ (le16_to_cpu((__path__)->p_hdr->eh_entries) \ < le16_to_cpu((__path__)->p_hdr->eh_max)) #define EXT_LAST_EXTENT(__hdr__) \ (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1) #define EXT_LAST_INDEX(__hdr__) \ (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1) #define EXT_MAX_EXTENT(__hdr__) \ ((le16_to_cpu((__hdr__)->eh_max)) ? \ ((EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)) \ : NULL) #define EXT_MAX_INDEX(__hdr__) \ ((le16_to_cpu((__hdr__)->eh_max)) ? \ ((EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)) \ : NULL) static inline struct ext4_extent_header *ext_inode_hdr(struct inode *inode) { return (struct ext4_extent_header *) EXT4_I(inode)->i_data; } static inline struct ext4_extent_header *ext_block_hdr(struct buffer_head *bh) { return (struct ext4_extent_header *) bh->b_data; } static inline unsigned short ext_depth(struct inode *inode) { return le16_to_cpu(ext_inode_hdr(inode)->eh_depth); } static inline void ext4_ext_mark_unwritten(struct ext4_extent *ext) { /* We can not have an unwritten extent of zero length! */ BUG_ON((le16_to_cpu(ext->ee_len) & ~EXT_INIT_MAX_LEN) == 0); ext->ee_len |= cpu_to_le16(EXT_INIT_MAX_LEN); } static inline int ext4_ext_is_unwritten(struct ext4_extent *ext) { /* Extent with ee_len of 0x8000 is treated as an initialized extent */ return (le16_to_cpu(ext->ee_len) > EXT_INIT_MAX_LEN); } static inline int ext4_ext_get_actual_len(struct ext4_extent *ext) { return (le16_to_cpu(ext->ee_len) <= EXT_INIT_MAX_LEN ? le16_to_cpu(ext->ee_len) : (le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN)); } static inline void ext4_ext_mark_initialized(struct ext4_extent *ext) { ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext)); } /* * ext4_ext_pblock: * combine low and high parts of physical block number into ext4_fsblk_t */ static inline ext4_fsblk_t ext4_ext_pblock(struct ext4_extent *ex) { ext4_fsblk_t block; block = le32_to_cpu(ex->ee_start_lo); block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1; return block; } /* * ext4_idx_pblock: * combine low and high parts of a leaf physical block number into ext4_fsblk_t */ static inline ext4_fsblk_t ext4_idx_pblock(struct ext4_extent_idx *ix) { ext4_fsblk_t block; block = le32_to_cpu(ix->ei_leaf_lo); block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1; return block; } /* * ext4_ext_store_pblock: * stores a large physical block number into an extent struct, * breaking it into parts */ static inline void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb) { ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff)); ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); } /* * ext4_idx_store_pblock: * stores a large physical block number into an index struct, * breaking it into parts */ static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb) { ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff)); ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); } #endif /* _EXT4_EXTENTS */
2153 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 // SPDX-License-Identifier: GPL-2.0 /* * Provides code common for host and device side USB. * * If either host side (ie. CONFIG_USB=y) or device side USB stack * (ie. CONFIG_USB_GADGET=y) is compiled in the kernel, this module is * compiled-in as well. Otherwise, if either of the two stacks is * compiled as module, this file is compiled as module as well. */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/of.h> #include <linux/platform_device.h> #include <linux/usb/ch9.h> #include <linux/usb/of.h> #include <linux/usb/otg.h> #include <linux/of_platform.h> #include <linux/debugfs.h> #include "common.h" static const char *const ep_type_names[] = { [USB_ENDPOINT_XFER_CONTROL] = "ctrl", [USB_ENDPOINT_XFER_ISOC] = "isoc", [USB_ENDPOINT_XFER_BULK] = "bulk", [USB_ENDPOINT_XFER_INT] = "intr", }; /** * usb_ep_type_string() - Returns human readable-name of the endpoint type. * @ep_type: The endpoint type to return human-readable name for. If it's not * any of the types: USB_ENDPOINT_XFER_{CONTROL, ISOC, BULK, INT}, * usually got by usb_endpoint_type(), the string 'unknown' will be returned. */ const char *usb_ep_type_string(int ep_type) { if (ep_type < 0 || ep_type >= ARRAY_SIZE(ep_type_names)) return "unknown"; return ep_type_names[ep_type]; } EXPORT_SYMBOL_GPL(usb_ep_type_string); /** * usb_otg_state_string() - returns human readable name of OTG state. * @state: the OTG state to return the human readable name of. If it's not * any of the states defined in usb_otg_state enum, 'UNDEFINED' will be * returned. */ const char *usb_otg_state_string(enum usb_otg_state state) { static const char *const names[] = { [OTG_STATE_A_IDLE] = "a_idle", [OTG_STATE_A_WAIT_VRISE] = "a_wait_vrise", [OTG_STATE_A_WAIT_BCON] = "a_wait_bcon", [OTG_STATE_A_HOST] = "a_host", [OTG_STATE_A_SUSPEND] = "a_suspend", [OTG_STATE_A_PERIPHERAL] = "a_peripheral", [OTG_STATE_A_WAIT_VFALL] = "a_wait_vfall", [OTG_STATE_A_VBUS_ERR] = "a_vbus_err", [OTG_STATE_B_IDLE] = "b_idle", [OTG_STATE_B_SRP_INIT] = "b_srp_init", [OTG_STATE_B_PERIPHERAL] = "b_peripheral", [OTG_STATE_B_WAIT_ACON] = "b_wait_acon", [OTG_STATE_B_HOST] = "b_host", }; if (state < 0 || state >= ARRAY_SIZE(names)) return "UNDEFINED"; return names[state]; } EXPORT_SYMBOL_GPL(usb_otg_state_string); static const char *const speed_names[] = { [USB_SPEED_UNKNOWN] = "UNKNOWN", [USB_SPEED_LOW] = "low-speed", [USB_SPEED_FULL] = "full-speed", [USB_SPEED_HIGH] = "high-speed", [USB_SPEED_WIRELESS] = "wireless", [USB_SPEED_SUPER] = "super-speed", [USB_SPEED_SUPER_PLUS] = "super-speed-plus", }; static const char *const ssp_rate[] = { [USB_SSP_GEN_UNKNOWN] = "UNKNOWN", [USB_SSP_GEN_2x1] = "super-speed-plus-gen2x1", [USB_SSP_GEN_1x2] = "super-speed-plus-gen1x2", [USB_SSP_GEN_2x2] = "super-speed-plus-gen2x2", }; /** * usb_speed_string() - Returns human readable-name of the speed. * @speed: The speed to return human-readable name for. If it's not * any of the speeds defined in usb_device_speed enum, string for * USB_SPEED_UNKNOWN will be returned. */ const char *usb_speed_string(enum usb_device_speed speed) { if (speed < 0 || speed >= ARRAY_SIZE(speed_names)) speed = USB_SPEED_UNKNOWN; return speed_names[speed]; } EXPORT_SYMBOL_GPL(usb_speed_string); /** * usb_get_maximum_speed - Get maximum requested speed for a given USB * controller. * @dev: Pointer to the given USB controller device * * The function gets the maximum speed string from property "maximum-speed", * and returns the corresponding enum usb_device_speed. */ enum usb_device_speed usb_get_maximum_speed(struct device *dev) { const char *p = "maximum-speed"; int ret; ret = device_property_match_property_string(dev, p, ssp_rate, ARRAY_SIZE(ssp_rate)); if (ret > 0) return USB_SPEED_SUPER_PLUS; ret = device_property_match_property_string(dev, p, speed_names, ARRAY_SIZE(speed_names)); if (ret > 0) return ret; return USB_SPEED_UNKNOWN; } EXPORT_SYMBOL_GPL(usb_get_maximum_speed); /** * usb_get_maximum_ssp_rate - Get the signaling rate generation and lane count * of a SuperSpeed Plus capable device. * @dev: Pointer to the given USB controller device * * If the string from "maximum-speed" property is super-speed-plus-genXxY where * 'X' is the generation number and 'Y' is the number of lanes, then this * function returns the corresponding enum usb_ssp_rate. */ enum usb_ssp_rate usb_get_maximum_ssp_rate(struct device *dev) { const char *maximum_speed; int ret; ret = device_property_read_string(dev, "maximum-speed", &maximum_speed); if (ret < 0) return USB_SSP_GEN_UNKNOWN; ret = match_string(ssp_rate, ARRAY_SIZE(ssp_rate), maximum_speed); return (ret < 0) ? USB_SSP_GEN_UNKNOWN : ret; } EXPORT_SYMBOL_GPL(usb_get_maximum_ssp_rate); /** * usb_state_string - Returns human readable name for the state. * @state: The state to return a human-readable name for. If it's not * any of the states devices in usb_device_state_string enum, * the string UNKNOWN will be returned. */ const char *usb_state_string(enum usb_device_state state) { static const char *const names[] = { [USB_STATE_NOTATTACHED] = "not attached", [USB_STATE_ATTACHED] = "attached", [USB_STATE_POWERED] = "powered", [USB_STATE_RECONNECTING] = "reconnecting", [USB_STATE_UNAUTHENTICATED] = "unauthenticated", [USB_STATE_DEFAULT] = "default", [USB_STATE_ADDRESS] = "addressed", [USB_STATE_CONFIGURED] = "configured", [USB_STATE_SUSPENDED] = "suspended", }; if (state < 0 || state >= ARRAY_SIZE(names)) return "UNKNOWN"; return names[state]; } EXPORT_SYMBOL_GPL(usb_state_string); static const char *const usb_dr_modes[] = { [USB_DR_MODE_UNKNOWN] = "", [USB_DR_MODE_HOST] = "host", [USB_DR_MODE_PERIPHERAL] = "peripheral", [USB_DR_MODE_OTG] = "otg", }; /** * usb_get_dr_mode_from_string() - Get dual role mode for given string * @str: String to find the corresponding dual role mode for * * This function performs a lookup for the given string and returns the * corresponding enum usb_dr_mode. If no match for the string could be found, * 'USB_DR_MODE_UNKNOWN' is returned. */ static enum usb_dr_mode usb_get_dr_mode_from_string(const char *str) { int ret; ret = match_string(usb_dr_modes, ARRAY_SIZE(usb_dr_modes), str); return (ret < 0) ? USB_DR_MODE_UNKNOWN : ret; } enum usb_dr_mode usb_get_dr_mode(struct device *dev) { const char *dr_mode; int err; err = device_property_read_string(dev, "dr_mode", &dr_mode); if (err < 0) return USB_DR_MODE_UNKNOWN; return usb_get_dr_mode_from_string(dr_mode); } EXPORT_SYMBOL_GPL(usb_get_dr_mode); /** * usb_get_role_switch_default_mode - Get default mode for given device * @dev: Pointer to the given device * * The function gets string from property 'role-switch-default-mode', * and returns the corresponding enum usb_dr_mode. */ enum usb_dr_mode usb_get_role_switch_default_mode(struct device *dev) { const char *str; int ret; ret = device_property_read_string(dev, "role-switch-default-mode", &str); if (ret < 0) return USB_DR_MODE_UNKNOWN; return usb_get_dr_mode_from_string(str); } EXPORT_SYMBOL_GPL(usb_get_role_switch_default_mode); /** * usb_decode_interval - Decode bInterval into the time expressed in 1us unit * @epd: The descriptor of the endpoint * @speed: The speed that the endpoint works as * * Function returns the interval expressed in 1us unit for servicing * endpoint for data transfers. */ unsigned int usb_decode_interval(const struct usb_endpoint_descriptor *epd, enum usb_device_speed speed) { unsigned int interval = 0; switch (usb_endpoint_type(epd)) { case USB_ENDPOINT_XFER_CONTROL: /* uframes per NAK */ if (speed == USB_SPEED_HIGH) interval = epd->bInterval; break; case USB_ENDPOINT_XFER_ISOC: interval = 1 << (epd->bInterval - 1); break; case USB_ENDPOINT_XFER_BULK: /* uframes per NAK */ if (speed == USB_SPEED_HIGH && usb_endpoint_dir_out(epd)) interval = epd->bInterval; break; case USB_ENDPOINT_XFER_INT: if (speed >= USB_SPEED_HIGH) interval = 1 << (epd->bInterval - 1); else interval = epd->bInterval; break; } interval *= (speed >= USB_SPEED_HIGH) ? 125 : 1000; return interval; } EXPORT_SYMBOL_GPL(usb_decode_interval); #ifdef CONFIG_OF /** * of_usb_get_dr_mode_by_phy - Get dual role mode for the controller device * which is associated with the given phy device_node * @np: Pointer to the given phy device_node * @arg0: phandle args[0] for phy's with #phy-cells >= 1, or -1 for * phys which do not have phy-cells * * In dts a usb controller associates with phy devices. The function gets * the string from property 'dr_mode' of the controller associated with the * given phy device node, and returns the correspondig enum usb_dr_mode. */ enum usb_dr_mode of_usb_get_dr_mode_by_phy(struct device_node *np, int arg0) { struct device_node *controller; struct of_phandle_args args; const char *dr_mode; int index; int err; for_each_node_with_property(controller, "phys") { if (!of_device_is_available(controller)) continue; index = 0; do { if (arg0 == -1) { args.np = of_parse_phandle(controller, "phys", index); args.args_count = 0; } else { err = of_parse_phandle_with_args(controller, "phys", "#phy-cells", index, &args); if (err) break; } of_node_put(args.np); if (args.np == np && (args.args_count == 0 || args.args[0] == arg0)) goto finish; index++; } while (args.np); } finish: err = of_property_read_string(controller, "dr_mode", &dr_mode); of_node_put(controller); if (err < 0) return USB_DR_MODE_UNKNOWN; return usb_get_dr_mode_from_string(dr_mode); } EXPORT_SYMBOL_GPL(of_usb_get_dr_mode_by_phy); /** * of_usb_host_tpl_support - to get if Targeted Peripheral List is supported * for given targeted hosts (non-PC hosts) * @np: Pointer to the given device_node * * The function gets if the targeted hosts support TPL or not */ bool of_usb_host_tpl_support(struct device_node *np) { return of_property_read_bool(np, "tpl-support"); } EXPORT_SYMBOL_GPL(of_usb_host_tpl_support); /** * of_usb_update_otg_caps - to update usb otg capabilities according to * the passed properties in DT. * @np: Pointer to the given device_node * @otg_caps: Pointer to the target usb_otg_caps to be set * * The function updates the otg capabilities */ int of_usb_update_otg_caps(struct device_node *np, struct usb_otg_caps *otg_caps) { u32 otg_rev; if (!otg_caps) return -EINVAL; if (!of_property_read_u32(np, "otg-rev", &otg_rev)) { switch (otg_rev) { case 0x0100: case 0x0120: case 0x0130: case 0x0200: /* Choose the lesser one if it's already been set */ if (otg_caps->otg_rev) otg_caps->otg_rev = min_t(u16, otg_rev, otg_caps->otg_rev); else otg_caps->otg_rev = otg_rev; break; default: pr_err("%pOF: unsupported otg-rev: 0x%x\n", np, otg_rev); return -EINVAL; } } else { /* * otg-rev is mandatory for otg properties, if not passed * we set it to be 0 and assume it's a legacy otg device. * Non-dt platform can set it afterwards. */ otg_caps->otg_rev = 0; } if (of_property_read_bool(np, "hnp-disable")) otg_caps->hnp_support = false; if (of_property_read_bool(np, "srp-disable")) otg_caps->srp_support = false; if (of_property_read_bool(np, "adp-disable") || (otg_caps->otg_rev < 0x0200)) otg_caps->adp_support = false; return 0; } EXPORT_SYMBOL_GPL(of_usb_update_otg_caps); /** * usb_of_get_companion_dev - Find the companion device * @dev: the device pointer to find a companion * * Find the companion device from platform bus. * * Takes a reference to the returned struct device which needs to be dropped * after use. * * Return: On success, a pointer to the companion device, %NULL on failure. */ struct device *usb_of_get_companion_dev(struct device *dev) { struct device_node *node; struct platform_device *pdev = NULL; node = of_parse_phandle(dev->of_node, "companion", 0); if (node) pdev = of_find_device_by_node(node); of_node_put(node); return pdev ? &pdev->dev : NULL; } EXPORT_SYMBOL_GPL(usb_of_get_companion_dev); #endif struct dentry *usb_debug_root; EXPORT_SYMBOL_GPL(usb_debug_root); DEFINE_MUTEX(usb_dynids_lock); EXPORT_SYMBOL_GPL(usb_dynids_lock); static int __init usb_common_init(void) { usb_debug_root = debugfs_create_dir("usb", NULL); ledtrig_usb_init(); return 0; } static void __exit usb_common_exit(void) { ledtrig_usb_exit(); debugfs_remove_recursive(usb_debug_root); } subsys_initcall(usb_common_init); module_exit(usb_common_exit); MODULE_DESCRIPTION("Common code for host and device side USB"); MODULE_LICENSE("GPL");
17 782 1050 500 535 51 42 1 1054 1056 770 1 348 409 79 610 77 43 35 1 77 78 96 99 100 99 100 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_NETFILTER_H #define __LINUX_NETFILTER_H #include <linux/init.h> #include <linux/skbuff.h> #include <linux/net.h> #include <linux/if.h> #include <linux/in.h> #include <linux/in6.h> #include <linux/wait.h> #include <linux/list.h> #include <linux/static_key.h> #include <linux/module.h> #include <linux/netfilter_defs.h> #include <linux/netdevice.h> #include <linux/sockptr.h> #include <net/net_namespace.h> static inline int NF_DROP_GETERR(int verdict) { return -(verdict >> NF_VERDICT_QBITS); } static __always_inline int NF_DROP_REASON(struct sk_buff *skb, enum skb_drop_reason reason, u32 err) { BUILD_BUG_ON(err > 0xffff); kfree_skb_reason(skb, reason); return ((err << 16) | NF_STOLEN); } static inline int nf_inet_addr_cmp(const union nf_inet_addr *a1, const union nf_inet_addr *a2) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 const unsigned long *ul1 = (const unsigned long *)a1; const unsigned long *ul2 = (const unsigned long *)a2; return ((ul1[0] ^ ul2[0]) | (ul1[1] ^ ul2[1])) == 0UL; #else return a1->all[0] == a2->all[0] && a1->all[1] == a2->all[1] && a1->all[2] == a2->all[2] && a1->all[3] == a2->all[3]; #endif } static inline void nf_inet_addr_mask(const union nf_inet_addr *a1, union nf_inet_addr *result, const union nf_inet_addr *mask) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 const unsigned long *ua = (const unsigned long *)a1; unsigned long *ur = (unsigned long *)result; const unsigned long *um = (const unsigned long *)mask; ur[0] = ua[0] & um[0]; ur[1] = ua[1] & um[1]; #else result->all[0] = a1->all[0] & mask->all[0]; result->all[1] = a1->all[1] & mask->all[1]; result->all[2] = a1->all[2] & mask->all[2]; result->all[3] = a1->all[3] & mask->all[3]; #endif } int netfilter_init(void); struct sk_buff; struct nf_hook_ops; struct sock; struct nf_hook_state { u8 hook; u8 pf; struct net_device *in; struct net_device *out; struct sock *sk; struct net *net; int (*okfn)(struct net *, struct sock *, struct sk_buff *); }; typedef unsigned int nf_hookfn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); enum nf_hook_ops_type { NF_HOOK_OP_UNDEFINED, NF_HOOK_OP_NF_TABLES, NF_HOOK_OP_BPF, NF_HOOK_OP_NFT_FT, }; struct nf_hook_ops { struct list_head list; struct rcu_head rcu; /* User fills in from here down. */ nf_hookfn *hook; struct net_device *dev; void *priv; u8 pf; enum nf_hook_ops_type hook_ops_type:8; unsigned int hooknum; /* Hooks are ordered in ascending priority. */ int priority; }; struct nf_hook_entry { nf_hookfn *hook; void *priv; }; struct nf_hook_entries_rcu_head { struct rcu_head head; void *allocation; }; struct nf_hook_entries { u16 num_hook_entries; /* padding */ struct nf_hook_entry hooks[]; /* trailer: pointers to original orig_ops of each hook, * followed by rcu_head and scratch space used for freeing * the structure via call_rcu. * * This is not part of struct nf_hook_entry since its only * needed in slow path (hook register/unregister): * const struct nf_hook_ops *orig_ops[] * * For the same reason, we store this at end -- its * only needed when a hook is deleted, not during * packet path processing: * struct nf_hook_entries_rcu_head head */ }; #ifdef CONFIG_NETFILTER static inline struct nf_hook_ops **nf_hook_entries_get_hook_ops(const struct nf_hook_entries *e) { unsigned int n = e->num_hook_entries; const void *hook_end; hook_end = &e->hooks[n]; /* this is *past* ->hooks[]! */ return (struct nf_hook_ops **)hook_end; } static inline int nf_hook_entry_hookfn(const struct nf_hook_entry *entry, struct sk_buff *skb, struct nf_hook_state *state) { return entry->hook(entry->priv, skb, state); } static inline void nf_hook_state_init(struct nf_hook_state *p, unsigned int hook, u_int8_t pf, struct net_device *indev, struct net_device *outdev, struct sock *sk, struct net *net, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { p->hook = hook; p->pf = pf; p->in = indev; p->out = outdev; p->sk = sk; p->net = net; p->okfn = okfn; } struct nf_sockopt_ops { struct list_head list; u_int8_t pf; /* Non-inclusive ranges: use 0/0/NULL to never get called. */ int set_optmin; int set_optmax; int (*set)(struct sock *sk, int optval, sockptr_t arg, unsigned int len); int get_optmin; int get_optmax; int (*get)(struct sock *sk, int optval, void __user *user, int *len); /* Use the module struct to lock set/get code in place */ struct module *owner; }; /* Function to register/unregister hook points. */ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *ops); void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *ops); int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg, unsigned int n); void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg, unsigned int n); /* Functions to register get/setsockopt ranges (non-inclusive). You need to check permissions yourself! */ int nf_register_sockopt(struct nf_sockopt_ops *reg); void nf_unregister_sockopt(struct nf_sockopt_ops *reg); #ifdef CONFIG_JUMP_LABEL extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; #endif int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, const struct nf_hook_entries *e, unsigned int i); void nf_hook_slow_list(struct list_head *head, struct nf_hook_state *state, const struct nf_hook_entries *e); /** * nf_hook - call a netfilter hook * * Returns 1 if the hook has allowed the packet to pass. The function * okfn must be invoked by the caller in this case. Any other return * value indicates the packet has been consumed by the hook. */ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { struct nf_hook_entries *hook_head = NULL; int ret = 1; #ifdef CONFIG_JUMP_LABEL if (__builtin_constant_p(pf) && __builtin_constant_p(hook) && !static_key_false(&nf_hooks_needed[pf][hook])) return 1; #endif rcu_read_lock(); switch (pf) { case NFPROTO_IPV4: hook_head = rcu_dereference(net->nf.hooks_ipv4[hook]); break; case NFPROTO_IPV6: hook_head = rcu_dereference(net->nf.hooks_ipv6[hook]); break; case NFPROTO_ARP: #ifdef CONFIG_NETFILTER_FAMILY_ARP if (WARN_ON_ONCE(hook >= ARRAY_SIZE(net->nf.hooks_arp))) break; hook_head = rcu_dereference(net->nf.hooks_arp[hook]); #endif break; case NFPROTO_BRIDGE: #ifdef CONFIG_NETFILTER_FAMILY_BRIDGE hook_head = rcu_dereference(net->nf.hooks_bridge[hook]); #endif break; default: WARN_ON_ONCE(1); break; } if (hook_head) { struct nf_hook_state state; nf_hook_state_init(&state, hook, pf, indev, outdev, sk, net, okfn); ret = nf_hook_slow(skb, &state, hook_head, 0); } rcu_read_unlock(); return ret; } /* Activate hook; either okfn or kfree_skb called, unless a hook returns NF_STOLEN (in which case, it's up to the hook to deal with the consequences). Returns -ERRNO if packet dropped. Zero means queued, stolen or accepted. */ /* RR: > I don't want nf_hook to return anything because people might forget > about async and trust the return value to mean "packet was ok". AK: Just document it clearly, then you can expect some sense from kernel coders :) */ static inline int NF_HOOK_COND(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, int (*okfn)(struct net *, struct sock *, struct sk_buff *), bool cond) { int ret; if (!cond || ((ret = nf_hook(pf, hook, net, sk, skb, in, out, okfn)) == 1)) ret = okfn(net, sk, skb); return ret; } static inline int NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { int ret = nf_hook(pf, hook, net, sk, skb, in, out, okfn); if (ret == 1) ret = okfn(net, sk, skb); return ret; } static inline void NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct list_head *head, struct net_device *in, struct net_device *out, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { struct nf_hook_entries *hook_head = NULL; #ifdef CONFIG_JUMP_LABEL if (__builtin_constant_p(pf) && __builtin_constant_p(hook) && !static_key_false(&nf_hooks_needed[pf][hook])) return; #endif rcu_read_lock(); switch (pf) { case NFPROTO_IPV4: hook_head = rcu_dereference(net->nf.hooks_ipv4[hook]); break; case NFPROTO_IPV6: hook_head = rcu_dereference(net->nf.hooks_ipv6[hook]); break; default: WARN_ON_ONCE(1); break; } if (hook_head) { struct nf_hook_state state; nf_hook_state_init(&state, hook, pf, in, out, sk, net, okfn); nf_hook_slow_list(head, &state, hook_head); } rcu_read_unlock(); } /* Call setsockopt() */ int nf_setsockopt(struct sock *sk, u_int8_t pf, int optval, sockptr_t opt, unsigned int len); int nf_getsockopt(struct sock *sk, u_int8_t pf, int optval, char __user *opt, int *len); struct flowi; struct nf_queue_entry; __sum16 nf_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol, unsigned short family); __sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, unsigned int len, u_int8_t protocol, unsigned short family); int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict, unsigned short family); #include <net/flow.h> struct nf_conn; enum nf_nat_manip_type; struct nlattr; struct nf_nat_hook { int (*parse_nat_setup)(struct nf_conn *ct, enum nf_nat_manip_type manip, const struct nlattr *attr); void (*decode_session)(struct sk_buff *skb, struct flowi *fl); void (*remove_nat_bysrc)(struct nf_conn *ct); }; extern const struct nf_nat_hook __rcu *nf_nat_hook; static inline void nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) { #if IS_ENABLED(CONFIG_NF_NAT) const struct nf_nat_hook *nat_hook; rcu_read_lock(); nat_hook = rcu_dereference(nf_nat_hook); if (nat_hook && nat_hook->decode_session) nat_hook->decode_session(skb, fl); rcu_read_unlock(); #endif } #else /* !CONFIG_NETFILTER */ static inline int NF_HOOK_COND(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, int (*okfn)(struct net *, struct sock *, struct sk_buff *), bool cond) { return okfn(net, sk, skb); } static inline int NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { return okfn(net, sk, skb); } static inline void NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct list_head *head, struct net_device *in, struct net_device *out, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { /* nothing to do */ } static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { return 1; } struct flowi; static inline void nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) { } #endif /*CONFIG_NETFILTER*/ #if IS_ENABLED(CONFIG_NF_CONNTRACK) #include <linux/netfilter/nf_conntrack_zones_common.h> void nf_ct_attach(struct sk_buff *, const struct sk_buff *); void nf_ct_set_closing(struct nf_conntrack *nfct); struct nf_conntrack_tuple; bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, const struct sk_buff *skb); #else static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} static inline void nf_ct_set_closing(struct nf_conntrack *nfct) {} struct nf_conntrack_tuple; static inline bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple, const struct sk_buff *skb) { return false; } #endif struct nf_conn; enum ip_conntrack_info; struct nf_ct_hook { int (*update)(struct net *net, struct sk_buff *skb); void (*destroy)(struct nf_conntrack *); bool (*get_tuple_skb)(struct nf_conntrack_tuple *, const struct sk_buff *); void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb); void (*set_closing)(struct nf_conntrack *nfct); int (*confirm)(struct sk_buff *skb); u32 (*get_id)(const struct nf_conntrack *nfct); }; extern const struct nf_ct_hook __rcu *nf_ct_hook; struct nlattr; struct nfnl_ct_hook { size_t (*build_size)(const struct nf_conn *ct); int (*build)(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, u_int16_t ct_attr, u_int16_t ct_info_attr); int (*parse)(const struct nlattr *attr, struct nf_conn *ct); int (*attach_expect)(const struct nlattr *attr, struct nf_conn *ct, u32 portid, u32 report); void (*seq_adjust)(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, s32 off); }; extern const struct nfnl_ct_hook __rcu *nfnl_ct_hook; struct nf_defrag_hook { struct module *owner; int (*enable)(struct net *net); void (*disable)(struct net *net); }; extern const struct nf_defrag_hook __rcu *nf_defrag_v4_hook; extern const struct nf_defrag_hook __rcu *nf_defrag_v6_hook; /* * Contains bitmask of ctnetlink event subscribers, if any. * Can't be pernet due to NETLINK_LISTEN_ALL_NSID setsockopt flag. */ extern u8 nf_ctnetlink_has_listener; #endif /*__LINUX_NETFILTER_H*/
8 9 10 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 // SPDX-License-Identifier: GPL-2.0-only /* * Landlock - Unique identification number generator * * Copyright © 2024-2025 Microsoft Corporation */ #include <kunit/test.h> #include <linux/atomic.h> #include <linux/bitops.h> #include <linux/random.h> #include <linux/spinlock.h> #include "common.h" #include "id.h" #define COUNTER_PRE_INIT 0 static atomic64_t next_id = ATOMIC64_INIT(COUNTER_PRE_INIT); static void __init init_id(atomic64_t *const counter, const u32 random_32bits) { u64 init; /* * Ensures sure 64-bit values are always used by user space (or may * fail with -EOVERFLOW), and makes this testable. */ init = BIT_ULL(32); /* * Makes a large (2^32) boot-time value to limit ID collision in logs * from different boots, and to limit info leak about the number of * initially (relative to the reader) created elements (e.g. domains). */ init += random_32bits; /* Sets first or ignores. This will be the first ID. */ atomic64_cmpxchg(counter, COUNTER_PRE_INIT, init); } #ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST static void __init test_init_min(struct kunit *const test) { atomic64_t counter = ATOMIC64_INIT(COUNTER_PRE_INIT); init_id(&counter, 0); KUNIT_EXPECT_EQ(test, atomic64_read(&counter), 1ULL + U32_MAX); } static void __init test_init_max(struct kunit *const test) { atomic64_t counter = ATOMIC64_INIT(COUNTER_PRE_INIT); init_id(&counter, ~0); KUNIT_EXPECT_EQ(test, atomic64_read(&counter), 1 + (2ULL * U32_MAX)); } static void __init test_init_once(struct kunit *const test) { const u64 first_init = 1ULL + U32_MAX; atomic64_t counter = ATOMIC64_INIT(COUNTER_PRE_INIT); init_id(&counter, 0); KUNIT_EXPECT_EQ(test, atomic64_read(&counter), first_init); init_id(&counter, ~0); KUNIT_EXPECT_EQ_MSG( test, atomic64_read(&counter), first_init, "Should still have the same value after the subsequent init_id()"); } #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ void __init landlock_init_id(void) { return init_id(&next_id, get_random_u32()); } /* * It's not worth it to try to hide the monotonic counter because it can still * be inferred (with N counter ranges), and if we are allowed to read the inode * number we should also be allowed to read the time creation anyway, and it * can be handy to store and sort domain IDs for user space. * * Returns the value of next_id and increment it to let some space for the next * one. */ static u64 get_id_range(size_t number_of_ids, atomic64_t *const counter, u8 random_4bits) { u64 id, step; /* * We should return at least 1 ID, and we may need a set of consecutive * ones (e.g. to generate a set of inodes). */ if (WARN_ON_ONCE(number_of_ids <= 0)) number_of_ids = 1; /* * Blurs the next ID guess with 1/16 ratio. We get 2^(64 - 4) - * (2 * 2^32), so a bit less than 2^60 available IDs, which should be * much more than enough considering the number of CPU cycles required * to get a new ID (e.g. a full landlock_restrict_self() call), and the * cost of draining all available IDs during the system's uptime. */ random_4bits &= 0b1111; step = number_of_ids + random_4bits; /* It is safe to cast a signed atomic to an unsigned value. */ id = atomic64_fetch_add(step, counter); /* Warns if landlock_init_id() was not called. */ WARN_ON_ONCE(id == COUNTER_PRE_INIT); return id; } #ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST static u8 get_random_u8_positive(void) { /* max() evaluates its arguments once. */ return max(1, get_random_u8()); } static void test_range1_rand0(struct kunit *const test) { atomic64_t counter; u64 init; init = get_random_u32(); atomic64_set(&counter, init); KUNIT_EXPECT_EQ(test, get_id_range(1, &counter, 0), init); KUNIT_EXPECT_EQ(test, get_id_range(get_random_u8_positive(), &counter, get_random_u8()), init + 1); } static void test_range1_rand1(struct kunit *const test) { atomic64_t counter; u64 init; init = get_random_u32(); atomic64_set(&counter, init); KUNIT_EXPECT_EQ(test, get_id_range(1, &counter, 1), init); KUNIT_EXPECT_EQ(test, get_id_range(get_random_u8_positive(), &counter, get_random_u8()), init + 2); } static void test_range1_rand15(struct kunit *const test) { atomic64_t counter; u64 init; init = get_random_u32(); atomic64_set(&counter, init); KUNIT_EXPECT_EQ(test, get_id_range(1, &counter, 15), init); KUNIT_EXPECT_EQ(test, get_id_range(get_random_u8_positive(), &counter, get_random_u8()), init + 16); } static void test_range1_rand16(struct kunit *const test) { atomic64_t counter; u64 init; init = get_random_u32(); atomic64_set(&counter, init); KUNIT_EXPECT_EQ(test, get_id_range(1, &counter, 16), init); KUNIT_EXPECT_EQ(test, get_id_range(get_random_u8_positive(), &counter, get_random_u8()), init + 1); } static void test_range2_rand0(struct kunit *const test) { atomic64_t counter; u64 init; init = get_random_u32(); atomic64_set(&counter, init); KUNIT_EXPECT_EQ(test, get_id_range(2, &counter, 0), init); KUNIT_EXPECT_EQ(test, get_id_range(get_random_u8_positive(), &counter, get_random_u8()), init + 2); } static void test_range2_rand1(struct kunit *const test) { atomic64_t counter; u64 init; init = get_random_u32(); atomic64_set(&counter, init); KUNIT_EXPECT_EQ(test, get_id_range(2, &counter, 1), init); KUNIT_EXPECT_EQ(test, get_id_range(get_random_u8_positive(), &counter, get_random_u8()), init + 3); } static void test_range2_rand2(struct kunit *const test) { atomic64_t counter; u64 init; init = get_random_u32(); atomic64_set(&counter, init); KUNIT_EXPECT_EQ(test, get_id_range(2, &counter, 2), init); KUNIT_EXPECT_EQ(test, get_id_range(get_random_u8_positive(), &counter, get_random_u8()), init + 4); } static void test_range2_rand15(struct kunit *const test) { atomic64_t counter; u64 init; init = get_random_u32(); atomic64_set(&counter, init); KUNIT_EXPECT_EQ(test, get_id_range(2, &counter, 15), init); KUNIT_EXPECT_EQ(test, get_id_range(get_random_u8_positive(), &counter, get_random_u8()), init + 17); } static void test_range2_rand16(struct kunit *const test) { atomic64_t counter; u64 init; init = get_random_u32(); atomic64_set(&counter, init); KUNIT_EXPECT_EQ(test, get_id_range(2, &counter, 16), init); KUNIT_EXPECT_EQ(test, get_id_range(get_random_u8_positive(), &counter, get_random_u8()), init + 2); } #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ /** * landlock_get_id_range - Get a range of unique IDs * * @number_of_ids: Number of IDs to hold. Must be greater than one. * * Returns: The first ID in the range. */ u64 landlock_get_id_range(size_t number_of_ids) { return get_id_range(number_of_ids, &next_id, get_random_u8()); } #ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST static struct kunit_case __refdata test_cases[] = { /* clang-format off */ KUNIT_CASE(test_init_min), KUNIT_CASE(test_init_max), KUNIT_CASE(test_init_once), KUNIT_CASE(test_range1_rand0), KUNIT_CASE(test_range1_rand1), KUNIT_CASE(test_range1_rand15), KUNIT_CASE(test_range1_rand16), KUNIT_CASE(test_range2_rand0), KUNIT_CASE(test_range2_rand1), KUNIT_CASE(test_range2_rand2), KUNIT_CASE(test_range2_rand15), KUNIT_CASE(test_range2_rand16), {} /* clang-format on */ }; static struct kunit_suite test_suite = { .name = "landlock_id", .test_cases = test_cases, }; kunit_test_init_section_suite(test_suite); #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */
1 1 1 1 1 1 20 19 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 // SPDX-License-Identifier: GPL-2.0-only /* Helper handling for netfilter. */ /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> * (C) 2006-2012 Patrick McHardy <kaber@trash.net> */ #include <linux/types.h> #include <linux/netfilter.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/vmalloc.h> #include <linux/stddef.h> #include <linux/random.h> #include <linux/err.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/rculist.h> #include <linux/rtnetlink.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_extend.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_seqadj.h> #include <net/netfilter/nf_log.h> #include <net/ip.h> static DEFINE_MUTEX(nf_ct_helper_mutex); struct hlist_head *nf_ct_helper_hash __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_helper_hash); unsigned int nf_ct_helper_hsize __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_helper_hsize); static unsigned int nf_ct_helper_count __read_mostly; static DEFINE_MUTEX(nf_ct_nat_helpers_mutex); static struct list_head nf_ct_nat_helpers __read_mostly; /* Stupid hash, but collision free for the default registrations of the * helpers currently in the kernel. */ static unsigned int helper_hash(const struct nf_conntrack_tuple *tuple) { return (((tuple->src.l3num << 8) | tuple->dst.protonum) ^ (__force __u16)tuple->src.u.all) % nf_ct_helper_hsize; } struct nf_conntrack_helper * __nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum) { struct nf_conntrack_helper *h; unsigned int i; for (i = 0; i < nf_ct_helper_hsize; i++) { hlist_for_each_entry_rcu(h, &nf_ct_helper_hash[i], hnode) { if (strcmp(h->name, name)) continue; if (h->tuple.src.l3num != NFPROTO_UNSPEC && h->tuple.src.l3num != l3num) continue; if (h->tuple.dst.protonum == protonum) return h; } } return NULL; } EXPORT_SYMBOL_GPL(__nf_conntrack_helper_find); struct nf_conntrack_helper * nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum) { struct nf_conntrack_helper *h; rcu_read_lock(); h = __nf_conntrack_helper_find(name, l3num, protonum); #ifdef CONFIG_MODULES if (h == NULL) { rcu_read_unlock(); if (request_module("nfct-helper-%s", name) == 0) { rcu_read_lock(); h = __nf_conntrack_helper_find(name, l3num, protonum); } else { return h; } } #endif if (h != NULL && !try_module_get(h->me)) h = NULL; if (h != NULL && !refcount_inc_not_zero(&h->refcnt)) { module_put(h->me); h = NULL; } rcu_read_unlock(); return h; } EXPORT_SYMBOL_GPL(nf_conntrack_helper_try_module_get); void nf_conntrack_helper_put(struct nf_conntrack_helper *helper) { refcount_dec(&helper->refcnt); module_put(helper->me); } EXPORT_SYMBOL_GPL(nf_conntrack_helper_put); static struct nf_conntrack_nat_helper * nf_conntrack_nat_helper_find(const char *mod_name) { struct nf_conntrack_nat_helper *cur; bool found = false; list_for_each_entry_rcu(cur, &nf_ct_nat_helpers, list) { if (!strcmp(cur->mod_name, mod_name)) { found = true; break; } } return found ? cur : NULL; } int nf_nat_helper_try_module_get(const char *name, u16 l3num, u8 protonum) { struct nf_conntrack_helper *h; struct nf_conntrack_nat_helper *nat; char mod_name[NF_CT_HELPER_NAME_LEN]; int ret = 0; rcu_read_lock(); h = __nf_conntrack_helper_find(name, l3num, protonum); if (!h) { rcu_read_unlock(); return -ENOENT; } nat = nf_conntrack_nat_helper_find(h->nat_mod_name); if (!nat) { snprintf(mod_name, sizeof(mod_name), "%s", h->nat_mod_name); rcu_read_unlock(); request_module("%s", mod_name); rcu_read_lock(); nat = nf_conntrack_nat_helper_find(mod_name); if (!nat) { rcu_read_unlock(); return -ENOENT; } } if (!try_module_get(nat->module)) ret = -ENOENT; rcu_read_unlock(); return ret; } EXPORT_SYMBOL_GPL(nf_nat_helper_try_module_get); void nf_nat_helper_put(struct nf_conntrack_helper *helper) { struct nf_conntrack_nat_helper *nat; nat = nf_conntrack_nat_helper_find(helper->nat_mod_name); if (WARN_ON_ONCE(!nat)) return; module_put(nat->module); } EXPORT_SYMBOL_GPL(nf_nat_helper_put); struct nf_conn_help * nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp) { struct nf_conn_help *help; help = nf_ct_ext_add(ct, NF_CT_EXT_HELPER, gfp); if (help) INIT_HLIST_HEAD(&help->expectations); else pr_debug("failed to add helper extension area"); return help; } EXPORT_SYMBOL_GPL(nf_ct_helper_ext_add); int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, gfp_t flags) { struct nf_conntrack_helper *helper = NULL; struct nf_conn_help *help; /* We already got a helper explicitly attached (e.g. nft_ct) */ if (test_bit(IPS_HELPER_BIT, &ct->status)) return 0; if (WARN_ON_ONCE(!tmpl)) return 0; help = nfct_help(tmpl); if (help != NULL) { helper = rcu_dereference(help->helper); set_bit(IPS_HELPER_BIT, &ct->status); } help = nfct_help(ct); if (helper == NULL) { if (help) RCU_INIT_POINTER(help->helper, NULL); return 0; } if (help == NULL) { help = nf_ct_helper_ext_add(ct, flags); if (help == NULL) return -ENOMEM; } else { /* We only allow helper re-assignment of the same sort since * we cannot reallocate the helper extension area. */ struct nf_conntrack_helper *tmp = rcu_dereference(help->helper); if (tmp && tmp->help != helper->help) { RCU_INIT_POINTER(help->helper, NULL); return 0; } } rcu_assign_pointer(help->helper, helper); return 0; } EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper); /* appropriate ct lock protecting must be taken by caller */ static int unhelp(struct nf_conn *ct, void *me) { struct nf_conn_help *help = nfct_help(ct); if (help && rcu_dereference_raw(help->helper) == me) { nf_conntrack_event(IPCT_HELPER, ct); RCU_INIT_POINTER(help->helper, NULL); } /* We are not intended to delete this conntrack. */ return 0; } void nf_ct_helper_destroy(struct nf_conn *ct) { struct nf_conn_help *help = nfct_help(ct); struct nf_conntrack_helper *helper; if (help) { rcu_read_lock(); helper = rcu_dereference(help->helper); if (helper && helper->destroy) helper->destroy(ct); rcu_read_unlock(); } } static LIST_HEAD(nf_ct_helper_expectfn_list); void nf_ct_helper_expectfn_register(struct nf_ct_helper_expectfn *n) { spin_lock_bh(&nf_conntrack_expect_lock); list_add_rcu(&n->head, &nf_ct_helper_expectfn_list); spin_unlock_bh(&nf_conntrack_expect_lock); } EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_register); void nf_ct_helper_expectfn_unregister(struct nf_ct_helper_expectfn *n) { spin_lock_bh(&nf_conntrack_expect_lock); list_del_rcu(&n->head); spin_unlock_bh(&nf_conntrack_expect_lock); } EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_unregister); /* Caller should hold the rcu lock */ struct nf_ct_helper_expectfn * nf_ct_helper_expectfn_find_by_name(const char *name) { struct nf_ct_helper_expectfn *cur; bool found = false; list_for_each_entry_rcu(cur, &nf_ct_helper_expectfn_list, head) { if (!strcmp(cur->name, name)) { found = true; break; } } return found ? cur : NULL; } EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_find_by_name); /* Caller should hold the rcu lock */ struct nf_ct_helper_expectfn * nf_ct_helper_expectfn_find_by_symbol(const void *symbol) { struct nf_ct_helper_expectfn *cur; bool found = false; list_for_each_entry_rcu(cur, &nf_ct_helper_expectfn_list, head) { if (cur->expectfn == symbol) { found = true; break; } } return found ? cur : NULL; } EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_find_by_symbol); __printf(3, 4) void nf_ct_helper_log(struct sk_buff *skb, const struct nf_conn *ct, const char *fmt, ...) { const struct nf_conn_help *help; const struct nf_conntrack_helper *helper; struct va_format vaf; va_list args; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; /* Called from the helper function, this call never fails */ help = nfct_help(ct); /* rcu_read_lock()ed by nf_hook_thresh */ helper = rcu_dereference(help->helper); nf_log_packet(nf_ct_net(ct), nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, "nf_ct_%s: dropping packet: %pV ", helper->name, &vaf); va_end(args); } EXPORT_SYMBOL_GPL(nf_ct_helper_log); int nf_conntrack_helper_register(struct nf_conntrack_helper *me) { struct nf_conntrack_tuple_mask mask = { .src.u.all = htons(0xFFFF) }; unsigned int h = helper_hash(&me->tuple); struct nf_conntrack_helper *cur; int ret = 0, i; BUG_ON(me->expect_policy == NULL); BUG_ON(me->expect_class_max >= NF_CT_MAX_EXPECT_CLASSES); BUG_ON(strlen(me->name) > NF_CT_HELPER_NAME_LEN - 1); if (!nf_ct_helper_hash) return -ENOENT; if (me->expect_policy->max_expected > NF_CT_EXPECT_MAX_CNT) return -EINVAL; mutex_lock(&nf_ct_helper_mutex); for (i = 0; i < nf_ct_helper_hsize; i++) { hlist_for_each_entry(cur, &nf_ct_helper_hash[i], hnode) { if (!strcmp(cur->name, me->name) && (cur->tuple.src.l3num == NFPROTO_UNSPEC || cur->tuple.src.l3num == me->tuple.src.l3num) && cur->tuple.dst.protonum == me->tuple.dst.protonum) { ret = -EBUSY; goto out; } } } /* avoid unpredictable behaviour for auto_assign_helper */ if (!(me->flags & NF_CT_HELPER_F_USERSPACE)) { hlist_for_each_entry(cur, &nf_ct_helper_hash[h], hnode) { if (nf_ct_tuple_src_mask_cmp(&cur->tuple, &me->tuple, &mask)) { ret = -EBUSY; goto out; } } } refcount_set(&me->refcnt, 1); hlist_add_head_rcu(&me->hnode, &nf_ct_helper_hash[h]); nf_ct_helper_count++; out: mutex_unlock(&nf_ct_helper_mutex); return ret; } EXPORT_SYMBOL_GPL(nf_conntrack_helper_register); static bool expect_iter_me(struct nf_conntrack_expect *exp, void *data) { struct nf_conn_help *help = nfct_help(exp->master); const struct nf_conntrack_helper *me = data; const struct nf_conntrack_helper *this; if (exp->helper == me) return true; this = rcu_dereference_protected(help->helper, lockdep_is_held(&nf_conntrack_expect_lock)); return this == me; } void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) { mutex_lock(&nf_ct_helper_mutex); hlist_del_rcu(&me->hnode); nf_ct_helper_count--; mutex_unlock(&nf_ct_helper_mutex); /* Make sure every nothing is still using the helper unless its a * connection in the hash. */ synchronize_rcu(); nf_ct_expect_iterate_destroy(expect_iter_me, NULL); nf_ct_iterate_destroy(unhelp, me); } EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); void nf_ct_helper_init(struct nf_conntrack_helper *helper, u16 l3num, u16 protonum, const char *name, u16 default_port, u16 spec_port, u32 id, const struct nf_conntrack_expect_policy *exp_pol, u32 expect_class_max, int (*help)(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo), int (*from_nlattr)(struct nlattr *attr, struct nf_conn *ct), struct module *module) { helper->tuple.src.l3num = l3num; helper->tuple.dst.protonum = protonum; helper->tuple.src.u.all = htons(spec_port); helper->expect_policy = exp_pol; helper->expect_class_max = expect_class_max; helper->help = help; helper->from_nlattr = from_nlattr; helper->me = module; snprintf(helper->nat_mod_name, sizeof(helper->nat_mod_name), NF_NAT_HELPER_PREFIX "%s", name); if (spec_port == default_port) snprintf(helper->name, sizeof(helper->name), "%s", name); else snprintf(helper->name, sizeof(helper->name), "%s-%u", name, id); } EXPORT_SYMBOL_GPL(nf_ct_helper_init); int nf_conntrack_helpers_register(struct nf_conntrack_helper *helper, unsigned int n) { unsigned int i; int err = 0; for (i = 0; i < n; i++) { err = nf_conntrack_helper_register(&helper[i]); if (err < 0) goto err; } return err; err: if (i > 0) nf_conntrack_helpers_unregister(helper, i); return err; } EXPORT_SYMBOL_GPL(nf_conntrack_helpers_register); void nf_conntrack_helpers_unregister(struct nf_conntrack_helper *helper, unsigned int n) { while (n-- > 0) nf_conntrack_helper_unregister(&helper[n]); } EXPORT_SYMBOL_GPL(nf_conntrack_helpers_unregister); void nf_nat_helper_register(struct nf_conntrack_nat_helper *nat) { mutex_lock(&nf_ct_nat_helpers_mutex); list_add_rcu(&nat->list, &nf_ct_nat_helpers); mutex_unlock(&nf_ct_nat_helpers_mutex); } EXPORT_SYMBOL_GPL(nf_nat_helper_register); void nf_nat_helper_unregister(struct nf_conntrack_nat_helper *nat) { mutex_lock(&nf_ct_nat_helpers_mutex); list_del_rcu(&nat->list); mutex_unlock(&nf_ct_nat_helpers_mutex); } EXPORT_SYMBOL_GPL(nf_nat_helper_unregister); int nf_conntrack_helper_init(void) { nf_ct_helper_hsize = 1; /* gets rounded up to use one page */ nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0); if (!nf_ct_helper_hash) return -ENOMEM; INIT_LIST_HEAD(&nf_ct_nat_helpers); return 0; } void nf_conntrack_helper_fini(void) { kvfree(nf_ct_helper_hash); nf_ct_helper_hash = NULL; }
140 139 147 140 147 1 4 4 4 4 151 148 128 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 // SPDX-License-Identifier: GPL-2.0-only /* * 9P entry point * * Copyright (C) 2007 by Latchesar Ionkov <lucho@ionkov.net> * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/kmod.h> #include <linux/errno.h> #include <linux/sched.h> #include <linux/moduleparam.h> #include <net/9p/9p.h> #include <linux/fs.h> #include <net/9p/client.h> #include <net/9p/transport.h> #include <linux/list.h> #include <linux/spinlock.h> #ifdef CONFIG_NET_9P_DEBUG unsigned int p9_debug_level; /* feature-rific global debug level */ EXPORT_SYMBOL(p9_debug_level); module_param_named(debug, p9_debug_level, uint, 0); MODULE_PARM_DESC(debug, "9P debugging level"); void _p9_debug(enum p9_debug_flags level, const char *func, const char *fmt, ...) { struct va_format vaf; va_list args; if ((p9_debug_level & level) != level) return; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; if (level == P9_DEBUG_9P) pr_notice("(%8.8d) %pV", task_pid_nr(current), &vaf); else pr_notice("-- %s (%d): %pV", func, task_pid_nr(current), &vaf); va_end(args); } EXPORT_SYMBOL(_p9_debug); #endif /* Dynamic Transport Registration Routines */ static DEFINE_SPINLOCK(v9fs_trans_lock); static LIST_HEAD(v9fs_trans_list); /** * v9fs_register_trans - register a new transport with 9p * @m: structure describing the transport module and entry points * */ void v9fs_register_trans(struct p9_trans_module *m) { spin_lock(&v9fs_trans_lock); list_add_tail(&m->list, &v9fs_trans_list); spin_unlock(&v9fs_trans_lock); } EXPORT_SYMBOL(v9fs_register_trans); /** * v9fs_unregister_trans - unregister a 9p transport * @m: the transport to remove * */ void v9fs_unregister_trans(struct p9_trans_module *m) { spin_lock(&v9fs_trans_lock); list_del_init(&m->list); spin_unlock(&v9fs_trans_lock); } EXPORT_SYMBOL(v9fs_unregister_trans); static struct p9_trans_module *_p9_get_trans_by_name(const char *s) { struct p9_trans_module *t, *found = NULL; spin_lock(&v9fs_trans_lock); list_for_each_entry(t, &v9fs_trans_list, list) if (strcmp(t->name, s) == 0 && try_module_get(t->owner)) { found = t; break; } spin_unlock(&v9fs_trans_lock); return found; } /** * v9fs_get_trans_by_name - get transport with the matching name * @s: string identifying transport * */ struct p9_trans_module *v9fs_get_trans_by_name(const char *s) { struct p9_trans_module *found = NULL; found = _p9_get_trans_by_name(s); #ifdef CONFIG_MODULES if (!found) { request_module("9p-%s", s); found = _p9_get_trans_by_name(s); } #endif return found; } EXPORT_SYMBOL(v9fs_get_trans_by_name); static const char * const v9fs_default_transports[] = { "virtio", "tcp", "fd", "unix", "xen", "rdma", }; /** * v9fs_get_default_trans - get the default transport * */ struct p9_trans_module *v9fs_get_default_trans(void) { struct p9_trans_module *t, *found = NULL; int i; spin_lock(&v9fs_trans_lock); list_for_each_entry(t, &v9fs_trans_list, list) if (t->def && try_module_get(t->owner)) { found = t; break; } if (!found) list_for_each_entry(t, &v9fs_trans_list, list) if (try_module_get(t->owner)) { found = t; break; } spin_unlock(&v9fs_trans_lock); for (i = 0; !found && i < ARRAY_SIZE(v9fs_default_transports); i++) found = v9fs_get_trans_by_name(v9fs_default_transports[i]); return found; } EXPORT_SYMBOL(v9fs_get_default_trans); /** * v9fs_put_trans - put trans * @m: transport to put * */ void v9fs_put_trans(struct p9_trans_module *m) { if (m) module_put(m->owner); } EXPORT_SYMBOL(v9fs_put_trans); /** * init_p9 - Initialize module * */ static int __init init_p9(void) { int ret; ret = p9_client_init(); if (ret) return ret; p9_error_init(); pr_info("Installing 9P2000 support\n"); return ret; } /** * exit_p9 - shutdown module * */ static void __exit exit_p9(void) { pr_info("Unloading 9P2000 support\n"); p9_client_exit(); } module_init(init_p9) module_exit(exit_p9) MODULE_AUTHOR("Latchesar Ionkov <lucho@ionkov.net>"); MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>"); MODULE_AUTHOR("Ron Minnich <rminnich@lanl.gov>"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Plan 9 Resource Sharing Support (9P2000)");
34 394 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 /* SPDX-License-Identifier: GPL-2.0 */ /* File: linux/xattr.h Extended attributes handling. Copyright (C) 2001 by Andreas Gruenbacher <a.gruenbacher@computer.org> Copyright (c) 2001-2002 Silicon Graphics, Inc. All Rights Reserved. Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com> */ #ifndef _LINUX_XATTR_H #define _LINUX_XATTR_H #include <linux/slab.h> #include <linux/types.h> #include <linux/spinlock.h> #include <linux/mm.h> #include <linux/user_namespace.h> #include <uapi/linux/xattr.h> /* List of all open_how "versions". */ #define XATTR_ARGS_SIZE_VER0 16 /* sizeof first published struct */ #define XATTR_ARGS_SIZE_LATEST XATTR_ARGS_SIZE_VER0 struct inode; struct dentry; static inline bool is_posix_acl_xattr(const char *name) { return (strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS) == 0) || (strcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT) == 0); } /* * struct xattr_handler: When @name is set, match attributes with exactly that * name. When @prefix is set instead, match attributes with that prefix and * with a non-empty suffix. */ struct xattr_handler { const char *name; const char *prefix; int flags; /* fs private flags */ bool (*list)(struct dentry *dentry); int (*get)(const struct xattr_handler *, struct dentry *dentry, struct inode *inode, const char *name, void *buffer, size_t size); int (*set)(const struct xattr_handler *, struct mnt_idmap *idmap, struct dentry *dentry, struct inode *inode, const char *name, const void *buffer, size_t size, int flags); }; /** * xattr_handler_can_list - check whether xattr can be listed * @handler: handler for this type of xattr * @dentry: dentry whose inode xattr to list * * Determine whether the xattr associated with @dentry can be listed given * @handler. * * Return: true if xattr can be listed, false if not. */ static inline bool xattr_handler_can_list(const struct xattr_handler *handler, struct dentry *dentry) { return handler && (!handler->list || handler->list(dentry)); } const char *xattr_full_name(const struct xattr_handler *, const char *); struct xattr { const char *name; void *value; size_t value_len; }; ssize_t __vfs_getxattr(struct dentry *, struct inode *, const char *, void *, size_t); ssize_t vfs_getxattr(struct mnt_idmap *, struct dentry *, const char *, void *, size_t); ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size); int __vfs_setxattr(struct mnt_idmap *, struct dentry *, struct inode *, const char *, const void *, size_t, int); int __vfs_setxattr_noperm(struct mnt_idmap *, struct dentry *, const char *, const void *, size_t, int); int __vfs_setxattr_locked(struct mnt_idmap *, struct dentry *, const char *, const void *, size_t, int, struct delegated_inode *); int vfs_setxattr(struct mnt_idmap *, struct dentry *, const char *, const void *, size_t, int); int __vfs_removexattr(struct mnt_idmap *, struct dentry *, const char *); int __vfs_removexattr_locked(struct mnt_idmap *, struct dentry *, const char *, struct delegated_inode *); int vfs_removexattr(struct mnt_idmap *, struct dentry *, const char *); ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size); int vfs_getxattr_alloc(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, char **xattr_value, size_t size, gfp_t flags); int xattr_supports_user_prefix(struct inode *inode); static inline const char *xattr_prefix(const struct xattr_handler *handler) { return handler->prefix ?: handler->name; } struct simple_xattrs { struct rb_root rb_root; rwlock_t lock; }; struct simple_xattr { struct rb_node rb_node; char *name; size_t size; char value[] __counted_by(size); }; void simple_xattrs_init(struct simple_xattrs *xattrs); void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space); size_t simple_xattr_space(const char *name, size_t size); struct simple_xattr *simple_xattr_alloc(const void *value, size_t size); void simple_xattr_free(struct simple_xattr *xattr); int simple_xattr_get(struct simple_xattrs *xattrs, const char *name, void *buffer, size_t size); struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs, const char *name, const void *value, size_t size, int flags); ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, char *buffer, size_t size); void simple_xattr_add(struct simple_xattrs *xattrs, struct simple_xattr *new_xattr); int xattr_list_one(char **buffer, ssize_t *remaining_size, const char *name); #endif /* _LINUX_XATTR_H */
6 5 1 5 1 3 4 1 1 2 7 6 4 6 2 1 3 4 2 1 1 44 44 42 1 1 1 3 3 2 2 2 1 1 1 3 3 3 1 2 1 1 3 7 7 7 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 // SPDX-License-Identifier: GPL-2.0 #include <linux/compiler_types.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/fsnotify.h> #include <linux/gfp.h> #include <linux/idr.h> #include <linux/init.h> #include <linux/ipc_namespace.h> #include <linux/kdev_t.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/namei.h> #include <linux/magic.h> #include <linux/major.h> #include <linux/miscdevice.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/mount.h> #include <linux/fs_parser.h> #include <linux/sched.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/spinlock_types.h> #include <linux/stddef.h> #include <linux/string.h> #include <linux/types.h> #include <linux/uaccess.h> #include <linux/user_namespace.h> #include <linux/xarray.h> #include <uapi/linux/android/binder.h> #include <uapi/linux/android/binderfs.h> #include "binder_internal.h" #define FIRST_INODE 1 #define SECOND_INODE 2 #define INODE_OFFSET 3 #define BINDERFS_MAX_MINOR (1U << MINORBITS) /* Ensure that the initial ipc namespace always has devices available. */ #define BINDERFS_MAX_MINOR_CAPPED (BINDERFS_MAX_MINOR - 4) static dev_t binderfs_dev; static DEFINE_MUTEX(binderfs_minors_mutex); static DEFINE_IDA(binderfs_minors); enum binderfs_param { Opt_max, Opt_stats_mode, }; enum binderfs_stats_mode { binderfs_stats_mode_unset, binderfs_stats_mode_global, }; struct binder_features { bool oneway_spam_detection; bool extended_error; bool freeze_notification; bool transaction_report; }; static const struct constant_table binderfs_param_stats[] = { { "global", binderfs_stats_mode_global }, {} }; static const struct fs_parameter_spec binderfs_fs_parameters[] = { fsparam_u32("max", Opt_max), fsparam_enum("stats", Opt_stats_mode, binderfs_param_stats), {} }; static struct binder_features binder_features = { .oneway_spam_detection = true, .extended_error = true, .freeze_notification = true, .transaction_report = true, }; static inline struct binderfs_info *BINDERFS_SB(const struct super_block *sb) { return sb->s_fs_info; } bool is_binderfs_device(const struct inode *inode) { if (inode->i_sb->s_magic == BINDERFS_SUPER_MAGIC) return true; return false; } /** * binderfs_binder_device_create - allocate inode from super block of a * binderfs mount * @ref_inode: inode from which the super block will be taken * @userp: buffer to copy information about new device for userspace to * @req: struct binderfs_device as copied from userspace * * This function allocates a new binder_device and reserves a new minor * number for it. * Minor numbers are limited and tracked globally in binderfs_minors. The * function will stash a struct binder_device for the specific binder * device in i_private of the inode. * It will go on to allocate a new inode from the super block of the * filesystem mount, stash a struct binder_device in its i_private field * and attach a dentry to that inode. * * Return: 0 on success, negative errno on failure */ static int binderfs_binder_device_create(struct inode *ref_inode, struct binderfs_device __user *userp, struct binderfs_device *req) { int minor, ret; struct dentry *dentry, *root; struct binder_device *device; char *name = NULL; struct inode *inode = NULL; struct super_block *sb = ref_inode->i_sb; struct binderfs_info *info = sb->s_fs_info; #if defined(CONFIG_IPC_NS) bool use_reserve = (info->ipc_ns == &init_ipc_ns); #else bool use_reserve = true; #endif /* Reserve new minor number for the new device. */ mutex_lock(&binderfs_minors_mutex); if (++info->device_count <= info->mount_opts.max) minor = ida_alloc_max(&binderfs_minors, use_reserve ? BINDERFS_MAX_MINOR : BINDERFS_MAX_MINOR_CAPPED, GFP_KERNEL); else minor = -ENOSPC; if (minor < 0) { --info->device_count; mutex_unlock(&binderfs_minors_mutex); return minor; } mutex_unlock(&binderfs_minors_mutex); ret = -ENOMEM; device = kzalloc(sizeof(*device), GFP_KERNEL); if (!device) goto err; inode = new_inode(sb); if (!inode) goto err; inode->i_ino = minor + INODE_OFFSET; simple_inode_init_ts(inode); init_special_inode(inode, S_IFCHR | 0600, MKDEV(MAJOR(binderfs_dev), minor)); inode->i_fop = &binder_fops; inode->i_uid = info->root_uid; inode->i_gid = info->root_gid; req->name[BINDERFS_MAX_NAME] = '\0'; /* NUL-terminate */ name = kstrdup(req->name, GFP_KERNEL); if (!name) goto err; refcount_set(&device->ref, 1); device->binderfs_inode = inode; device->context.binder_context_mgr_uid = INVALID_UID; device->context.name = name; device->miscdev.name = name; device->miscdev.minor = minor; mutex_init(&device->context.context_mgr_node_lock); req->major = MAJOR(binderfs_dev); req->minor = minor; if (userp && copy_to_user(userp, req, sizeof(*req))) { ret = -EFAULT; goto err; } root = sb->s_root; dentry = simple_start_creating(root, name); if (IS_ERR(dentry)) { ret = PTR_ERR(dentry); goto err; } inode->i_private = device; d_make_persistent(dentry, inode); fsnotify_create(root->d_inode, dentry); simple_done_creating(dentry); binder_add_device(device); return 0; err: kfree(name); kfree(device); mutex_lock(&binderfs_minors_mutex); --info->device_count; ida_free(&binderfs_minors, minor); mutex_unlock(&binderfs_minors_mutex); iput(inode); return ret; } /** * binder_ctl_ioctl - handle binder device node allocation requests * @file: The file pointer for the binder-control device node. * @cmd: The ioctl command. * @arg: The ioctl argument. * * The request handler for the binder-control device. All requests operate on * the binderfs mount the binder-control device resides in: * - BINDER_CTL_ADD * Allocate a new binder device. * * Return: %0 on success, negative errno on failure. */ static long binder_ctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { int ret = -EINVAL; struct inode *inode = file_inode(file); struct binderfs_device __user *device = (struct binderfs_device __user *)arg; struct binderfs_device device_req; switch (cmd) { case BINDER_CTL_ADD: ret = copy_from_user(&device_req, device, sizeof(device_req)); if (ret) { ret = -EFAULT; break; } ret = binderfs_binder_device_create(inode, device, &device_req); break; default: break; } return ret; } static void binderfs_evict_inode(struct inode *inode) { struct binder_device *device = inode->i_private; struct binderfs_info *info = BINDERFS_SB(inode->i_sb); clear_inode(inode); if (!S_ISCHR(inode->i_mode) || !device) return; mutex_lock(&binderfs_minors_mutex); --info->device_count; ida_free(&binderfs_minors, device->miscdev.minor); mutex_unlock(&binderfs_minors_mutex); if (refcount_dec_and_test(&device->ref)) { binder_remove_device(device); kfree(device->context.name); kfree(device); } } static int binderfs_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param) { int opt; struct binderfs_mount_opts *ctx = fc->fs_private; struct fs_parse_result result; opt = fs_parse(fc, binderfs_fs_parameters, param, &result); if (opt < 0) return opt; switch (opt) { case Opt_max: if (result.uint_32 > BINDERFS_MAX_MINOR) return invalfc(fc, "Bad value for '%s'", param->key); ctx->max = result.uint_32; break; case Opt_stats_mode: if (!capable(CAP_SYS_ADMIN)) return -EPERM; ctx->stats_mode = result.uint_32; break; default: return invalfc(fc, "Unsupported parameter '%s'", param->key); } return 0; } static int binderfs_fs_context_reconfigure(struct fs_context *fc) { struct binderfs_mount_opts *ctx = fc->fs_private; struct binderfs_info *info = BINDERFS_SB(fc->root->d_sb); if (info->mount_opts.stats_mode != ctx->stats_mode) return invalfc(fc, "Binderfs stats mode cannot be changed during a remount"); info->mount_opts.stats_mode = ctx->stats_mode; info->mount_opts.max = ctx->max; return 0; } static int binderfs_show_options(struct seq_file *seq, struct dentry *root) { struct binderfs_info *info = BINDERFS_SB(root->d_sb); if (info->mount_opts.max <= BINDERFS_MAX_MINOR) seq_printf(seq, ",max=%d", info->mount_opts.max); switch (info->mount_opts.stats_mode) { case binderfs_stats_mode_unset: break; case binderfs_stats_mode_global: seq_printf(seq, ",stats=global"); break; } return 0; } static const struct super_operations binderfs_super_ops = { .evict_inode = binderfs_evict_inode, .show_options = binderfs_show_options, .statfs = simple_statfs, }; static inline bool is_binderfs_control_device(const struct dentry *dentry) { struct binderfs_info *info = dentry->d_sb->s_fs_info; return info->control_dentry == dentry; } static int binderfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { if (is_binderfs_control_device(old_dentry) || is_binderfs_control_device(new_dentry)) return -EPERM; return simple_rename(idmap, old_dir, old_dentry, new_dir, new_dentry, flags); } static int binderfs_unlink(struct inode *dir, struct dentry *dentry) { if (is_binderfs_control_device(dentry)) return -EPERM; return simple_unlink(dir, dentry); } static const struct file_operations binder_ctl_fops = { .owner = THIS_MODULE, .open = nonseekable_open, .unlocked_ioctl = binder_ctl_ioctl, .compat_ioctl = binder_ctl_ioctl, .llseek = noop_llseek, }; /** * binderfs_binder_ctl_create - create a new binder-control device * @sb: super block of the binderfs mount * * This function creates a new binder-control device node in the binderfs mount * referred to by @sb. * * Return: 0 on success, negative errno on failure */ static int binderfs_binder_ctl_create(struct super_block *sb) { int minor, ret; struct dentry *dentry; struct binder_device *device; struct inode *inode = NULL; struct dentry *root = sb->s_root; struct binderfs_info *info = sb->s_fs_info; #if defined(CONFIG_IPC_NS) bool use_reserve = (info->ipc_ns == &init_ipc_ns); #else bool use_reserve = true; #endif device = kzalloc(sizeof(*device), GFP_KERNEL); if (!device) return -ENOMEM; ret = -ENOMEM; inode = new_inode(sb); if (!inode) goto out; /* Reserve a new minor number for the new device. */ mutex_lock(&binderfs_minors_mutex); minor = ida_alloc_max(&binderfs_minors, use_reserve ? BINDERFS_MAX_MINOR : BINDERFS_MAX_MINOR_CAPPED, GFP_KERNEL); mutex_unlock(&binderfs_minors_mutex); if (minor < 0) { ret = minor; goto out; } inode->i_ino = SECOND_INODE; simple_inode_init_ts(inode); init_special_inode(inode, S_IFCHR | 0600, MKDEV(MAJOR(binderfs_dev), minor)); inode->i_fop = &binder_ctl_fops; inode->i_uid = info->root_uid; inode->i_gid = info->root_gid; refcount_set(&device->ref, 1); device->binderfs_inode = inode; device->miscdev.minor = minor; dentry = d_alloc_name(root, "binder-control"); if (!dentry) goto out; inode->i_private = device; info->control_dentry = dentry; d_make_persistent(dentry, inode); dput(dentry); return 0; out: kfree(device); iput(inode); return ret; } static const struct inode_operations binderfs_dir_inode_operations = { .lookup = simple_lookup, .rename = binderfs_rename, .unlink = binderfs_unlink, }; static struct inode *binderfs_make_inode(struct super_block *sb, int mode) { struct inode *ret; ret = new_inode(sb); if (ret) { ret->i_ino = iunique(sb, BINDERFS_MAX_MINOR + INODE_OFFSET); ret->i_mode = mode; simple_inode_init_ts(ret); } return ret; } struct dentry *binderfs_create_file(struct dentry *parent, const char *name, const struct file_operations *fops, void *data) { struct dentry *dentry; struct inode *new_inode, *parent_inode; struct super_block *sb; parent_inode = d_inode(parent); dentry = simple_start_creating(parent, name); if (IS_ERR(dentry)) return dentry; sb = parent_inode->i_sb; new_inode = binderfs_make_inode(sb, S_IFREG | 0444); if (!new_inode) { simple_done_creating(dentry); return ERR_PTR(-ENOMEM); } new_inode->i_fop = fops; new_inode->i_private = data; d_make_persistent(dentry, new_inode); fsnotify_create(parent_inode, dentry); simple_done_creating(dentry); return dentry; // borrowed } static struct dentry *binderfs_create_dir(struct dentry *parent, const char *name) { struct dentry *dentry; struct inode *new_inode, *parent_inode; struct super_block *sb; parent_inode = d_inode(parent); dentry = simple_start_creating(parent, name); if (IS_ERR(dentry)) return dentry; sb = parent_inode->i_sb; new_inode = binderfs_make_inode(sb, S_IFDIR | 0755); if (!new_inode) { simple_done_creating(dentry); return ERR_PTR(-ENOMEM); } new_inode->i_fop = &simple_dir_operations; new_inode->i_op = &simple_dir_inode_operations; set_nlink(new_inode, 2); d_make_persistent(dentry, new_inode); inc_nlink(parent_inode); fsnotify_mkdir(parent_inode, dentry); simple_done_creating(dentry); return dentry; } static int binder_features_show(struct seq_file *m, void *unused) { bool *feature = m->private; seq_printf(m, "%d\n", *feature); return 0; } DEFINE_SHOW_ATTRIBUTE(binder_features); static int init_binder_features(struct super_block *sb) { struct dentry *dentry, *dir; dir = binderfs_create_dir(sb->s_root, "features"); if (IS_ERR(dir)) return PTR_ERR(dir); dentry = binderfs_create_file(dir, "oneway_spam_detection", &binder_features_fops, &binder_features.oneway_spam_detection); if (IS_ERR(dentry)) return PTR_ERR(dentry); dentry = binderfs_create_file(dir, "extended_error", &binder_features_fops, &binder_features.extended_error); if (IS_ERR(dentry)) return PTR_ERR(dentry); dentry = binderfs_create_file(dir, "freeze_notification", &binder_features_fops, &binder_features.freeze_notification); if (IS_ERR(dentry)) return PTR_ERR(dentry); dentry = binderfs_create_file(dir, "transaction_report", &binder_features_fops, &binder_features.transaction_report); if (IS_ERR(dentry)) return PTR_ERR(dentry); return 0; } static int init_binder_logs(struct super_block *sb) { struct dentry *binder_logs_root_dir, *dentry, *proc_log_dir; const struct binder_debugfs_entry *db_entry; struct binderfs_info *info; int ret = 0; binder_logs_root_dir = binderfs_create_dir(sb->s_root, "binder_logs"); if (IS_ERR(binder_logs_root_dir)) { ret = PTR_ERR(binder_logs_root_dir); goto out; } binder_for_each_debugfs_entry(db_entry) { dentry = binderfs_create_file(binder_logs_root_dir, db_entry->name, db_entry->fops, db_entry->data); if (IS_ERR(dentry)) { ret = PTR_ERR(dentry); goto out; } } proc_log_dir = binderfs_create_dir(binder_logs_root_dir, "proc"); if (IS_ERR(proc_log_dir)) { ret = PTR_ERR(proc_log_dir); goto out; } info = sb->s_fs_info; info->proc_log_dir = proc_log_dir; out: return ret; } static int binderfs_fill_super(struct super_block *sb, struct fs_context *fc) { int ret; struct binderfs_info *info; struct binderfs_mount_opts *ctx = fc->fs_private; struct inode *inode = NULL; struct binderfs_device device_info = {}; const char *name; size_t len; sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; /* * The binderfs filesystem can be mounted by userns root in a * non-initial userns. By default such mounts have the SB_I_NODEV flag * set in s_iflags to prevent security issues where userns root can * just create random device nodes via mknod() since it owns the * filesystem mount. But binderfs does not allow to create any files * including devices nodes. The only way to create binder devices nodes * is through the binder-control device which userns root is explicitly * allowed to do. So removing the SB_I_NODEV flag from s_iflags is both * necessary and safe. */ sb->s_iflags &= ~SB_I_NODEV; sb->s_iflags |= SB_I_NOEXEC; sb->s_magic = BINDERFS_SUPER_MAGIC; sb->s_op = &binderfs_super_ops; sb->s_time_gran = 1; sb->s_fs_info = kzalloc(sizeof(struct binderfs_info), GFP_KERNEL); if (!sb->s_fs_info) return -ENOMEM; info = sb->s_fs_info; info->ipc_ns = get_ipc_ns(current->nsproxy->ipc_ns); info->root_gid = make_kgid(sb->s_user_ns, 0); if (!gid_valid(info->root_gid)) info->root_gid = GLOBAL_ROOT_GID; info->root_uid = make_kuid(sb->s_user_ns, 0); if (!uid_valid(info->root_uid)) info->root_uid = GLOBAL_ROOT_UID; info->mount_opts.max = ctx->max; info->mount_opts.stats_mode = ctx->stats_mode; inode = new_inode(sb); if (!inode) return -ENOMEM; inode->i_ino = FIRST_INODE; inode->i_fop = &simple_dir_operations; inode->i_mode = S_IFDIR | 0755; simple_inode_init_ts(inode); inode->i_op = &binderfs_dir_inode_operations; set_nlink(inode, 2); sb->s_root = d_make_root(inode); if (!sb->s_root) return -ENOMEM; ret = binderfs_binder_ctl_create(sb); if (ret) return ret; name = binder_devices_param; for (len = strcspn(name, ","); len > 0; len = strcspn(name, ",")) { strscpy(device_info.name, name, len + 1); ret = binderfs_binder_device_create(inode, NULL, &device_info); if (ret) return ret; name += len; if (*name == ',') name++; } ret = init_binder_features(sb); if (ret) return ret; if (info->mount_opts.stats_mode == binderfs_stats_mode_global) return init_binder_logs(sb); return 0; } static int binderfs_fs_context_get_tree(struct fs_context *fc) { return get_tree_nodev(fc, binderfs_fill_super); } static void binderfs_fs_context_free(struct fs_context *fc) { struct binderfs_mount_opts *ctx = fc->fs_private; kfree(ctx); } static const struct fs_context_operations binderfs_fs_context_ops = { .free = binderfs_fs_context_free, .get_tree = binderfs_fs_context_get_tree, .parse_param = binderfs_fs_context_parse_param, .reconfigure = binderfs_fs_context_reconfigure, }; static int binderfs_init_fs_context(struct fs_context *fc) { struct binderfs_mount_opts *ctx; ctx = kzalloc(sizeof(struct binderfs_mount_opts), GFP_KERNEL); if (!ctx) return -ENOMEM; ctx->max = BINDERFS_MAX_MINOR; ctx->stats_mode = binderfs_stats_mode_unset; fc->fs_private = ctx; fc->ops = &binderfs_fs_context_ops; return 0; } static void binderfs_kill_super(struct super_block *sb) { struct binderfs_info *info = sb->s_fs_info; /* * During inode eviction struct binderfs_info is needed. * So first wipe the super_block then free struct binderfs_info. */ kill_anon_super(sb); if (info && info->ipc_ns) put_ipc_ns(info->ipc_ns); kfree(info); } static struct file_system_type binder_fs_type = { .name = "binder", .init_fs_context = binderfs_init_fs_context, .parameters = binderfs_fs_parameters, .kill_sb = binderfs_kill_super, .fs_flags = FS_USERNS_MOUNT, }; int __init init_binderfs(void) { int ret; const char *name; size_t len; /* Verify that the default binderfs device names are valid. */ name = binder_devices_param; for (len = strcspn(name, ","); len > 0; len = strcspn(name, ",")) { if (len > BINDERFS_MAX_NAME) return -E2BIG; name += len; if (*name == ',') name++; } /* Allocate new major number for binderfs. */ ret = alloc_chrdev_region(&binderfs_dev, 0, BINDERFS_MAX_MINOR, "binder"); if (ret) return ret; ret = register_filesystem(&binder_fs_type); if (ret) { unregister_chrdev_region(binderfs_dev, BINDERFS_MAX_MINOR); return ret; } return ret; }
2 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 // SPDX-License-Identifier: GPL-2.0 /* * Handling of different ABIs (personalities). * * We group personalities into execution domains which have their * own handlers for kernel entry points, signal mapping, etc... * * 2001-05-06 Complete rewrite, Christoph Hellwig (hch@infradead.org) */ #include <linux/init.h> #include <linux/kernel.h> #include <linux/kmod.h> #include <linux/module.h> #include <linux/personality.h> #include <linux/proc_fs.h> #include <linux/sched.h> #include <linux/seq_file.h> #include <linux/syscalls.h> #include <linux/sysctl.h> #include <linux/types.h> #ifdef CONFIG_PROC_FS static int execdomains_proc_show(struct seq_file *m, void *v) { seq_puts(m, "0-0\tLinux \t[kernel]\n"); return 0; } static int __init proc_execdomains_init(void) { proc_create_single("execdomains", 0, NULL, execdomains_proc_show); return 0; } module_init(proc_execdomains_init); #endif SYSCALL_DEFINE1(personality, unsigned int, personality) { unsigned int old = current->personality; if (personality != 0xffffffff) set_personality(personality); return old; }
185 146 4 45 46 261 185 79 260 167 102 15 15 15 272 274 273 272 28 249 12 120 136 2 17 17 38 11 3 31 26 14 1 4 5 48 1 11 34 14 15 2 13 35 8 6 7 2 15 4 17 17 1 15 15 1 3 12 12 1 12 8 8 15 1 12 12 6 1 6 2 3 28 27 7 20 5 294 294 292 2 53 53 53 2 11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/stat.c * * Copyright (C) 1991, 1992 Linus Torvalds */ #include <linux/blkdev.h> #include <linux/export.h> #include <linux/mm.h> #include <linux/errno.h> #include <linux/file.h> #include <linux/highuid.h> #include <linux/fs.h> #include <linux/namei.h> #include <linux/security.h> #include <linux/cred.h> #include <linux/syscalls.h> #include <linux/pagemap.h> #include <linux/compat.h> #include <linux/iversion.h> #include <linux/uaccess.h> #include <asm/unistd.h> #include <trace/events/timestamp.h> #include "internal.h" #include "mount.h" /** * fill_mg_cmtime - Fill in the mtime and ctime and flag ctime as QUERIED * @stat: where to store the resulting values * @request_mask: STATX_* values requested * @inode: inode from which to grab the c/mtime * * Given @inode, grab the ctime and mtime out if it and store the result * in @stat. When fetching the value, flag it as QUERIED (if not already) * so the next write will record a distinct timestamp. * * NB: The QUERIED flag is tracked in the ctime, but we set it there even * if only the mtime was requested, as that ensures that the next mtime * change will be distinct. */ void fill_mg_cmtime(struct kstat *stat, u32 request_mask, struct inode *inode) { atomic_t *pcn = (atomic_t *)&inode->i_ctime_nsec; /* If neither time was requested, then don't report them */ if (!(request_mask & (STATX_CTIME|STATX_MTIME))) { stat->result_mask &= ~(STATX_CTIME|STATX_MTIME); return; } stat->mtime = inode_get_mtime(inode); stat->ctime.tv_sec = inode->i_ctime_sec; stat->ctime.tv_nsec = (u32)atomic_read(pcn); if (!(stat->ctime.tv_nsec & I_CTIME_QUERIED)) stat->ctime.tv_nsec = ((u32)atomic_fetch_or(I_CTIME_QUERIED, pcn)); stat->ctime.tv_nsec &= ~I_CTIME_QUERIED; trace_fill_mg_cmtime(inode, &stat->ctime, &stat->mtime); } EXPORT_SYMBOL(fill_mg_cmtime); /** * generic_fillattr - Fill in the basic attributes from the inode struct * @idmap: idmap of the mount the inode was found from * @request_mask: statx request_mask * @inode: Inode to use as the source * @stat: Where to fill in the attributes * * Fill in the basic attributes in the kstat structure from data that's to be * found on the VFS inode structure. This is the default if no getattr inode * operation is supplied. * * If the inode has been found through an idmapped mount the idmap of * the vfsmount must be passed through @idmap. This function will then * take care to map the inode according to @idmap before filling in the * uid and gid filds. On non-idmapped mounts or if permission checking is to be * performed on the raw inode simply pass @nop_mnt_idmap. */ void generic_fillattr(struct mnt_idmap *idmap, u32 request_mask, struct inode *inode, struct kstat *stat) { vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode); vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode); stat->dev = inode->i_sb->s_dev; stat->ino = inode->i_ino; stat->mode = inode->i_mode; stat->nlink = inode->i_nlink; stat->uid = vfsuid_into_kuid(vfsuid); stat->gid = vfsgid_into_kgid(vfsgid); stat->rdev = inode->i_rdev; stat->size = i_size_read(inode); stat->atime = inode_get_atime(inode); if (is_mgtime(inode)) { fill_mg_cmtime(stat, request_mask, inode); } else { stat->ctime = inode_get_ctime(inode); stat->mtime = inode_get_mtime(inode); } stat->blksize = i_blocksize(inode); stat->blocks = inode->i_blocks; if ((request_mask & STATX_CHANGE_COOKIE) && IS_I_VERSION(inode)) { stat->result_mask |= STATX_CHANGE_COOKIE; stat->change_cookie = inode_query_iversion(inode); } } EXPORT_SYMBOL(generic_fillattr); /** * generic_fill_statx_attr - Fill in the statx attributes from the inode flags * @inode: Inode to use as the source * @stat: Where to fill in the attribute flags * * Fill in the STATX_ATTR_* flags in the kstat structure for properties of the * inode that are published on i_flags and enforced by the VFS. */ void generic_fill_statx_attr(struct inode *inode, struct kstat *stat) { if (inode->i_flags & S_IMMUTABLE) stat->attributes |= STATX_ATTR_IMMUTABLE; if (inode->i_flags & S_APPEND) stat->attributes |= STATX_ATTR_APPEND; stat->attributes_mask |= KSTAT_ATTR_VFS_FLAGS; } EXPORT_SYMBOL(generic_fill_statx_attr); /** * generic_fill_statx_atomic_writes - Fill in atomic writes statx attributes * @stat: Where to fill in the attribute flags * @unit_min: Minimum supported atomic write length in bytes * @unit_max: Maximum supported atomic write length in bytes * @unit_max_opt: Optimised maximum supported atomic write length in bytes * * Fill in the STATX{_ATTR}_WRITE_ATOMIC flags in the kstat structure from * atomic write unit_min and unit_max values. */ void generic_fill_statx_atomic_writes(struct kstat *stat, unsigned int unit_min, unsigned int unit_max, unsigned int unit_max_opt) { /* Confirm that the request type is known */ stat->result_mask |= STATX_WRITE_ATOMIC; /* Confirm that the file attribute type is known */ stat->attributes_mask |= STATX_ATTR_WRITE_ATOMIC; if (unit_min) { stat->atomic_write_unit_min = unit_min; stat->atomic_write_unit_max = unit_max; stat->atomic_write_unit_max_opt = unit_max_opt; /* Initially only allow 1x segment */ stat->atomic_write_segments_max = 1; /* Confirm atomic writes are actually supported */ stat->attributes |= STATX_ATTR_WRITE_ATOMIC; } } EXPORT_SYMBOL_GPL(generic_fill_statx_atomic_writes); /** * vfs_getattr_nosec - getattr without security checks * @path: file to get attributes from * @stat: structure to return attributes in * @request_mask: STATX_xxx flags indicating what the caller wants * @query_flags: Query mode (AT_STATX_SYNC_TYPE) * * Get attributes without calling security_inode_getattr. * * Currently the only caller other than vfs_getattr is internal to the * filehandle lookup code, which uses only the inode number and returns no * attributes to any user. Any other code probably wants vfs_getattr. */ int vfs_getattr_nosec(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct mnt_idmap *idmap; struct inode *inode = d_backing_inode(path->dentry); memset(stat, 0, sizeof(*stat)); stat->result_mask |= STATX_BASIC_STATS; query_flags &= AT_STATX_SYNC_TYPE; /* allow the fs to override these if it really wants to */ /* SB_NOATIME means filesystem supplies dummy atime value */ if (inode->i_sb->s_flags & SB_NOATIME) stat->result_mask &= ~STATX_ATIME; /* * Note: If you add another clause to set an attribute flag, please * update attributes_mask below. */ if (IS_AUTOMOUNT(inode)) stat->attributes |= STATX_ATTR_AUTOMOUNT; if (IS_DAX(inode)) stat->attributes |= STATX_ATTR_DAX; stat->attributes_mask |= (STATX_ATTR_AUTOMOUNT | STATX_ATTR_DAX); idmap = mnt_idmap(path->mnt); if (inode->i_op->getattr) { int ret; ret = inode->i_op->getattr(idmap, path, stat, request_mask, query_flags); if (ret) return ret; } else { generic_fillattr(idmap, request_mask, inode, stat); } /* * If this is a block device inode, override the filesystem attributes * with the block device specific parameters that need to be obtained * from the bdev backing inode. */ if (S_ISBLK(stat->mode)) bdev_statx(path, stat, request_mask); return 0; } EXPORT_SYMBOL(vfs_getattr_nosec); /* * vfs_getattr - Get the enhanced basic attributes of a file * @path: The file of interest * @stat: Where to return the statistics * @request_mask: STATX_xxx flags indicating what the caller wants * @query_flags: Query mode (AT_STATX_SYNC_TYPE) * * Ask the filesystem for a file's attributes. The caller must indicate in * request_mask and query_flags to indicate what they want. * * If the file is remote, the filesystem can be forced to update the attributes * from the backing store by passing AT_STATX_FORCE_SYNC in query_flags or can * suppress the update by passing AT_STATX_DONT_SYNC. * * Bits must have been set in request_mask to indicate which attributes the * caller wants retrieving. Any such attribute not requested may be returned * anyway, but the value may be approximate, and, if remote, may not have been * synchronised with the server. * * 0 will be returned on success, and a -ve error code if unsuccessful. */ int vfs_getattr(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { int retval; retval = security_inode_getattr(path); if (unlikely(retval)) return retval; return vfs_getattr_nosec(path, stat, request_mask, query_flags); } EXPORT_SYMBOL(vfs_getattr); /** * vfs_fstat - Get the basic attributes by file descriptor * @fd: The file descriptor referring to the file of interest * @stat: The result structure to fill in. * * This function is a wrapper around vfs_getattr(). The main difference is * that it uses a file descriptor to determine the file location. * * 0 will be returned on success, and a -ve error code if unsuccessful. */ int vfs_fstat(int fd, struct kstat *stat) { CLASS(fd_raw, f)(fd); if (fd_empty(f)) return -EBADF; return vfs_getattr(&fd_file(f)->f_path, stat, STATX_BASIC_STATS, 0); } static int statx_lookup_flags(int flags) { int lookup_flags = 0; if (!(flags & AT_SYMLINK_NOFOLLOW)) lookup_flags |= LOOKUP_FOLLOW; if (!(flags & AT_NO_AUTOMOUNT)) lookup_flags |= LOOKUP_AUTOMOUNT; return lookup_flags; } static int vfs_statx_path(const struct path *path, int flags, struct kstat *stat, u32 request_mask) { int error = vfs_getattr(path, stat, request_mask, flags); if (error) return error; if (request_mask & STATX_MNT_ID_UNIQUE) { stat->mnt_id = real_mount(path->mnt)->mnt_id_unique; stat->result_mask |= STATX_MNT_ID_UNIQUE; } else { stat->mnt_id = real_mount(path->mnt)->mnt_id; stat->result_mask |= STATX_MNT_ID; } if (path_mounted(path)) stat->attributes |= STATX_ATTR_MOUNT_ROOT; stat->attributes_mask |= STATX_ATTR_MOUNT_ROOT; return 0; } static int vfs_statx_fd(int fd, int flags, struct kstat *stat, u32 request_mask) { CLASS(fd_raw, f)(fd); if (fd_empty(f)) return -EBADF; return vfs_statx_path(&fd_file(f)->f_path, flags, stat, request_mask); } /** * vfs_statx - Get basic and extra attributes by filename * @dfd: A file descriptor representing the base dir for a relative filename * @filename: The name of the file of interest * @flags: Flags to control the query * @stat: The result structure to fill in. * @request_mask: STATX_xxx flags indicating what the caller wants * * This function is a wrapper around vfs_getattr(). The main difference is * that it uses a filename and base directory to determine the file location. * Additionally, the use of AT_SYMLINK_NOFOLLOW in flags will prevent a symlink * at the given name from being referenced. * * 0 will be returned on success, and a -ve error code if unsuccessful. */ static int vfs_statx(int dfd, struct filename *filename, int flags, struct kstat *stat, u32 request_mask) { struct path path; unsigned int lookup_flags = statx_lookup_flags(flags); int error; if (flags & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT | AT_EMPTY_PATH | AT_STATX_SYNC_TYPE)) return -EINVAL; retry: error = filename_lookup(dfd, filename, lookup_flags, &path, NULL); if (error) return error; error = vfs_statx_path(&path, flags, stat, request_mask); path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } return error; } int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, int flags) { CLASS(filename_maybe_null, name)(filename, flags); if (!name && dfd >= 0) return vfs_fstat(dfd, stat); return vfs_statx(dfd, name, flags | AT_NO_AUTOMOUNT, stat, STATX_BASIC_STATS); } #ifdef __ARCH_WANT_OLD_STAT /* * For backward compatibility? Maybe this should be moved * into arch/i386 instead? */ static int cp_old_stat(struct kstat *stat, struct __old_kernel_stat __user * statbuf) { static int warncount = 5; struct __old_kernel_stat tmp; if (warncount > 0) { warncount--; printk(KERN_WARNING "VFS: Warning: %s using old stat() call. Recompile your binary.\n", current->comm); } else if (warncount < 0) { /* it's laughable, but... */ warncount = 0; } memset(&tmp, 0, sizeof(struct __old_kernel_stat)); tmp.st_dev = old_encode_dev(stat->dev); tmp.st_ino = stat->ino; if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) return -EOVERFLOW; tmp.st_mode = stat->mode; tmp.st_nlink = stat->nlink; if (tmp.st_nlink != stat->nlink) return -EOVERFLOW; SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid)); SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid)); tmp.st_rdev = old_encode_dev(stat->rdev); #if BITS_PER_LONG == 32 if (stat->size > MAX_NON_LFS) return -EOVERFLOW; #endif tmp.st_size = stat->size; tmp.st_atime = stat->atime.tv_sec; tmp.st_mtime = stat->mtime.tv_sec; tmp.st_ctime = stat->ctime.tv_sec; return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? -EFAULT : 0; } SYSCALL_DEFINE2(stat, const char __user *, filename, struct __old_kernel_stat __user *, statbuf) { struct kstat stat; int error; error = vfs_stat(filename, &stat); if (unlikely(error)) return error; return cp_old_stat(&stat, statbuf); } SYSCALL_DEFINE2(lstat, const char __user *, filename, struct __old_kernel_stat __user *, statbuf) { struct kstat stat; int error; error = vfs_lstat(filename, &stat); if (unlikely(error)) return error; return cp_old_stat(&stat, statbuf); } SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, statbuf) { struct kstat stat; int error; error = vfs_fstat(fd, &stat); if (unlikely(error)) return error; return cp_old_stat(&stat, statbuf); } #endif /* __ARCH_WANT_OLD_STAT */ #ifdef __ARCH_WANT_NEW_STAT #ifndef INIT_STRUCT_STAT_PADDING # define INIT_STRUCT_STAT_PADDING(st) memset(&st, 0, sizeof(st)) #endif static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf) { struct stat tmp; if (sizeof(tmp.st_dev) < 4 && !old_valid_dev(stat->dev)) return -EOVERFLOW; if (sizeof(tmp.st_rdev) < 4 && !old_valid_dev(stat->rdev)) return -EOVERFLOW; #if BITS_PER_LONG == 32 if (stat->size > MAX_NON_LFS) return -EOVERFLOW; #endif INIT_STRUCT_STAT_PADDING(tmp); tmp.st_dev = new_encode_dev(stat->dev); tmp.st_ino = stat->ino; if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) return -EOVERFLOW; tmp.st_mode = stat->mode; tmp.st_nlink = stat->nlink; if (tmp.st_nlink != stat->nlink) return -EOVERFLOW; SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid)); SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid)); tmp.st_rdev = new_encode_dev(stat->rdev); tmp.st_size = stat->size; tmp.st_atime = stat->atime.tv_sec; tmp.st_mtime = stat->mtime.tv_sec; tmp.st_ctime = stat->ctime.tv_sec; #ifdef STAT_HAVE_NSEC tmp.st_atime_nsec = stat->atime.tv_nsec; tmp.st_mtime_nsec = stat->mtime.tv_nsec; tmp.st_ctime_nsec = stat->ctime.tv_nsec; #endif tmp.st_blocks = stat->blocks; tmp.st_blksize = stat->blksize; return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? -EFAULT : 0; } SYSCALL_DEFINE2(newstat, const char __user *, filename, struct stat __user *, statbuf) { struct kstat stat; int error; error = vfs_stat(filename, &stat); if (unlikely(error)) return error; return cp_new_stat(&stat, statbuf); } SYSCALL_DEFINE2(newlstat, const char __user *, filename, struct stat __user *, statbuf) { struct kstat stat; int error; error = vfs_lstat(filename, &stat); if (unlikely(error)) return error; return cp_new_stat(&stat, statbuf); } #if !defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_SYS_NEWFSTATAT) SYSCALL_DEFINE4(newfstatat, int, dfd, const char __user *, filename, struct stat __user *, statbuf, int, flag) { struct kstat stat; int error; error = vfs_fstatat(dfd, filename, &stat, flag); if (unlikely(error)) return error; return cp_new_stat(&stat, statbuf); } #endif SYSCALL_DEFINE2(newfstat, unsigned int, fd, struct stat __user *, statbuf) { struct kstat stat; int error; error = vfs_fstat(fd, &stat); if (unlikely(error)) return error; return cp_new_stat(&stat, statbuf); } #endif static int do_readlinkat(int dfd, const char __user *pathname, char __user *buf, int bufsiz) { struct path path; int error; unsigned int lookup_flags = 0; if (bufsiz <= 0) return -EINVAL; CLASS(filename_flags, name)(pathname, LOOKUP_EMPTY); retry: error = filename_lookup(dfd, name, lookup_flags, &path, NULL); if (unlikely(error)) return error; /* * AFS mountpoints allow readlink(2) but are not symlinks */ if (d_is_symlink(path.dentry) || d_backing_inode(path.dentry)->i_op->readlink) { error = security_inode_readlink(path.dentry); if (!error) { touch_atime(&path); error = vfs_readlink(path.dentry, buf, bufsiz); } } else { error = (name->name[0] == '\0') ? -ENOENT : -EINVAL; } path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } return error; } SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname, char __user *, buf, int, bufsiz) { return do_readlinkat(dfd, pathname, buf, bufsiz); } SYSCALL_DEFINE3(readlink, const char __user *, path, char __user *, buf, int, bufsiz) { return do_readlinkat(AT_FDCWD, path, buf, bufsiz); } /* ---------- LFS-64 ----------- */ #if defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_COMPAT_STAT64) #ifndef INIT_STRUCT_STAT64_PADDING # define INIT_STRUCT_STAT64_PADDING(st) memset(&st, 0, sizeof(st)) #endif static long cp_new_stat64(struct kstat *stat, struct stat64 __user *statbuf) { struct stat64 tmp; INIT_STRUCT_STAT64_PADDING(tmp); #ifdef CONFIG_MIPS /* mips has weird padding, so we don't get 64 bits there */ tmp.st_dev = new_encode_dev(stat->dev); tmp.st_rdev = new_encode_dev(stat->rdev); #else tmp.st_dev = huge_encode_dev(stat->dev); tmp.st_rdev = huge_encode_dev(stat->rdev); #endif tmp.st_ino = stat->ino; if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) return -EOVERFLOW; #ifdef STAT64_HAS_BROKEN_ST_INO tmp.__st_ino = stat->ino; #endif tmp.st_mode = stat->mode; tmp.st_nlink = stat->nlink; tmp.st_uid = from_kuid_munged(current_user_ns(), stat->uid); tmp.st_gid = from_kgid_munged(current_user_ns(), stat->gid); tmp.st_atime = stat->atime.tv_sec; tmp.st_atime_nsec = stat->atime.tv_nsec; tmp.st_mtime = stat->mtime.tv_sec; tmp.st_mtime_nsec = stat->mtime.tv_nsec; tmp.st_ctime = stat->ctime.tv_sec; tmp.st_ctime_nsec = stat->ctime.tv_nsec; tmp.st_size = stat->size; tmp.st_blocks = stat->blocks; tmp.st_blksize = stat->blksize; return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? -EFAULT : 0; } SYSCALL_DEFINE2(stat64, const char __user *, filename, struct stat64 __user *, statbuf) { struct kstat stat; int error = vfs_stat(filename, &stat); if (!error) error = cp_new_stat64(&stat, statbuf); return error; } SYSCALL_DEFINE2(lstat64, const char __user *, filename, struct stat64 __user *, statbuf) { struct kstat stat; int error = vfs_lstat(filename, &stat); if (!error) error = cp_new_stat64(&stat, statbuf); return error; } SYSCALL_DEFINE2(fstat64, unsigned long, fd, struct stat64 __user *, statbuf) { struct kstat stat; int error = vfs_fstat(fd, &stat); if (!error) error = cp_new_stat64(&stat, statbuf); return error; } SYSCALL_DEFINE4(fstatat64, int, dfd, const char __user *, filename, struct stat64 __user *, statbuf, int, flag) { struct kstat stat; int error; error = vfs_fstatat(dfd, filename, &stat, flag); if (error) return error; return cp_new_stat64(&stat, statbuf); } #endif /* __ARCH_WANT_STAT64 || __ARCH_WANT_COMPAT_STAT64 */ static noinline_for_stack int cp_statx(const struct kstat *stat, struct statx __user *buffer) { struct statx tmp; memset(&tmp, 0, sizeof(tmp)); /* STATX_CHANGE_COOKIE is kernel-only for now */ tmp.stx_mask = stat->result_mask & ~STATX_CHANGE_COOKIE; tmp.stx_blksize = stat->blksize; /* STATX_ATTR_CHANGE_MONOTONIC is kernel-only for now */ tmp.stx_attributes = stat->attributes & ~STATX_ATTR_CHANGE_MONOTONIC; tmp.stx_nlink = stat->nlink; tmp.stx_uid = from_kuid_munged(current_user_ns(), stat->uid); tmp.stx_gid = from_kgid_munged(current_user_ns(), stat->gid); tmp.stx_mode = stat->mode; tmp.stx_ino = stat->ino; tmp.stx_size = stat->size; tmp.stx_blocks = stat->blocks; tmp.stx_attributes_mask = stat->attributes_mask; tmp.stx_atime.tv_sec = stat->atime.tv_sec; tmp.stx_atime.tv_nsec = stat->atime.tv_nsec; tmp.stx_btime.tv_sec = stat->btime.tv_sec; tmp.stx_btime.tv_nsec = stat->btime.tv_nsec; tmp.stx_ctime.tv_sec = stat->ctime.tv_sec; tmp.stx_ctime.tv_nsec = stat->ctime.tv_nsec; tmp.stx_mtime.tv_sec = stat->mtime.tv_sec; tmp.stx_mtime.tv_nsec = stat->mtime.tv_nsec; tmp.stx_rdev_major = MAJOR(stat->rdev); tmp.stx_rdev_minor = MINOR(stat->rdev); tmp.stx_dev_major = MAJOR(stat->dev); tmp.stx_dev_minor = MINOR(stat->dev); tmp.stx_mnt_id = stat->mnt_id; tmp.stx_dio_mem_align = stat->dio_mem_align; tmp.stx_dio_offset_align = stat->dio_offset_align; tmp.stx_dio_read_offset_align = stat->dio_read_offset_align; tmp.stx_subvol = stat->subvol; tmp.stx_atomic_write_unit_min = stat->atomic_write_unit_min; tmp.stx_atomic_write_unit_max = stat->atomic_write_unit_max; tmp.stx_atomic_write_segments_max = stat->atomic_write_segments_max; tmp.stx_atomic_write_unit_max_opt = stat->atomic_write_unit_max_opt; return copy_to_user(buffer, &tmp, sizeof(tmp)) ? -EFAULT : 0; } int do_statx(int dfd, struct filename *filename, unsigned int flags, unsigned int mask, struct statx __user *buffer) { struct kstat stat; int error; if (mask & STATX__RESERVED) return -EINVAL; if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE) return -EINVAL; /* * STATX_CHANGE_COOKIE is kernel-only for now. Ignore requests * from userland. */ mask &= ~STATX_CHANGE_COOKIE; error = vfs_statx(dfd, filename, flags, &stat, mask); if (error) return error; return cp_statx(&stat, buffer); } int do_statx_fd(int fd, unsigned int flags, unsigned int mask, struct statx __user *buffer) { struct kstat stat; int error; if (mask & STATX__RESERVED) return -EINVAL; if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE) return -EINVAL; /* * STATX_CHANGE_COOKIE is kernel-only for now. Ignore requests * from userland. */ mask &= ~STATX_CHANGE_COOKIE; error = vfs_statx_fd(fd, flags, &stat, mask); if (error) return error; return cp_statx(&stat, buffer); } /** * sys_statx - System call to get enhanced stats * @dfd: Base directory to pathwalk from *or* fd to stat. * @filename: File to stat or either NULL or "" with AT_EMPTY_PATH * @flags: AT_* flags to control pathwalk. * @mask: Parts of statx struct actually required. * @buffer: Result buffer. * * Note that fstat() can be emulated by setting dfd to the fd of interest, * supplying "" (or preferably NULL) as the filename and setting AT_EMPTY_PATH * in the flags. */ SYSCALL_DEFINE5(statx, int, dfd, const char __user *, filename, unsigned, flags, unsigned int, mask, struct statx __user *, buffer) { CLASS(filename_maybe_null, name)(filename, flags); if (!name && dfd >= 0) return do_statx_fd(dfd, flags & ~AT_NO_AUTOMOUNT, mask, buffer); return do_statx(dfd, name, flags, mask, buffer); } #if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_STAT) static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) { struct compat_stat tmp; if (sizeof(tmp.st_dev) < 4 && !old_valid_dev(stat->dev)) return -EOVERFLOW; if (sizeof(tmp.st_rdev) < 4 && !old_valid_dev(stat->rdev)) return -EOVERFLOW; memset(&tmp, 0, sizeof(tmp)); tmp.st_dev = new_encode_dev(stat->dev); tmp.st_ino = stat->ino; if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino) return -EOVERFLOW; tmp.st_mode = stat->mode; tmp.st_nlink = stat->nlink; if (tmp.st_nlink != stat->nlink) return -EOVERFLOW; SET_UID(tmp.st_uid, from_kuid_munged(current_user_ns(), stat->uid)); SET_GID(tmp.st_gid, from_kgid_munged(current_user_ns(), stat->gid)); tmp.st_rdev = new_encode_dev(stat->rdev); if ((u64) stat->size > MAX_NON_LFS) return -EOVERFLOW; tmp.st_size = stat->size; tmp.st_atime = stat->atime.tv_sec; tmp.st_atime_nsec = stat->atime.tv_nsec; tmp.st_mtime = stat->mtime.tv_sec; tmp.st_mtime_nsec = stat->mtime.tv_nsec; tmp.st_ctime = stat->ctime.tv_sec; tmp.st_ctime_nsec = stat->ctime.tv_nsec; tmp.st_blocks = stat->blocks; tmp.st_blksize = stat->blksize; return copy_to_user(ubuf, &tmp, sizeof(tmp)) ? -EFAULT : 0; } COMPAT_SYSCALL_DEFINE2(newstat, const char __user *, filename, struct compat_stat __user *, statbuf) { struct kstat stat; int error; error = vfs_stat(filename, &stat); if (error) return error; return cp_compat_stat(&stat, statbuf); } COMPAT_SYSCALL_DEFINE2(newlstat, const char __user *, filename, struct compat_stat __user *, statbuf) { struct kstat stat; int error; error = vfs_lstat(filename, &stat); if (error) return error; return cp_compat_stat(&stat, statbuf); } #ifndef __ARCH_WANT_STAT64 COMPAT_SYSCALL_DEFINE4(newfstatat, unsigned int, dfd, const char __user *, filename, struct compat_stat __user *, statbuf, int, flag) { struct kstat stat; int error; error = vfs_fstatat(dfd, filename, &stat, flag); if (error) return error; return cp_compat_stat(&stat, statbuf); } #endif COMPAT_SYSCALL_DEFINE2(newfstat, unsigned int, fd, struct compat_stat __user *, statbuf) { struct kstat stat; int error = vfs_fstat(fd, &stat); if (!error) error = cp_compat_stat(&stat, statbuf); return error; } #endif /* Caller is here responsible for sufficient locking (ie. inode->i_lock) */ void __inode_add_bytes(struct inode *inode, loff_t bytes) { inode->i_blocks += bytes >> 9; bytes &= 511; inode->i_bytes += bytes; if (inode->i_bytes >= 512) { inode->i_blocks++; inode->i_bytes -= 512; } } EXPORT_SYMBOL(__inode_add_bytes); void inode_add_bytes(struct inode *inode, loff_t bytes) { spin_lock(&inode->i_lock); __inode_add_bytes(inode, bytes); spin_unlock(&inode->i_lock); } EXPORT_SYMBOL(inode_add_bytes); void __inode_sub_bytes(struct inode *inode, loff_t bytes) { inode->i_blocks -= bytes >> 9; bytes &= 511; if (inode->i_bytes < bytes) { inode->i_blocks--; inode->i_bytes += 512; } inode->i_bytes -= bytes; } EXPORT_SYMBOL(__inode_sub_bytes); void inode_sub_bytes(struct inode *inode, loff_t bytes) { spin_lock(&inode->i_lock); __inode_sub_bytes(inode, bytes); spin_unlock(&inode->i_lock); } EXPORT_SYMBOL(inode_sub_bytes); loff_t inode_get_bytes(struct inode *inode) { loff_t ret; spin_lock(&inode->i_lock); ret = __inode_get_bytes(inode); spin_unlock(&inode->i_lock); return ret; } EXPORT_SYMBOL(inode_get_bytes); void inode_set_bytes(struct inode *inode, loff_t bytes) { /* Caller is here responsible for sufficient locking * (ie. inode->i_lock) */ inode->i_blocks = bytes >> 9; inode->i_bytes = bytes & 511; } EXPORT_SYMBOL(inode_set_bytes);
7 1 5 2 18 17 6 6 4 6 2 2 18 18 18 10 7 7 7 277 277 278 280 71 74 73 73 291 290 79 3 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 // SPDX-License-Identifier: GPL-2.0 /* * memory buffer pool support. Such pools are mostly used * for guaranteed, deadlock-free memory allocations during * extreme VM load. * * started by Ingo Molnar, Copyright (C) 2001 * debugging by David Rientjes, Copyright (C) 2015 */ #include <linux/fault-inject.h> #include <linux/mm.h> #include <linux/slab.h> #include <linux/highmem.h> #include <linux/kasan.h> #include <linux/kmemleak.h> #include <linux/export.h> #include <linux/mempool.h> #include <linux/writeback.h> #include "slab.h" static DECLARE_FAULT_ATTR(fail_mempool_alloc); static DECLARE_FAULT_ATTR(fail_mempool_alloc_bulk); static int __init mempool_faul_inject_init(void) { int error; error = PTR_ERR_OR_ZERO(fault_create_debugfs_attr("fail_mempool_alloc", NULL, &fail_mempool_alloc)); if (error) return error; /* booting will fail on error return here, don't bother to cleanup */ return PTR_ERR_OR_ZERO( fault_create_debugfs_attr("fail_mempool_alloc_bulk", NULL, &fail_mempool_alloc_bulk)); } late_initcall(mempool_faul_inject_init); #ifdef CONFIG_SLUB_DEBUG_ON static void poison_error(struct mempool *pool, void *element, size_t size, size_t byte) { const int nr = pool->curr_nr; const int start = max_t(int, byte - (BITS_PER_LONG / 8), 0); const int end = min_t(int, byte + (BITS_PER_LONG / 8), size); int i; pr_err("BUG: mempool element poison mismatch\n"); pr_err("Mempool %p size %zu\n", pool, size); pr_err(" nr=%d @ %p: %s0x", nr, element, start > 0 ? "... " : ""); for (i = start; i < end; i++) pr_cont("%x ", *(u8 *)(element + i)); pr_cont("%s\n", end < size ? "..." : ""); dump_stack(); } static void __check_element(struct mempool *pool, void *element, size_t size) { u8 *obj = element; size_t i; for (i = 0; i < size; i++) { u8 exp = (i < size - 1) ? POISON_FREE : POISON_END; if (obj[i] != exp) { poison_error(pool, element, size, i); return; } } memset(obj, POISON_INUSE, size); } static void check_element(struct mempool *pool, void *element) { /* Skip checking: KASAN might save its metadata in the element. */ if (kasan_enabled()) return; /* Mempools backed by slab allocator */ if (pool->free == mempool_kfree) { __check_element(pool, element, (size_t)pool->pool_data); } else if (pool->free == mempool_free_slab) { __check_element(pool, element, kmem_cache_size(pool->pool_data)); } else if (pool->free == mempool_free_pages) { /* Mempools backed by page allocator */ int order = (int)(long)pool->pool_data; #ifdef CONFIG_HIGHMEM for (int i = 0; i < (1 << order); i++) { struct page *page = (struct page *)element; void *addr = kmap_local_page(page + i); __check_element(pool, addr, PAGE_SIZE); kunmap_local(addr); } #else void *addr = page_address((struct page *)element); __check_element(pool, addr, PAGE_SIZE << order); #endif } } static void __poison_element(void *element, size_t size) { u8 *obj = element; memset(obj, POISON_FREE, size - 1); obj[size - 1] = POISON_END; } static void poison_element(struct mempool *pool, void *element) { /* Skip poisoning: KASAN might save its metadata in the element. */ if (kasan_enabled()) return; /* Mempools backed by slab allocator */ if (pool->alloc == mempool_kmalloc) { __poison_element(element, (size_t)pool->pool_data); } else if (pool->alloc == mempool_alloc_slab) { __poison_element(element, kmem_cache_size(pool->pool_data)); } else if (pool->alloc == mempool_alloc_pages) { /* Mempools backed by page allocator */ int order = (int)(long)pool->pool_data; #ifdef CONFIG_HIGHMEM for (int i = 0; i < (1 << order); i++) { struct page *page = (struct page *)element; void *addr = kmap_local_page(page + i); __poison_element(addr, PAGE_SIZE); kunmap_local(addr); } #else void *addr = page_address((struct page *)element); __poison_element(addr, PAGE_SIZE << order); #endif } } #else /* CONFIG_SLUB_DEBUG_ON */ static inline void check_element(struct mempool *pool, void *element) { } static inline void poison_element(struct mempool *pool, void *element) { } #endif /* CONFIG_SLUB_DEBUG_ON */ static __always_inline bool kasan_poison_element(struct mempool *pool, void *element) { if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc) return kasan_mempool_poison_object(element); else if (pool->alloc == mempool_alloc_pages) return kasan_mempool_poison_pages(element, (unsigned long)pool->pool_data); return true; } static void kasan_unpoison_element(struct mempool *pool, void *element) { if (pool->alloc == mempool_kmalloc) kasan_mempool_unpoison_object(element, (size_t)pool->pool_data); else if (pool->alloc == mempool_alloc_slab) kasan_mempool_unpoison_object(element, kmem_cache_size(pool->pool_data)); else if (pool->alloc == mempool_alloc_pages) kasan_mempool_unpoison_pages(element, (unsigned long)pool->pool_data); } static __always_inline void add_element(struct mempool *pool, void *element) { BUG_ON(pool->min_nr != 0 && pool->curr_nr >= pool->min_nr); poison_element(pool, element); if (kasan_poison_element(pool, element)) pool->elements[pool->curr_nr++] = element; } static void *remove_element(struct mempool *pool) { void *element = pool->elements[--pool->curr_nr]; BUG_ON(pool->curr_nr < 0); kasan_unpoison_element(pool, element); check_element(pool, element); return element; } /** * mempool_exit - exit a mempool initialized with mempool_init() * @pool: pointer to the memory pool which was initialized with * mempool_init(). * * Free all reserved elements in @pool and @pool itself. This function * only sleeps if the free_fn() function sleeps. * * May be called on a zeroed but uninitialized mempool (i.e. allocated with * kzalloc()). */ void mempool_exit(struct mempool *pool) { while (pool->curr_nr) { void *element = remove_element(pool); pool->free(element, pool->pool_data); } kfree(pool->elements); pool->elements = NULL; } EXPORT_SYMBOL(mempool_exit); /** * mempool_destroy - deallocate a memory pool * @pool: pointer to the memory pool which was allocated via * mempool_create(). * * Free all reserved elements in @pool and @pool itself. This function * only sleeps if the free_fn() function sleeps. */ void mempool_destroy(struct mempool *pool) { if (unlikely(!pool)) return; mempool_exit(pool); kfree(pool); } EXPORT_SYMBOL(mempool_destroy); int mempool_init_node(struct mempool *pool, int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data, gfp_t gfp_mask, int node_id) { spin_lock_init(&pool->lock); pool->min_nr = min_nr; pool->pool_data = pool_data; pool->alloc = alloc_fn; pool->free = free_fn; init_waitqueue_head(&pool->wait); /* * max() used here to ensure storage for at least 1 element to support * zero minimum pool */ pool->elements = kmalloc_array_node(max(1, min_nr), sizeof(void *), gfp_mask, node_id); if (!pool->elements) return -ENOMEM; /* * First pre-allocate the guaranteed number of buffers, * also pre-allocate 1 element for zero minimum pool. */ while (pool->curr_nr < max(1, pool->min_nr)) { void *element; element = pool->alloc(gfp_mask, pool->pool_data); if (unlikely(!element)) { mempool_exit(pool); return -ENOMEM; } add_element(pool, element); } return 0; } EXPORT_SYMBOL(mempool_init_node); /** * mempool_init - initialize a memory pool * @pool: pointer to the memory pool that should be initialized * @min_nr: the minimum number of elements guaranteed to be * allocated for this pool. * @alloc_fn: user-defined element-allocation function. * @free_fn: user-defined element-freeing function. * @pool_data: optional private data available to the user-defined functions. * * Like mempool_create(), but initializes the pool in (i.e. embedded in another * structure). * * Return: %0 on success, negative error code otherwise. */ int mempool_init_noprof(struct mempool *pool, int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data) { return mempool_init_node(pool, min_nr, alloc_fn, free_fn, pool_data, GFP_KERNEL, NUMA_NO_NODE); } EXPORT_SYMBOL(mempool_init_noprof); /** * mempool_create_node - create a memory pool * @min_nr: the minimum number of elements guaranteed to be * allocated for this pool. * @alloc_fn: user-defined element-allocation function. * @free_fn: user-defined element-freeing function. * @pool_data: optional private data available to the user-defined functions. * @gfp_mask: memory allocation flags * @node_id: numa node to allocate on * * this function creates and allocates a guaranteed size, preallocated * memory pool. The pool can be used from the mempool_alloc() and mempool_free() * functions. This function might sleep. Both the alloc_fn() and the free_fn() * functions might sleep - as long as the mempool_alloc() function is not called * from IRQ contexts. * * Return: pointer to the created memory pool object or %NULL on error. */ struct mempool *mempool_create_node_noprof(int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data, gfp_t gfp_mask, int node_id) { struct mempool *pool; pool = kmalloc_node_noprof(sizeof(*pool), gfp_mask | __GFP_ZERO, node_id); if (!pool) return NULL; if (mempool_init_node(pool, min_nr, alloc_fn, free_fn, pool_data, gfp_mask, node_id)) { kfree(pool); return NULL; } return pool; } EXPORT_SYMBOL(mempool_create_node_noprof); /** * mempool_resize - resize an existing memory pool * @pool: pointer to the memory pool which was allocated via * mempool_create(). * @new_min_nr: the new minimum number of elements guaranteed to be * allocated for this pool. * * This function shrinks/grows the pool. In the case of growing, * it cannot be guaranteed that the pool will be grown to the new * size immediately, but new mempool_free() calls will refill it. * This function may sleep. * * Note, the caller must guarantee that no mempool_destroy is called * while this function is running. mempool_alloc() & mempool_free() * might be called (eg. from IRQ contexts) while this function executes. * * Return: %0 on success, negative error code otherwise. */ int mempool_resize(struct mempool *pool, int new_min_nr) { void *element; void **new_elements; unsigned long flags; BUG_ON(new_min_nr <= 0); might_sleep(); spin_lock_irqsave(&pool->lock, flags); if (new_min_nr <= pool->min_nr) { while (new_min_nr < pool->curr_nr) { element = remove_element(pool); spin_unlock_irqrestore(&pool->lock, flags); pool->free(element, pool->pool_data); spin_lock_irqsave(&pool->lock, flags); } pool->min_nr = new_min_nr; goto out_unlock; } spin_unlock_irqrestore(&pool->lock, flags); /* Grow the pool */ new_elements = kmalloc_array(new_min_nr, sizeof(*new_elements), GFP_KERNEL); if (!new_elements) return -ENOMEM; spin_lock_irqsave(&pool->lock, flags); if (unlikely(new_min_nr <= pool->min_nr)) { /* Raced, other resize will do our work */ spin_unlock_irqrestore(&pool->lock, flags); kfree(new_elements); goto out; } memcpy(new_elements, pool->elements, pool->curr_nr * sizeof(*new_elements)); kfree(pool->elements); pool->elements = new_elements; pool->min_nr = new_min_nr; while (pool->curr_nr < pool->min_nr) { spin_unlock_irqrestore(&pool->lock, flags); element = pool->alloc(GFP_KERNEL, pool->pool_data); if (!element) goto out; spin_lock_irqsave(&pool->lock, flags); if (pool->curr_nr < pool->min_nr) { add_element(pool, element); } else { spin_unlock_irqrestore(&pool->lock, flags); pool->free(element, pool->pool_data); /* Raced */ goto out; } } out_unlock: spin_unlock_irqrestore(&pool->lock, flags); out: return 0; } EXPORT_SYMBOL(mempool_resize); static unsigned int mempool_alloc_from_pool(struct mempool *pool, void **elems, unsigned int count, unsigned int allocated, gfp_t gfp_mask) { unsigned long flags; unsigned int i; spin_lock_irqsave(&pool->lock, flags); if (unlikely(pool->curr_nr < count - allocated)) goto fail; for (i = 0; i < count; i++) { if (!elems[i]) { elems[i] = remove_element(pool); allocated++; } } spin_unlock_irqrestore(&pool->lock, flags); /* Paired with rmb in mempool_free(), read comment there. */ smp_wmb(); /* * Update the allocation stack trace as this is more useful for * debugging. */ for (i = 0; i < count; i++) kmemleak_update_trace(elems[i]); return allocated; fail: if (gfp_mask & __GFP_DIRECT_RECLAIM) { DEFINE_WAIT(wait); prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE); spin_unlock_irqrestore(&pool->lock, flags); /* * Wait for someone else to return an element to @pool, but wake * up occasionally as memory pressure might have reduced even * and the normal allocation in alloc_fn could succeed even if * no element was returned. */ io_schedule_timeout(5 * HZ); finish_wait(&pool->wait, &wait); } else { /* We must not sleep if __GFP_DIRECT_RECLAIM is not set. */ spin_unlock_irqrestore(&pool->lock, flags); } return allocated; } /* * Adjust the gfp flags for mempool allocations, as we never want to dip into * the global emergency reserves or retry in the page allocator. * * The first pass also doesn't want to go reclaim, but the next passes do, so * return a separate subset for that first iteration. */ static inline gfp_t mempool_adjust_gfp(gfp_t *gfp_mask) { *gfp_mask |= __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN; return *gfp_mask & ~(__GFP_DIRECT_RECLAIM | __GFP_IO); } /** * mempool_alloc_bulk - allocate multiple elements from a memory pool * @pool: pointer to the memory pool * @elems: partially or fully populated elements array * @count: number of entries in @elem that need to be allocated * @allocated: number of entries in @elem already allocated * * Allocate elements for each slot in @elem that is non-%NULL. This is done by * first calling into the alloc_fn supplied at pool initialization time, and * dipping into the reserved pool when alloc_fn fails to allocate an element. * * On return all @count elements in @elems will be populated. * * Return: Always 0. If it wasn't for %$#^$ alloc tags, it would return void. */ int mempool_alloc_bulk_noprof(struct mempool *pool, void **elems, unsigned int count, unsigned int allocated) { gfp_t gfp_mask = GFP_KERNEL; gfp_t gfp_temp = mempool_adjust_gfp(&gfp_mask); unsigned int i = 0; VM_WARN_ON_ONCE(count > pool->min_nr); might_alloc(gfp_mask); /* * If an error is injected, fail all elements in a bulk allocation so * that we stress the multiple elements missing path. */ if (should_fail_ex(&fail_mempool_alloc_bulk, 1, FAULT_NOWARN)) { pr_info("forcing mempool usage for %pS\n", (void *)_RET_IP_); goto use_pool; } repeat_alloc: /* * Try to allocate the elements using the allocation callback first as * that might succeed even when the caller's bulk allocation did not. */ for (i = 0; i < count; i++) { if (elems[i]) continue; elems[i] = pool->alloc(gfp_temp, pool->pool_data); if (unlikely(!elems[i])) goto use_pool; allocated++; } return 0; use_pool: allocated = mempool_alloc_from_pool(pool, elems, count, allocated, gfp_temp); gfp_temp = gfp_mask; goto repeat_alloc; } EXPORT_SYMBOL_GPL(mempool_alloc_bulk_noprof); /** * mempool_alloc - allocate an element from a memory pool * @pool: pointer to the memory pool * @gfp_mask: GFP_* flags. %__GFP_ZERO is not supported. * * Allocate an element from @pool. This is done by first calling into the * alloc_fn supplied at pool initialization time, and dipping into the reserved * pool when alloc_fn fails to allocate an element. * * This function only sleeps if the alloc_fn callback sleeps, or when waiting * for elements to become available in the pool. * * Return: pointer to the allocated element or %NULL when failing to allocate * an element. Allocation failure can only happen when @gfp_mask does not * include %__GFP_DIRECT_RECLAIM. */ void *mempool_alloc_noprof(struct mempool *pool, gfp_t gfp_mask) { gfp_t gfp_temp = mempool_adjust_gfp(&gfp_mask); void *element; VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO); might_alloc(gfp_mask); repeat_alloc: if (should_fail_ex(&fail_mempool_alloc, 1, FAULT_NOWARN)) { pr_info("forcing mempool usage for %pS\n", (void *)_RET_IP_); element = NULL; } else { element = pool->alloc(gfp_temp, pool->pool_data); } if (unlikely(!element)) { /* * Try to allocate an element from the pool. * * The first pass won't have __GFP_DIRECT_RECLAIM and won't * sleep in mempool_alloc_from_pool. Retry the allocation * with all flags set in that case. */ if (!mempool_alloc_from_pool(pool, &element, 1, 0, gfp_temp)) { if (gfp_temp != gfp_mask) { gfp_temp = gfp_mask; goto repeat_alloc; } if (gfp_mask & __GFP_DIRECT_RECLAIM) { goto repeat_alloc; } } } return element; } EXPORT_SYMBOL(mempool_alloc_noprof); /** * mempool_alloc_preallocated - allocate an element from preallocated elements * belonging to a memory pool * @pool: pointer to the memory pool * * This function is similar to mempool_alloc(), but it only attempts allocating * an element from the preallocated elements. It only takes a single spinlock_t * and immediately returns if no preallocated elements are available. * * Return: pointer to the allocated element or %NULL if no elements are * available. */ void *mempool_alloc_preallocated(struct mempool *pool) { void *element = NULL; mempool_alloc_from_pool(pool, &element, 1, 0, GFP_NOWAIT); return element; } EXPORT_SYMBOL(mempool_alloc_preallocated); /** * mempool_free_bulk - return elements to a mempool * @pool: pointer to the memory pool * @elems: elements to return * @count: number of elements to return * * Returns a number of elements from the start of @elem to @pool if @pool needs * replenishing and sets their slots in @elem to NULL. Other elements are left * in @elem. * * Return: number of elements transferred to @pool. Elements are always * transferred from the beginning of @elem, so the return value can be used as * an offset into @elem for the freeing the remaining elements in the caller. */ unsigned int mempool_free_bulk(struct mempool *pool, void **elems, unsigned int count) { unsigned long flags; unsigned int freed = 0; bool added = false; /* * Paired with the wmb in mempool_alloc(). The preceding read is * for @element and the following @pool->curr_nr. This ensures * that the visible value of @pool->curr_nr is from after the * allocation of @element. This is necessary for fringe cases * where @element was passed to this task without going through * barriers. * * For example, assume @p is %NULL at the beginning and one task * performs "p = mempool_alloc(...);" while another task is doing * "while (!p) cpu_relax(); mempool_free(p, ...);". This function * may end up using curr_nr value which is from before allocation * of @p without the following rmb. */ smp_rmb(); /* * For correctness, we need a test which is guaranteed to trigger * if curr_nr + #allocated == min_nr. Testing curr_nr < min_nr * without locking achieves that and refilling as soon as possible * is desirable. * * Because curr_nr visible here is always a value after the * allocation of @element, any task which decremented curr_nr below * min_nr is guaranteed to see curr_nr < min_nr unless curr_nr gets * incremented to min_nr afterwards. If curr_nr gets incremented * to min_nr after the allocation of @element, the elements * allocated after that are subject to the same guarantee. * * Waiters happen iff curr_nr is 0 and the above guarantee also * ensures that there will be frees which return elements to the * pool waking up the waiters. * * For zero-minimum pools, curr_nr < min_nr (0 < 0) never succeeds, * so waiters sleeping on pool->wait would never be woken by the * wake-up path of previous test. This explicit check ensures the * allocation of element when both min_nr and curr_nr are 0, and * any active waiters are properly awakened. */ if (unlikely(READ_ONCE(pool->curr_nr) < pool->min_nr)) { spin_lock_irqsave(&pool->lock, flags); while (pool->curr_nr < pool->min_nr && freed < count) { add_element(pool, elems[freed++]); added = true; } spin_unlock_irqrestore(&pool->lock, flags); } else if (unlikely(pool->min_nr == 0 && READ_ONCE(pool->curr_nr) == 0)) { /* Handle the min_nr = 0 edge case: */ spin_lock_irqsave(&pool->lock, flags); if (likely(pool->curr_nr == 0)) { add_element(pool, elems[freed++]); added = true; } spin_unlock_irqrestore(&pool->lock, flags); } if (unlikely(added) && wq_has_sleeper(&pool->wait)) wake_up(&pool->wait); return freed; } EXPORT_SYMBOL_GPL(mempool_free_bulk); /** * mempool_free - return an element to the pool. * @element: element to return * @pool: pointer to the memory pool * * Returns @element to @pool if it needs replenishing, else frees it using * the free_fn callback in @pool. * * This function only sleeps if the free_fn callback sleeps. */ void mempool_free(void *element, struct mempool *pool) { if (likely(element) && !mempool_free_bulk(pool, &element, 1)) pool->free(element, pool->pool_data); } EXPORT_SYMBOL(mempool_free); /* * A commonly used alloc and free fn. */ void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data) { struct kmem_cache *mem = pool_data; VM_BUG_ON(mem->ctor); return kmem_cache_alloc_noprof(mem, gfp_mask); } EXPORT_SYMBOL(mempool_alloc_slab); void mempool_free_slab(void *element, void *pool_data) { struct kmem_cache *mem = pool_data; kmem_cache_free(mem, element); } EXPORT_SYMBOL(mempool_free_slab); /* * A commonly used alloc and free fn that kmalloc/kfrees the amount of memory * specified by pool_data */ void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data) { size_t size = (size_t)pool_data; return kmalloc_noprof(size, gfp_mask); } EXPORT_SYMBOL(mempool_kmalloc); void mempool_kfree(void *element, void *pool_data) { kfree(element); } EXPORT_SYMBOL(mempool_kfree); /* * A simple mempool-backed page allocator that allocates pages * of the order specified by pool_data. */ void *mempool_alloc_pages(gfp_t gfp_mask, void *pool_data) { int order = (int)(long)pool_data; return alloc_pages_noprof(gfp_mask, order); } EXPORT_SYMBOL(mempool_alloc_pages); void mempool_free_pages(void *element, void *pool_data) { int order = (int)(long)pool_data; __free_pages(element, order); } EXPORT_SYMBOL(mempool_free_pages);
6 6 5 5 2 1 1 1 1 13 1 2 5 5 6 1 1 2 2 8 19 1 14 1 3 1 3 1 3 1 1 2 2 2 2 3 3 15 12 3 2 2 2 7 7 3 2 1 3 3 13 13 13 2 2 2 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 /* AFS superblock handling * * Copyright (c) 2002, 2007, 2018 Red Hat, Inc. All rights reserved. * * This software may be freely redistributed under the terms of the * GNU General Public License. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * * Authors: David Howells <dhowells@redhat.com> * David Woodhouse <dwmw2@infradead.org> * */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/mount.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/fs_parser.h> #include <linux/statfs.h> #include <linux/sched.h> #include <linux/nsproxy.h> #include <linux/magic.h> #include <net/net_namespace.h> #include "internal.h" static void afs_i_init_once(void *foo); static void afs_kill_super(struct super_block *sb); static struct inode *afs_alloc_inode(struct super_block *sb); static void afs_destroy_inode(struct inode *inode); static void afs_free_inode(struct inode *inode); static int afs_statfs(struct dentry *dentry, struct kstatfs *buf); static int afs_show_devname(struct seq_file *m, struct dentry *root); static int afs_show_options(struct seq_file *m, struct dentry *root); static int afs_init_fs_context(struct fs_context *fc); static const struct fs_parameter_spec afs_fs_parameters[]; struct file_system_type afs_fs_type = { .owner = THIS_MODULE, .name = "afs", .init_fs_context = afs_init_fs_context, .parameters = afs_fs_parameters, .kill_sb = afs_kill_super, .fs_flags = FS_RENAME_DOES_D_MOVE, }; MODULE_ALIAS_FS("afs"); int afs_net_id; static const struct super_operations afs_super_ops = { .statfs = afs_statfs, .alloc_inode = afs_alloc_inode, .write_inode = netfs_unpin_writeback, .drop_inode = afs_drop_inode, .destroy_inode = afs_destroy_inode, .free_inode = afs_free_inode, .evict_inode = afs_evict_inode, .show_devname = afs_show_devname, .show_options = afs_show_options, }; static struct kmem_cache *afs_inode_cachep; static atomic_t afs_count_active_inodes; enum afs_param { Opt_autocell, Opt_dyn, Opt_flock, Opt_source, }; static const struct constant_table afs_param_flock[] = { {"local", afs_flock_mode_local }, {"openafs", afs_flock_mode_openafs }, {"strict", afs_flock_mode_strict }, {"write", afs_flock_mode_write }, {} }; static const struct fs_parameter_spec afs_fs_parameters[] = { fsparam_flag ("autocell", Opt_autocell), fsparam_flag ("dyn", Opt_dyn), fsparam_enum ("flock", Opt_flock, afs_param_flock), fsparam_string("source", Opt_source), {} }; /* * initialise the filesystem */ int __init afs_fs_init(void) { int ret; _enter(""); /* create ourselves an inode cache */ atomic_set(&afs_count_active_inodes, 0); ret = -ENOMEM; afs_inode_cachep = kmem_cache_create("afs_inode_cache", sizeof(struct afs_vnode), 0, SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT, afs_i_init_once); if (!afs_inode_cachep) { printk(KERN_NOTICE "kAFS: Failed to allocate inode cache\n"); return ret; } /* now export our filesystem to lesser mortals */ ret = register_filesystem(&afs_fs_type); if (ret < 0) { kmem_cache_destroy(afs_inode_cachep); _leave(" = %d", ret); return ret; } _leave(" = 0"); return 0; } /* * clean up the filesystem */ void afs_fs_exit(void) { _enter(""); afs_mntpt_kill_timer(); unregister_filesystem(&afs_fs_type); if (atomic_read(&afs_count_active_inodes) != 0) { printk("kAFS: %d active inode objects still present\n", atomic_read(&afs_count_active_inodes)); BUG(); } /* * Make sure all delayed rcu free inodes are flushed before we * destroy cache. */ rcu_barrier(); kmem_cache_destroy(afs_inode_cachep); _leave(""); } /* * Display the mount device name in /proc/mounts. */ static int afs_show_devname(struct seq_file *m, struct dentry *root) { struct afs_super_info *as = AFS_FS_S(root->d_sb); struct afs_volume *volume = as->volume; struct afs_cell *cell = as->cell; const char *suf = ""; char pref = '%'; if (as->dyn_root) { seq_puts(m, "none"); return 0; } switch (volume->type) { case AFSVL_RWVOL: break; case AFSVL_ROVOL: pref = '#'; if (volume->type_force) suf = ".readonly"; break; case AFSVL_BACKVOL: pref = '#'; suf = ".backup"; break; } seq_printf(m, "%c%s:%s%s", pref, cell->name, volume->name, suf); return 0; } /* * Display the mount options in /proc/mounts. */ static int afs_show_options(struct seq_file *m, struct dentry *root) { struct afs_super_info *as = AFS_FS_S(root->d_sb); const char *p = NULL; if (as->dyn_root) seq_puts(m, ",dyn"); switch (as->flock_mode) { case afs_flock_mode_unset: break; case afs_flock_mode_local: p = "local"; break; case afs_flock_mode_openafs: p = "openafs"; break; case afs_flock_mode_strict: p = "strict"; break; case afs_flock_mode_write: p = "write"; break; } if (p) seq_printf(m, ",flock=%s", p); return 0; } /* * Parse the source name to get cell name, volume name, volume type and R/W * selector. * * This can be one of the following: * "%[cell:]volume[.]" R/W volume * "#[cell:]volume[.]" R/O or R/W volume (R/O parent), * or R/W (R/W parent) volume * "%[cell:]volume.readonly" R/O volume * "#[cell:]volume.readonly" R/O volume * "%[cell:]volume.backup" Backup volume * "#[cell:]volume.backup" Backup volume */ static int afs_parse_source(struct fs_context *fc, struct fs_parameter *param) { struct afs_fs_context *ctx = fc->fs_private; struct afs_cell *cell; const char *cellname, *suffix, *name = param->string; int cellnamesz; _enter(",%s", name); if (fc->source) return invalf(fc, "kAFS: Multiple sources not supported"); if (!name) { printk(KERN_ERR "kAFS: no volume name specified\n"); return -EINVAL; } if ((name[0] != '%' && name[0] != '#') || !name[1]) { /* To use dynroot, we don't want to have to provide a source */ if (strcmp(name, "none") == 0) { ctx->no_cell = true; return 0; } printk(KERN_ERR "kAFS: unparsable volume name\n"); return -EINVAL; } /* determine the type of volume we're looking for */ if (name[0] == '%') { ctx->type = AFSVL_RWVOL; ctx->force = true; } name++; /* split the cell name out if there is one */ ctx->volname = strchr(name, ':'); if (ctx->volname) { cellname = name; cellnamesz = ctx->volname - name; ctx->volname++; } else { ctx->volname = name; cellname = NULL; cellnamesz = 0; } /* the volume type is further affected by a possible suffix */ suffix = strrchr(ctx->volname, '.'); if (suffix) { if (strcmp(suffix, ".readonly") == 0) { ctx->type = AFSVL_ROVOL; ctx->force = true; } else if (strcmp(suffix, ".backup") == 0) { ctx->type = AFSVL_BACKVOL; ctx->force = true; } else if (suffix[1] == 0) { } else { suffix = NULL; } } ctx->volnamesz = suffix ? suffix - ctx->volname : strlen(ctx->volname); _debug("cell %*.*s [%p]", cellnamesz, cellnamesz, cellname ?: "", ctx->cell); /* lookup the cell record */ if (cellname) { cell = afs_lookup_cell(ctx->net, cellname, cellnamesz, NULL, AFS_LOOKUP_CELL_DIRECT_MOUNT, afs_cell_trace_use_lookup_mount); if (IS_ERR(cell)) { pr_err("kAFS: unable to lookup cell '%*.*s'\n", cellnamesz, cellnamesz, cellname ?: ""); return PTR_ERR(cell); } afs_unuse_cell(ctx->cell, afs_cell_trace_unuse_parse); afs_see_cell(cell, afs_cell_trace_see_source); ctx->cell = cell; } _debug("CELL:%s [%p] VOLUME:%*.*s SUFFIX:%s TYPE:%d%s", ctx->cell->name, ctx->cell, ctx->volnamesz, ctx->volnamesz, ctx->volname, suffix ?: "-", ctx->type, ctx->force ? " FORCE" : ""); fc->source = param->string; param->string = NULL; return 0; } /* * Parse a single mount parameter. */ static int afs_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct fs_parse_result result; struct afs_fs_context *ctx = fc->fs_private; int opt; opt = fs_parse(fc, afs_fs_parameters, param, &result); if (opt < 0) return opt; switch (opt) { case Opt_source: return afs_parse_source(fc, param); case Opt_autocell: ctx->autocell = true; break; case Opt_dyn: ctx->dyn_root = true; break; case Opt_flock: ctx->flock_mode = result.uint_32; break; default: return -EINVAL; } _leave(" = 0"); return 0; } /* * Validate the options, get the cell key and look up the volume. */ static int afs_validate_fc(struct fs_context *fc) { struct afs_fs_context *ctx = fc->fs_private; struct afs_volume *volume; struct afs_cell *cell; struct key *key; int ret; if (!ctx->dyn_root) { if (ctx->no_cell) { pr_warn("kAFS: Can only specify source 'none' with -o dyn\n"); return -EINVAL; } if (!ctx->cell) { pr_warn("kAFS: No cell specified\n"); return -EDESTADDRREQ; } reget_key: /* We try to do the mount securely. */ key = afs_request_key(ctx->cell); if (IS_ERR(key)) return PTR_ERR(key); ctx->key = key; if (ctx->volume) { afs_put_volume(ctx->volume, afs_volume_trace_put_validate_fc); ctx->volume = NULL; } if (test_bit(AFS_CELL_FL_CHECK_ALIAS, &ctx->cell->flags)) { ret = afs_cell_detect_alias(ctx->cell, key); if (ret < 0) return ret; if (ret == 1) { _debug("switch to alias"); key_put(ctx->key); ctx->key = NULL; cell = afs_use_cell(ctx->cell->alias_of, afs_cell_trace_use_fc_alias); afs_unuse_cell(ctx->cell, afs_cell_trace_unuse_fc); ctx->cell = cell; goto reget_key; } } volume = afs_create_volume(ctx); if (IS_ERR(volume)) return PTR_ERR(volume); ctx->volume = volume; if (volume->type != AFSVL_RWVOL) { ctx->flock_mode = afs_flock_mode_local; fc->sb_flags |= SB_RDONLY; } } return 0; } /* * check a superblock to see if it's the one we're looking for */ static int afs_test_super(struct super_block *sb, struct fs_context *fc) { struct afs_fs_context *ctx = fc->fs_private; struct afs_super_info *as = AFS_FS_S(sb); return (as->net_ns == fc->net_ns && as->volume && as->volume->vid == ctx->volume->vid && as->cell == ctx->cell && !as->dyn_root); } static int afs_dynroot_test_super(struct super_block *sb, struct fs_context *fc) { struct afs_super_info *as = AFS_FS_S(sb); return (as->net_ns == fc->net_ns && as->dyn_root); } static int afs_set_super(struct super_block *sb, struct fs_context *fc) { return set_anon_super(sb, NULL); } /* * fill in the superblock */ static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx) { struct afs_super_info *as = AFS_FS_S(sb); struct inode *inode = NULL; int ret; _enter(""); /* fill in the superblock */ sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_magic = AFS_FS_MAGIC; sb->s_op = &afs_super_ops; if (!as->dyn_root) sb->s_xattr = afs_xattr_handlers; ret = super_setup_bdi(sb); if (ret) return ret; /* allocate the root inode and dentry */ if (as->dyn_root) { inode = afs_dynroot_iget_root(sb); } else { sprintf(sb->s_id, "%llu", as->volume->vid); afs_activate_volume(as->volume); inode = afs_root_iget(sb, ctx->key); } if (IS_ERR(inode)) return PTR_ERR(inode); ret = -ENOMEM; sb->s_root = d_make_root(inode); if (!sb->s_root) goto error; if (as->dyn_root) { set_default_d_op(sb, &afs_dynroot_dentry_operations); } else { set_default_d_op(sb, &afs_fs_dentry_operations); rcu_assign_pointer(as->volume->sb, sb); } _leave(" = 0"); return 0; error: _leave(" = %d", ret); return ret; } static struct afs_super_info *afs_alloc_sbi(struct fs_context *fc) { struct afs_fs_context *ctx = fc->fs_private; struct afs_super_info *as; as = kzalloc(sizeof(struct afs_super_info), GFP_KERNEL); if (as) { as->net_ns = get_net(fc->net_ns); as->flock_mode = ctx->flock_mode; if (ctx->dyn_root) { as->dyn_root = true; } else { as->cell = afs_use_cell(ctx->cell, afs_cell_trace_use_sbi); as->volume = afs_get_volume(ctx->volume, afs_volume_trace_get_alloc_sbi); } } return as; } static void afs_destroy_sbi(struct afs_super_info *as) { if (as) { afs_put_volume(as->volume, afs_volume_trace_put_destroy_sbi); afs_unuse_cell(as->cell, afs_cell_trace_unuse_sbi); put_net(as->net_ns); kfree(as); } } static void afs_kill_super(struct super_block *sb) { struct afs_super_info *as = AFS_FS_S(sb); /* Clear the callback interests (which will do ilookup5) before * deactivating the superblock. */ if (as->volume) rcu_assign_pointer(as->volume->sb, NULL); kill_anon_super(sb); if (as->volume) afs_deactivate_volume(as->volume); afs_destroy_sbi(as); } /* * Get an AFS superblock and root directory. */ static int afs_get_tree(struct fs_context *fc) { struct afs_fs_context *ctx = fc->fs_private; struct super_block *sb; struct afs_super_info *as; int ret; ret = afs_validate_fc(fc); if (ret) goto error; _enter(""); /* allocate a superblock info record */ ret = -ENOMEM; as = afs_alloc_sbi(fc); if (!as) goto error; fc->s_fs_info = as; /* allocate a deviceless superblock */ sb = sget_fc(fc, as->dyn_root ? afs_dynroot_test_super : afs_test_super, afs_set_super); if (IS_ERR(sb)) { ret = PTR_ERR(sb); goto error; } if (!sb->s_root) { /* initial superblock/root creation */ _debug("create"); ret = afs_fill_super(sb, ctx); if (ret < 0) goto error_sb; sb->s_flags |= SB_ACTIVE; } else { _debug("reuse"); ASSERTCMP(sb->s_flags, &, SB_ACTIVE); } fc->root = dget(sb->s_root); trace_afs_get_tree(as->cell, as->volume); _leave(" = 0 [%p]", sb); return 0; error_sb: deactivate_locked_super(sb); error: _leave(" = %d", ret); return ret; } static void afs_free_fc(struct fs_context *fc) { struct afs_fs_context *ctx = fc->fs_private; afs_destroy_sbi(fc->s_fs_info); afs_put_volume(ctx->volume, afs_volume_trace_put_free_fc); afs_unuse_cell(ctx->cell, afs_cell_trace_unuse_fc); key_put(ctx->key); kfree(ctx); } static const struct fs_context_operations afs_context_ops = { .free = afs_free_fc, .parse_param = afs_parse_param, .get_tree = afs_get_tree, }; /* * Set up the filesystem mount context. */ static int afs_init_fs_context(struct fs_context *fc) { struct afs_fs_context *ctx; struct afs_cell *cell; ctx = kzalloc(sizeof(struct afs_fs_context), GFP_KERNEL); if (!ctx) return -ENOMEM; ctx->type = AFSVL_ROVOL; ctx->net = afs_net(fc->net_ns); /* Default to the workstation cell. */ cell = afs_find_cell(ctx->net, NULL, 0, afs_cell_trace_use_fc); if (IS_ERR(cell)) cell = NULL; ctx->cell = cell; fc->fs_private = ctx; fc->ops = &afs_context_ops; return 0; } /* * Initialise an inode cache slab element prior to any use. Note that * afs_alloc_inode() *must* reset anything that could incorrectly leak from one * inode to another. */ static void afs_i_init_once(void *_vnode) { struct afs_vnode *vnode = _vnode; memset(vnode, 0, sizeof(*vnode)); inode_init_once(&vnode->netfs.inode); INIT_LIST_HEAD(&vnode->io_lock_waiters); init_rwsem(&vnode->validate_lock); spin_lock_init(&vnode->wb_lock); spin_lock_init(&vnode->lock); INIT_LIST_HEAD(&vnode->wb_keys); INIT_LIST_HEAD(&vnode->pending_locks); INIT_LIST_HEAD(&vnode->granted_locks); INIT_DELAYED_WORK(&vnode->lock_work, afs_lock_work); INIT_LIST_HEAD(&vnode->cb_mmap_link); seqlock_init(&vnode->cb_lock); } /* * allocate an AFS inode struct from our slab cache */ static struct inode *afs_alloc_inode(struct super_block *sb) { struct afs_vnode *vnode; vnode = alloc_inode_sb(sb, afs_inode_cachep, GFP_KERNEL); if (!vnode) return NULL; atomic_inc(&afs_count_active_inodes); /* Reset anything that shouldn't leak from one inode to the next. */ memset(&vnode->fid, 0, sizeof(vnode->fid)); memset(&vnode->status, 0, sizeof(vnode->status)); afs_vnode_set_cache(vnode, NULL); vnode->volume = NULL; vnode->lock_key = NULL; vnode->permit_cache = NULL; vnode->directory = NULL; vnode->directory_size = 0; vnode->flags = 1 << AFS_VNODE_UNSET; vnode->lock_state = AFS_VNODE_LOCK_NONE; init_rwsem(&vnode->rmdir_lock); INIT_WORK(&vnode->cb_work, afs_invalidate_mmap_work); _leave(" = %p", &vnode->netfs.inode); return &vnode->netfs.inode; } static void afs_free_inode(struct inode *inode) { kmem_cache_free(afs_inode_cachep, AFS_FS_I(inode)); } /* * destroy an AFS inode struct */ static void afs_destroy_inode(struct inode *inode) { struct afs_vnode *vnode = AFS_FS_I(inode); _enter("%p{%llx:%llu}", inode, vnode->fid.vid, vnode->fid.vnode); _debug("DESTROY INODE %p", inode); atomic_dec(&afs_count_active_inodes); } static void afs_get_volume_status_success(struct afs_operation *op) { struct afs_volume_status *vs = &op->volstatus.vs; struct kstatfs *buf = op->volstatus.buf; if (vs->max_quota == 0) buf->f_blocks = vs->part_max_blocks; else buf->f_blocks = vs->max_quota; if (buf->f_blocks > vs->blocks_in_use) buf->f_bavail = buf->f_bfree = buf->f_blocks - vs->blocks_in_use; } static const struct afs_operation_ops afs_get_volume_status_operation = { .issue_afs_rpc = afs_fs_get_volume_status, .issue_yfs_rpc = yfs_fs_get_volume_status, .success = afs_get_volume_status_success, }; /* * return information about an AFS volume */ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct afs_super_info *as = AFS_FS_S(dentry->d_sb); struct afs_operation *op; struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); buf->f_type = dentry->d_sb->s_magic; buf->f_bsize = AFS_BLOCK_SIZE; buf->f_namelen = AFSNAMEMAX - 1; if (as->dyn_root) { buf->f_blocks = 1; buf->f_bavail = 0; buf->f_bfree = 0; return 0; } op = afs_alloc_operation(NULL, as->volume); if (IS_ERR(op)) return PTR_ERR(op); afs_op_set_vnode(op, 0, vnode); op->nr_files = 1; op->volstatus.buf = buf; op->ops = &afs_get_volume_status_operation; return afs_do_sync_operation(op); }
6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM oom #if !defined(_TRACE_OOM_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_OOM_H #include <linux/tracepoint.h> #include <trace/events/mmflags.h> #define PG_COUNT_TO_KB(x) ((x) << (PAGE_SHIFT - 10)) TRACE_EVENT(oom_score_adj_update, TP_PROTO(struct task_struct *task), TP_ARGS(task), TP_STRUCT__entry( __field( pid_t, pid) __array( char, comm, TASK_COMM_LEN ) __field( short, oom_score_adj) ), TP_fast_assign( __entry->pid = task->pid; memcpy(__entry->comm, task->comm, TASK_COMM_LEN); __entry->oom_score_adj = task->signal->oom_score_adj; ), TP_printk("pid=%d comm=%s oom_score_adj=%hd", __entry->pid, __entry->comm, __entry->oom_score_adj) ); TRACE_EVENT(reclaim_retry_zone, TP_PROTO(struct zoneref *zoneref, int order, unsigned long reclaimable, unsigned long available, unsigned long min_wmark, int no_progress_loops, bool wmark_check), TP_ARGS(zoneref, order, reclaimable, available, min_wmark, no_progress_loops, wmark_check), TP_STRUCT__entry( __field( int, node) __field( int, zone_idx) __field( int, order) __field( unsigned long, reclaimable) __field( unsigned long, available) __field( unsigned long, min_wmark) __field( int, no_progress_loops) __field( bool, wmark_check) ), TP_fast_assign( __entry->node = zonelist_node_idx(zoneref); __entry->zone_idx = zonelist_zone_idx(zoneref); __entry->order = order; __entry->reclaimable = reclaimable; __entry->available = available; __entry->min_wmark = min_wmark; __entry->no_progress_loops = no_progress_loops; __entry->wmark_check = wmark_check; ), TP_printk("node=%d zone=%-8s order=%d reclaimable=%lu available=%lu min_wmark=%lu no_progress_loops=%d wmark_check=%d", __entry->node, __print_symbolic(__entry->zone_idx, ZONE_TYPE), __entry->order, __entry->reclaimable, __entry->available, __entry->min_wmark, __entry->no_progress_loops, __entry->wmark_check) ); TRACE_EVENT(mark_victim, TP_PROTO(struct task_struct *task, uid_t uid), TP_ARGS(task, uid), TP_STRUCT__entry( __field(int, pid) __string(comm, task->comm) __field(unsigned long, total_vm) __field(unsigned long, anon_rss) __field(unsigned long, file_rss) __field(unsigned long, shmem_rss) __field(uid_t, uid) __field(unsigned long, pgtables) __field(short, oom_score_adj) ), TP_fast_assign( __entry->pid = task->pid; __assign_str(comm); __entry->total_vm = PG_COUNT_TO_KB(task->mm->total_vm); __entry->anon_rss = PG_COUNT_TO_KB(get_mm_counter(task->mm, MM_ANONPAGES)); __entry->file_rss = PG_COUNT_TO_KB(get_mm_counter(task->mm, MM_FILEPAGES)); __entry->shmem_rss = PG_COUNT_TO_KB(get_mm_counter(task->mm, MM_SHMEMPAGES)); __entry->uid = uid; __entry->pgtables = mm_pgtables_bytes(task->mm) >> 10; __entry->oom_score_adj = task->signal->oom_score_adj; ), TP_printk("pid=%d comm=%s total-vm=%lukB anon-rss=%lukB file-rss:%lukB shmem-rss:%lukB uid=%u pgtables=%lukB oom_score_adj=%hd", __entry->pid, __get_str(comm), __entry->total_vm, __entry->anon_rss, __entry->file_rss, __entry->shmem_rss, __entry->uid, __entry->pgtables, __entry->oom_score_adj ) ); TRACE_EVENT(wake_reaper, TP_PROTO(int pid), TP_ARGS(pid), TP_STRUCT__entry( __field(int, pid) ), TP_fast_assign( __entry->pid = pid; ), TP_printk("pid=%d", __entry->pid) ); TRACE_EVENT(start_task_reaping, TP_PROTO(int pid), TP_ARGS(pid), TP_STRUCT__entry( __field(int, pid) ), TP_fast_assign( __entry->pid = pid; ), TP_printk("pid=%d", __entry->pid) ); TRACE_EVENT(finish_task_reaping, TP_PROTO(int pid), TP_ARGS(pid), TP_STRUCT__entry( __field(int, pid) ), TP_fast_assign( __entry->pid = pid; ), TP_printk("pid=%d", __entry->pid) ); TRACE_EVENT(skip_task_reaping, TP_PROTO(int pid), TP_ARGS(pid), TP_STRUCT__entry( __field(int, pid) ), TP_fast_assign( __entry->pid = pid; ), TP_printk("pid=%d", __entry->pid) ); #ifdef CONFIG_COMPACTION TRACE_EVENT(compact_retry, TP_PROTO(int order, enum compact_priority priority, enum compact_result result, int retries, int max_retries, bool ret), TP_ARGS(order, priority, result, retries, max_retries, ret), TP_STRUCT__entry( __field( int, order) __field( int, priority) __field( int, result) __field( int, retries) __field( int, max_retries) __field( bool, ret) ), TP_fast_assign( __entry->order = order; __entry->priority = priority; __entry->result = compact_result_to_feedback(result); __entry->retries = retries; __entry->max_retries = max_retries; __entry->ret = ret; ), TP_printk("order=%d priority=%s compaction_result=%s retries=%d max_retries=%d should_retry=%d", __entry->order, __print_symbolic(__entry->priority, COMPACTION_PRIORITY), __print_symbolic(__entry->result, COMPACTION_FEEDBACK), __entry->retries, __entry->max_retries, __entry->ret) ); #endif /* CONFIG_COMPACTION */ #endif /* This part must be outside protection */ #include <trace/define_trace.h>
23 23 22 2 2 2 22 22 22 16 19 2 2 18 21 22 22 20 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 // SPDX-License-Identifier: GPL-2.0-or-later /* * SHA-3, as specified in * https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.202.pdf * * SHA-3 code by Jeff Garzik <jeff@garzik.org> * Ard Biesheuvel <ard.biesheuvel@linaro.org> * David Howells <dhowells@redhat.com> * * See also Documentation/crypto/sha3.rst */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <crypto/sha3.h> #include <crypto/utils.h> #include <linux/export.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/unaligned.h> #include "fips.h" /* * On some 32-bit architectures, such as h8300, GCC ends up using over 1 KB of * stack if the round calculation gets inlined into the loop in * sha3_keccakf_generic(). On the other hand, on 64-bit architectures with * plenty of [64-bit wide] general purpose registers, not inlining it severely * hurts performance. So let's use 64-bitness as a heuristic to decide whether * to inline or not. */ #ifdef CONFIG_64BIT #define SHA3_INLINE inline #else #define SHA3_INLINE noinline #endif #define SHA3_KECCAK_ROUNDS 24 static const u64 sha3_keccakf_rndc[SHA3_KECCAK_ROUNDS] = { 0x0000000000000001ULL, 0x0000000000008082ULL, 0x800000000000808aULL, 0x8000000080008000ULL, 0x000000000000808bULL, 0x0000000080000001ULL, 0x8000000080008081ULL, 0x8000000000008009ULL, 0x000000000000008aULL, 0x0000000000000088ULL, 0x0000000080008009ULL, 0x000000008000000aULL, 0x000000008000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL, 0x8000000000008003ULL, 0x8000000000008002ULL, 0x8000000000000080ULL, 0x000000000000800aULL, 0x800000008000000aULL, 0x8000000080008081ULL, 0x8000000000008080ULL, 0x0000000080000001ULL, 0x8000000080008008ULL }; /* * Perform a single round of Keccak mixing. */ static SHA3_INLINE void sha3_keccakf_one_round_generic(u64 st[25], int round) { u64 t[5], tt, bc[5]; /* Theta */ bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20]; bc[1] = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21]; bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22]; bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23]; bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24]; t[0] = bc[4] ^ rol64(bc[1], 1); t[1] = bc[0] ^ rol64(bc[2], 1); t[2] = bc[1] ^ rol64(bc[3], 1); t[3] = bc[2] ^ rol64(bc[4], 1); t[4] = bc[3] ^ rol64(bc[0], 1); st[0] ^= t[0]; /* Rho Pi */ tt = st[1]; st[ 1] = rol64(st[ 6] ^ t[1], 44); st[ 6] = rol64(st[ 9] ^ t[4], 20); st[ 9] = rol64(st[22] ^ t[2], 61); st[22] = rol64(st[14] ^ t[4], 39); st[14] = rol64(st[20] ^ t[0], 18); st[20] = rol64(st[ 2] ^ t[2], 62); st[ 2] = rol64(st[12] ^ t[2], 43); st[12] = rol64(st[13] ^ t[3], 25); st[13] = rol64(st[19] ^ t[4], 8); st[19] = rol64(st[23] ^ t[3], 56); st[23] = rol64(st[15] ^ t[0], 41); st[15] = rol64(st[ 4] ^ t[4], 27); st[ 4] = rol64(st[24] ^ t[4], 14); st[24] = rol64(st[21] ^ t[1], 2); st[21] = rol64(st[ 8] ^ t[3], 55); st[ 8] = rol64(st[16] ^ t[1], 45); st[16] = rol64(st[ 5] ^ t[0], 36); st[ 5] = rol64(st[ 3] ^ t[3], 28); st[ 3] = rol64(st[18] ^ t[3], 21); st[18] = rol64(st[17] ^ t[2], 15); st[17] = rol64(st[11] ^ t[1], 10); st[11] = rol64(st[ 7] ^ t[2], 6); st[ 7] = rol64(st[10] ^ t[0], 3); st[10] = rol64( tt ^ t[1], 1); /* Chi */ bc[ 0] = ~st[ 1] & st[ 2]; bc[ 1] = ~st[ 2] & st[ 3]; bc[ 2] = ~st[ 3] & st[ 4]; bc[ 3] = ~st[ 4] & st[ 0]; bc[ 4] = ~st[ 0] & st[ 1]; st[ 0] ^= bc[ 0]; st[ 1] ^= bc[ 1]; st[ 2] ^= bc[ 2]; st[ 3] ^= bc[ 3]; st[ 4] ^= bc[ 4]; bc[ 0] = ~st[ 6] & st[ 7]; bc[ 1] = ~st[ 7] & st[ 8]; bc[ 2] = ~st[ 8] & st[ 9]; bc[ 3] = ~st[ 9] & st[ 5]; bc[ 4] = ~st[ 5] & st[ 6]; st[ 5] ^= bc[ 0]; st[ 6] ^= bc[ 1]; st[ 7] ^= bc[ 2]; st[ 8] ^= bc[ 3]; st[ 9] ^= bc[ 4]; bc[ 0] = ~st[11] & st[12]; bc[ 1] = ~st[12] & st[13]; bc[ 2] = ~st[13] & st[14]; bc[ 3] = ~st[14] & st[10]; bc[ 4] = ~st[10] & st[11]; st[10] ^= bc[ 0]; st[11] ^= bc[ 1]; st[12] ^= bc[ 2]; st[13] ^= bc[ 3]; st[14] ^= bc[ 4]; bc[ 0] = ~st[16] & st[17]; bc[ 1] = ~st[17] & st[18]; bc[ 2] = ~st[18] & st[19]; bc[ 3] = ~st[19] & st[15]; bc[ 4] = ~st[15] & st[16]; st[15] ^= bc[ 0]; st[16] ^= bc[ 1]; st[17] ^= bc[ 2]; st[18] ^= bc[ 3]; st[19] ^= bc[ 4]; bc[ 0] = ~st[21] & st[22]; bc[ 1] = ~st[22] & st[23]; bc[ 2] = ~st[23] & st[24]; bc[ 3] = ~st[24] & st[20]; bc[ 4] = ~st[20] & st[21]; st[20] ^= bc[ 0]; st[21] ^= bc[ 1]; st[22] ^= bc[ 2]; st[23] ^= bc[ 3]; st[24] ^= bc[ 4]; /* Iota */ st[0] ^= sha3_keccakf_rndc[round]; } /* Generic implementation of the Keccak-f[1600] permutation */ static void sha3_keccakf_generic(struct sha3_state *state) { /* * Temporarily convert the state words from little-endian to native- * endian so that they can be operated on. Note that on little-endian * machines this conversion is a no-op and is optimized out. */ for (int i = 0; i < ARRAY_SIZE(state->words); i++) state->native_words[i] = le64_to_cpu(state->words[i]); for (int round = 0; round < SHA3_KECCAK_ROUNDS; round++) sha3_keccakf_one_round_generic(state->native_words, round); for (int i = 0; i < ARRAY_SIZE(state->words); i++) state->words[i] = cpu_to_le64(state->native_words[i]); } /* * Generic implementation of absorbing the given nonzero number of full blocks * into the sponge function Keccak[r=8*block_size, c=1600-8*block_size]. */ static void __maybe_unused sha3_absorb_blocks_generic(struct sha3_state *state, const u8 *data, size_t nblocks, size_t block_size) { do { for (size_t i = 0; i < block_size; i += 8) state->words[i / 8] ^= get_unaligned((__le64 *)&data[i]); sha3_keccakf_generic(state); data += block_size; } while (--nblocks); } #ifdef CONFIG_CRYPTO_LIB_SHA3_ARCH #include "sha3.h" /* $(SRCARCH)/sha3.h */ #else #define sha3_keccakf sha3_keccakf_generic #define sha3_absorb_blocks sha3_absorb_blocks_generic #endif void __sha3_update(struct __sha3_ctx *ctx, const u8 *in, size_t in_len) { const size_t block_size = ctx->block_size; size_t absorb_offset = ctx->absorb_offset; /* Warn if squeezing has already begun. */ WARN_ON_ONCE(absorb_offset >= block_size); if (absorb_offset && absorb_offset + in_len >= block_size) { crypto_xor(&ctx->state.bytes[absorb_offset], in, block_size - absorb_offset); in += block_size - absorb_offset; in_len -= block_size - absorb_offset; sha3_keccakf(&ctx->state); absorb_offset = 0; } if (in_len >= block_size) { size_t nblocks = in_len / block_size; sha3_absorb_blocks(&ctx->state, in, nblocks, block_size); in += nblocks * block_size; in_len -= nblocks * block_size; } if (in_len) { crypto_xor(&ctx->state.bytes[absorb_offset], in, in_len); absorb_offset += in_len; } ctx->absorb_offset = absorb_offset; } EXPORT_SYMBOL_GPL(__sha3_update); void sha3_final(struct sha3_ctx *sha3_ctx, u8 *out) { struct __sha3_ctx *ctx = &sha3_ctx->ctx; ctx->state.bytes[ctx->absorb_offset] ^= 0x06; ctx->state.bytes[ctx->block_size - 1] ^= 0x80; sha3_keccakf(&ctx->state); memcpy(out, ctx->state.bytes, ctx->digest_size); sha3_zeroize_ctx(sha3_ctx); } EXPORT_SYMBOL_GPL(sha3_final); void shake_squeeze(struct shake_ctx *shake_ctx, u8 *out, size_t out_len) { struct __sha3_ctx *ctx = &shake_ctx->ctx; const size_t block_size = ctx->block_size; size_t squeeze_offset = ctx->squeeze_offset; if (ctx->absorb_offset < block_size) { /* First squeeze: */ /* Add the domain separation suffix and padding. */ ctx->state.bytes[ctx->absorb_offset] ^= 0x1f; ctx->state.bytes[block_size - 1] ^= 0x80; /* Indicate that squeezing has begun. */ ctx->absorb_offset = block_size; /* * Indicate that no output is pending yet, i.e. sha3_keccakf() * will need to be called before the first copy. */ squeeze_offset = block_size; } while (out_len) { if (squeeze_offset == block_size) { sha3_keccakf(&ctx->state); squeeze_offset = 0; } size_t copy = min(out_len, block_size - squeeze_offset); memcpy(out, &ctx->state.bytes[squeeze_offset], copy); out += copy; out_len -= copy; squeeze_offset += copy; } ctx->squeeze_offset = squeeze_offset; } EXPORT_SYMBOL_GPL(shake_squeeze); #ifndef sha3_224_arch static inline bool sha3_224_arch(const u8 *in, size_t in_len, u8 out[SHA3_224_DIGEST_SIZE]) { return false; } #endif #ifndef sha3_256_arch static inline bool sha3_256_arch(const u8 *in, size_t in_len, u8 out[SHA3_256_DIGEST_SIZE]) { return false; } #endif #ifndef sha3_384_arch static inline bool sha3_384_arch(const u8 *in, size_t in_len, u8 out[SHA3_384_DIGEST_SIZE]) { return false; } #endif #ifndef sha3_512_arch static inline bool sha3_512_arch(const u8 *in, size_t in_len, u8 out[SHA3_512_DIGEST_SIZE]) { return false; } #endif void sha3_224(const u8 *in, size_t in_len, u8 out[SHA3_224_DIGEST_SIZE]) { struct sha3_ctx ctx; if (sha3_224_arch(in, in_len, out)) return; sha3_224_init(&ctx); sha3_update(&ctx, in, in_len); sha3_final(&ctx, out); } EXPORT_SYMBOL_GPL(sha3_224); void sha3_256(const u8 *in, size_t in_len, u8 out[SHA3_256_DIGEST_SIZE]) { struct sha3_ctx ctx; if (sha3_256_arch(in, in_len, out)) return; sha3_256_init(&ctx); sha3_update(&ctx, in, in_len); sha3_final(&ctx, out); } EXPORT_SYMBOL_GPL(sha3_256); void sha3_384(const u8 *in, size_t in_len, u8 out[SHA3_384_DIGEST_SIZE]) { struct sha3_ctx ctx; if (sha3_384_arch(in, in_len, out)) return; sha3_384_init(&ctx); sha3_update(&ctx, in, in_len); sha3_final(&ctx, out); } EXPORT_SYMBOL_GPL(sha3_384); void sha3_512(const u8 *in, size_t in_len, u8 out[SHA3_512_DIGEST_SIZE]) { struct sha3_ctx ctx; if (sha3_512_arch(in, in_len, out)) return; sha3_512_init(&ctx); sha3_update(&ctx, in, in_len); sha3_final(&ctx, out); } EXPORT_SYMBOL_GPL(sha3_512); void shake128(const u8 *in, size_t in_len, u8 *out, size_t out_len) { struct shake_ctx ctx; shake128_init(&ctx); shake_update(&ctx, in, in_len); shake_squeeze(&ctx, out, out_len); shake_zeroize_ctx(&ctx); } EXPORT_SYMBOL_GPL(shake128); void shake256(const u8 *in, size_t in_len, u8 *out, size_t out_len) { struct shake_ctx ctx; shake256_init(&ctx); shake_update(&ctx, in, in_len); shake_squeeze(&ctx, out, out_len); shake_zeroize_ctx(&ctx); } EXPORT_SYMBOL_GPL(shake256); #if defined(sha3_mod_init_arch) || defined(CONFIG_CRYPTO_FIPS) static int __init sha3_mod_init(void) { #ifdef sha3_mod_init_arch sha3_mod_init_arch(); #endif if (fips_enabled) { /* * FIPS cryptographic algorithm self-test. As per the FIPS * Implementation Guidance, testing any SHA-3 algorithm * satisfies the test requirement for all of them. */ u8 hash[SHA3_256_DIGEST_SIZE]; sha3_256(fips_test_data, sizeof(fips_test_data), hash); if (memcmp(fips_test_sha3_256_value, hash, sizeof(hash)) != 0) panic("sha3: FIPS self-test failed\n"); } return 0; } subsys_initcall(sha3_mod_init); static void __exit sha3_mod_exit(void) { } module_exit(sha3_mod_exit); #endif MODULE_DESCRIPTION("SHA-3 library functions"); MODULE_LICENSE("GPL");
16 5 5 9 10 9 5 3 124 124 4 12 5 3 3 5 4 126 126 127 14 14 1 1 1 1 11 2 3 3 3 6 3 3 1 3 1 11 21 22 22 1 22 4 4 4 4 4 4 4 4 4 4 4 3 1 14 14 1 1 9 2 3 5 4 8 1 2 6 8 5 3 4 4 94 55 1 38 38 1 30 6 6 6 36 9 28 37 37 22 1 1 1 1 19 19 19 7 12 19 18 18 18 18 18 4 17 2 6 70 1 38 1 2 81 50 38 143 145 1 144 1 53 33 25 1 45 4 5 49 1 2 35 11 9 16 90 9 130 129 12 8 2 129 2 132 22 111 128 4 42 89 129 110 22 15 1 130 94 22 3 10 94 131 131 30 7 36 1 2 1 1 1 1 1 1 8 1 2 1 1 1 1 122 3 1 3 115 9 1 1 1 1 1 3 45 2 1 5 38 5 15 1 2 1 12 9 6 3 9 11 10 1 4 4 3 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 // SPDX-License-Identifier: GPL-2.0-or-later /* * RAW sockets for IPv6 * Linux INET6 implementation * * Authors: * Pedro Roque <roque@di.fc.ul.pt> * * Adapted from linux/net/ipv4/raw.c * * Fixes: * Hideaki YOSHIFUJI : sin6_scope_id support * YOSHIFUJI,H.@USAGI : raw checksum (RFC2292(bis) compliance) * Kazunori MIYAZAWA @USAGI: change process style to use ip6_append_data */ #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/slab.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/in6.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/icmpv6.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> #include <linux/skbuff.h> #include <linux/compat.h> #include <linux/uaccess.h> #include <asm/ioctls.h> #include <net/net_namespace.h> #include <net/ip.h> #include <net/sock.h> #include <net/snmp.h> #include <net/ipv6.h> #include <net/ndisc.h> #include <net/protocol.h> #include <net/ip6_route.h> #include <net/ip6_checksum.h> #include <net/addrconf.h> #include <net/transp_v6.h> #include <net/udp.h> #include <net/inet_common.h> #include <net/tcp_states.h> #if IS_ENABLED(CONFIG_IPV6_MIP6) #include <net/mip6.h> #endif #include <linux/mroute6.h> #include <net/raw.h> #include <net/rawv6.h> #include <net/xfrm.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/export.h> #define ICMPV6_HDRLEN 4 /* ICMPv6 header, RFC 4443 Section 2.1 */ struct raw_hashinfo raw_v6_hashinfo; EXPORT_SYMBOL_GPL(raw_v6_hashinfo); bool raw_v6_match(struct net *net, const struct sock *sk, unsigned short num, const struct in6_addr *loc_addr, const struct in6_addr *rmt_addr, int dif, int sdif) { if (inet_sk(sk)->inet_num != num || !net_eq(sock_net(sk), net) || (!ipv6_addr_any(&sk->sk_v6_daddr) && !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) || !raw_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif)) return false; if (ipv6_addr_any(&sk->sk_v6_rcv_saddr) || ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr) || (ipv6_addr_is_multicast(loc_addr) && inet6_mc_check(sk, loc_addr, rmt_addr))) return true; return false; } EXPORT_SYMBOL_GPL(raw_v6_match); /* * 0 - deliver * 1 - block */ static int icmpv6_filter(const struct sock *sk, const struct sk_buff *skb) { struct icmp6hdr _hdr; const struct icmp6hdr *hdr; /* We require only the four bytes of the ICMPv6 header, not any * additional bytes of message body in "struct icmp6hdr". */ hdr = skb_header_pointer(skb, skb_transport_offset(skb), ICMPV6_HDRLEN, &_hdr); if (hdr) { const __u32 *data = &raw6_sk(sk)->filter.data[0]; unsigned int type = hdr->icmp6_type; return (data[type >> 5] & (1U << (type & 31))) != 0; } return 1; } #if IS_ENABLED(CONFIG_IPV6_MIP6) typedef int mh_filter_t(struct sock *sock, struct sk_buff *skb); static mh_filter_t __rcu *mh_filter __read_mostly; int rawv6_mh_filter_register(mh_filter_t filter) { rcu_assign_pointer(mh_filter, filter); return 0; } EXPORT_SYMBOL(rawv6_mh_filter_register); int rawv6_mh_filter_unregister(mh_filter_t filter) { RCU_INIT_POINTER(mh_filter, NULL); synchronize_rcu(); return 0; } EXPORT_SYMBOL(rawv6_mh_filter_unregister); #endif /* * demultiplex raw sockets. * (should consider queueing the skb in the sock receive_queue * without calling rawv6.c) * * Caller owns SKB so we must make clones. */ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) { struct net *net = dev_net(skb->dev); const struct in6_addr *saddr; const struct in6_addr *daddr; struct hlist_head *hlist; struct sock *sk; bool delivered = false; __u8 hash; saddr = &ipv6_hdr(skb)->saddr; daddr = saddr + 1; hash = raw_hashfunc(net, nexthdr); hlist = &raw_v6_hashinfo.ht[hash]; rcu_read_lock(); sk_for_each_rcu(sk, hlist) { int filtered; if (!raw_v6_match(net, sk, nexthdr, daddr, saddr, inet6_iif(skb), inet6_sdif(skb))) continue; if (atomic_read(&sk->sk_rmem_alloc) >= READ_ONCE(sk->sk_rcvbuf)) { sk_drops_inc(sk); continue; } delivered = true; switch (nexthdr) { case IPPROTO_ICMPV6: filtered = icmpv6_filter(sk, skb); break; #if IS_ENABLED(CONFIG_IPV6_MIP6) case IPPROTO_MH: { /* XXX: To validate MH only once for each packet, * this is placed here. It should be after checking * xfrm policy, however it doesn't. The checking xfrm * policy is placed in rawv6_rcv() because it is * required for each socket. */ mh_filter_t *filter; filter = rcu_dereference(mh_filter); filtered = filter ? (*filter)(sk, skb) : 0; break; } #endif default: filtered = 0; break; } if (filtered < 0) break; if (filtered == 0) { struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); /* Not releasing hash table! */ if (clone) rawv6_rcv(sk, clone); } } rcu_read_unlock(); return delivered; } bool raw6_local_deliver(struct sk_buff *skb, int nexthdr) { return ipv6_raw_deliver(skb, nexthdr); } /* This cleans up af_inet6 a bit. -DaveM */ static int rawv6_bind(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct sockaddr_in6 *addr = (struct sockaddr_in6 *) uaddr; __be32 v4addr = 0; int addr_type; int err; if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; if (addr->sin6_family != AF_INET6) return -EINVAL; addr_type = ipv6_addr_type(&addr->sin6_addr); /* Raw sockets are IPv6 only */ if (addr_type == IPV6_ADDR_MAPPED) return -EADDRNOTAVAIL; lock_sock(sk); err = -EINVAL; if (sk->sk_state != TCP_CLOSE) goto out; rcu_read_lock(); /* Check if the address belongs to the host. */ if (addr_type != IPV6_ADDR_ANY) { struct net_device *dev = NULL; if (__ipv6_addr_needs_scope_id(addr_type)) { if (addr_len >= sizeof(struct sockaddr_in6) && addr->sin6_scope_id) { /* Override any existing binding, if another * one is supplied by user. */ sk->sk_bound_dev_if = addr->sin6_scope_id; } /* Binding to link-local address requires an interface */ if (!sk->sk_bound_dev_if) goto out_unlock; } if (sk->sk_bound_dev_if) { err = -ENODEV; dev = dev_get_by_index_rcu(sock_net(sk), sk->sk_bound_dev_if); if (!dev) goto out_unlock; } /* ipv4 addr of the socket is invalid. Only the * unspecified and mapped address have a v4 equivalent. */ v4addr = LOOPBACK4_IPV6; if (!(addr_type & IPV6_ADDR_MULTICAST) && !ipv6_can_nonlocal_bind(sock_net(sk), inet)) { err = -EADDRNOTAVAIL; if (!ipv6_chk_addr(sock_net(sk), &addr->sin6_addr, dev, 0)) { goto out_unlock; } } } inet->inet_rcv_saddr = inet->inet_saddr = v4addr; sk->sk_v6_rcv_saddr = addr->sin6_addr; if (!(addr_type & IPV6_ADDR_MULTICAST)) np->saddr = addr->sin6_addr; err = 0; out_unlock: rcu_read_unlock(); out: release_sock(sk); return err; } static void rawv6_err(struct sock *sk, struct sk_buff *skb, u8 type, u8 code, int offset, __be32 info) { bool recverr = inet6_test_bit(RECVERR6, sk); struct ipv6_pinfo *np = inet6_sk(sk); int err; int harderr; /* Report error on raw socket, if: 1. User requested recverr. 2. Socket is connected (otherwise the error indication is useless without recverr and error is hard. */ if (!recverr && sk->sk_state != TCP_ESTABLISHED) return; harderr = icmpv6_err_convert(type, code, &err); if (type == ICMPV6_PKT_TOOBIG) { ip6_sk_update_pmtu(skb, sk, info); harderr = (READ_ONCE(np->pmtudisc) == IPV6_PMTUDISC_DO); } if (type == NDISC_REDIRECT) { ip6_sk_redirect(skb, sk); return; } if (recverr) { u8 *payload = skb->data; if (!inet_test_bit(HDRINCL, sk)) payload += offset; ipv6_icmp_error(sk, skb, err, 0, ntohl(info), payload); } if (recverr || harderr) { sk->sk_err = err; sk_error_report(sk); } } void raw6_icmp_error(struct sk_buff *skb, int nexthdr, u8 type, u8 code, int inner_offset, __be32 info) { struct net *net = dev_net(skb->dev); struct hlist_head *hlist; struct sock *sk; int hash; hash = raw_hashfunc(net, nexthdr); hlist = &raw_v6_hashinfo.ht[hash]; rcu_read_lock(); sk_for_each_rcu(sk, hlist) { /* Note: ipv6_hdr(skb) != skb->data */ const struct ipv6hdr *ip6h = (const struct ipv6hdr *)skb->data; if (!raw_v6_match(net, sk, nexthdr, &ip6h->saddr, &ip6h->daddr, inet6_iif(skb), inet6_iif(skb))) continue; rawv6_err(sk, skb, type, code, inner_offset, info); } rcu_read_unlock(); } static inline int rawv6_rcv_skb(struct sock *sk, struct sk_buff *skb) { enum skb_drop_reason reason; if ((raw6_sk(sk)->checksum || rcu_access_pointer(sk->sk_filter)) && skb_checksum_complete(skb)) { sk_drops_inc(sk); sk_skb_reason_drop(sk, skb, SKB_DROP_REASON_SKB_CSUM); return NET_RX_DROP; } /* Charge it to the socket. */ skb_dst_drop(skb); if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) { sk_skb_reason_drop(sk, skb, reason); return NET_RX_DROP; } return 0; } /* * This is next to useless... * if we demultiplex in network layer we don't need the extra call * just to queue the skb... * maybe we could have the network decide upon a hint if it * should call raw_rcv for demultiplexing */ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) { struct inet_sock *inet = inet_sk(sk); struct raw6_sock *rp = raw6_sk(sk); if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { sk_drops_inc(sk); sk_skb_reason_drop(sk, skb, SKB_DROP_REASON_XFRM_POLICY); return NET_RX_DROP; } nf_reset_ct(skb); if (!rp->checksum) skb->ip_summed = CHECKSUM_UNNECESSARY; if (skb->ip_summed == CHECKSUM_COMPLETE) { skb_postpull_rcsum(skb, skb_network_header(skb), skb_network_header_len(skb)); if (!csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->len, inet->inet_num, skb->csum)) skb->ip_summed = CHECKSUM_UNNECESSARY; } if (!skb_csum_unnecessary(skb)) skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, skb->len, inet->inet_num, 0)); if (inet_test_bit(HDRINCL, sk)) { if (skb_checksum_complete(skb)) { sk_drops_inc(sk); sk_skb_reason_drop(sk, skb, SKB_DROP_REASON_SKB_CSUM); return NET_RX_DROP; } } rawv6_rcv_skb(sk, skb); return 0; } /* * This should be easy, if there is something there * we return it, otherwise we block. */ static int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); struct sk_buff *skb; size_t copied; int err; if (flags & MSG_OOB) return -EOPNOTSUPP; if (flags & MSG_ERRQUEUE) return ipv6_recv_error(sk, msg, len, addr_len); if (np->rxopt.bits.rxpmtu && READ_ONCE(np->rxpmtu)) return ipv6_recv_rxpmtu(sk, msg, len, addr_len); skb = skb_recv_datagram(sk, flags, &err); if (!skb) goto out; copied = skb->len; if (copied > len) { copied = len; msg->msg_flags |= MSG_TRUNC; } if (skb_csum_unnecessary(skb)) { err = skb_copy_datagram_msg(skb, 0, msg, copied); } else if (msg->msg_flags&MSG_TRUNC) { if (__skb_checksum_complete(skb)) goto csum_copy_err; err = skb_copy_datagram_msg(skb, 0, msg, copied); } else { err = skb_copy_and_csum_datagram_msg(skb, 0, msg); if (err == -EINVAL) goto csum_copy_err; } if (err) goto out_free; /* Copy the address. */ if (sin6) { sin6->sin6_family = AF_INET6; sin6->sin6_port = 0; sin6->sin6_addr = ipv6_hdr(skb)->saddr; sin6->sin6_flowinfo = 0; sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, inet6_iif(skb)); *addr_len = sizeof(*sin6); } sock_recv_cmsgs(msg, sk, skb); if (np->rxopt.all) ip6_datagram_recv_ctl(sk, msg, skb); err = copied; if (flags & MSG_TRUNC) err = skb->len; out_free: skb_free_datagram(sk, skb); out: return err; csum_copy_err: skb_kill_datagram(sk, skb, flags); /* Error for blocking case is chosen to masquerade as some normal condition. */ err = (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH; goto out; } static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, struct raw6_sock *rp) { struct ipv6_txoptions *opt; struct sk_buff *skb; int err = 0; int offset; int len; int total_len; __wsum tmp_csum; __sum16 csum; if (!rp->checksum) goto send; skb = skb_peek(&sk->sk_write_queue); if (!skb) goto out; offset = rp->offset; total_len = inet_sk(sk)->cork.base.length; opt = inet6_sk(sk)->cork.opt; total_len -= opt ? opt->opt_flen : 0; if (offset >= total_len - 1) { err = -EINVAL; ip6_flush_pending_frames(sk); goto out; } /* should be check HW csum miyazawa */ if (skb_queue_len(&sk->sk_write_queue) == 1) { /* * Only one fragment on the socket. */ tmp_csum = skb->csum; } else { struct sk_buff *csum_skb = NULL; tmp_csum = 0; skb_queue_walk(&sk->sk_write_queue, skb) { tmp_csum = csum_add(tmp_csum, skb->csum); if (csum_skb) continue; len = skb->len - skb_transport_offset(skb); if (offset >= len) { offset -= len; continue; } csum_skb = skb; } skb = csum_skb; } offset += skb_transport_offset(skb); err = skb_copy_bits(skb, offset, &csum, 2); if (err < 0) { ip6_flush_pending_frames(sk); goto out; } /* in case cksum was not initialized */ if (unlikely(csum)) tmp_csum = csum_sub(tmp_csum, csum_unfold(csum)); csum = csum_ipv6_magic(&fl6->saddr, &fl6->daddr, total_len, fl6->flowi6_proto, tmp_csum); if (csum == 0 && fl6->flowi6_proto == IPPROTO_UDP) csum = CSUM_MANGLED_0; BUG_ON(skb_store_bits(skb, offset, &csum, 2)); send: err = ip6_push_pending_frames(sk); out: return err; } static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, struct flowi6 *fl6, struct dst_entry **dstp, unsigned int flags, const struct sockcm_cookie *sockc) { struct net *net = sock_net(sk); struct ipv6hdr *iph; struct sk_buff *skb; int err; struct rt6_info *rt = dst_rt6_info(*dstp); int hlen = LL_RESERVED_SPACE(rt->dst.dev); int tlen = rt->dst.dev->needed_tailroom; if (length > rt->dst.dev->mtu) { ipv6_local_error(sk, EMSGSIZE, fl6, rt->dst.dev->mtu); return -EMSGSIZE; } if (length < sizeof(struct ipv6hdr)) return -EINVAL; if (flags&MSG_PROBE) goto out; skb = sock_alloc_send_skb(sk, length + hlen + tlen + 15, flags & MSG_DONTWAIT, &err); if (!skb) goto error; skb_reserve(skb, hlen); skb->protocol = htons(ETH_P_IPV6); skb->priority = sockc->priority; skb->mark = sockc->mark; skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, sk->sk_clockid); skb_put(skb, length); skb_reset_network_header(skb); iph = ipv6_hdr(skb); skb->ip_summed = CHECKSUM_NONE; skb_setup_tx_timestamp(skb, sockc); if (flags & MSG_CONFIRM) skb_set_dst_pending_confirm(skb, 1); skb->transport_header = skb->network_header; err = memcpy_from_msg(iph, msg, length); if (err) { err = -EFAULT; kfree_skb(skb); goto error; } skb_dst_set(skb, &rt->dst); *dstp = NULL; /* if egress device is enslaved to an L3 master device pass the * skb to its handler for processing */ skb = l3mdev_ip6_out(sk, skb); if (unlikely(!skb)) return 0; /* Acquire rcu_read_lock() in case we need to use rt->rt6i_idev * in the error path. Since skb has been freed, the dst could * have been queued for deletion. */ rcu_read_lock(); IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, rt->dst.dev, dst_output); if (err > 0) err = net_xmit_errno(err); if (err) { IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); rcu_read_unlock(); goto error_check; } rcu_read_unlock(); out: return 0; error: IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); error_check: if (err == -ENOBUFS && !inet6_test_bit(RECVERR6, sk)) err = 0; return err; } struct raw6_frag_vec { struct msghdr *msg; int hlen; char c[4]; }; static int rawv6_probe_proto_opt(struct raw6_frag_vec *rfv, struct flowi6 *fl6) { int err = 0; switch (fl6->flowi6_proto) { case IPPROTO_ICMPV6: rfv->hlen = 2; err = memcpy_from_msg(rfv->c, rfv->msg, rfv->hlen); if (!err) { fl6->fl6_icmp_type = rfv->c[0]; fl6->fl6_icmp_code = rfv->c[1]; } break; case IPPROTO_MH: rfv->hlen = 4; err = memcpy_from_msg(rfv->c, rfv->msg, rfv->hlen); if (!err) fl6->fl6_mh_type = rfv->c[2]; } return err; } static int raw6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb) { struct raw6_frag_vec *rfv = from; if (offset < rfv->hlen) { int copy = min(rfv->hlen - offset, len); if (skb->ip_summed == CHECKSUM_PARTIAL) memcpy(to, rfv->c + offset, copy); else skb->csum = csum_block_add( skb->csum, csum_partial_copy_nocheck(rfv->c + offset, to, copy), odd); odd = 0; offset += copy; to += copy; len -= copy; if (!len) return 0; } offset -= rfv->hlen; return ip_generic_getfrag(rfv->msg, to, offset, len, odd, skb); } static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct ipv6_txoptions *opt_to_free = NULL; struct ipv6_txoptions opt_space; DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); struct in6_addr *daddr, *final_p, final; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct raw6_sock *rp = raw6_sk(sk); struct ipv6_txoptions *opt = NULL; struct ip6_flowlabel *flowlabel = NULL; struct dst_entry *dst = NULL; struct raw6_frag_vec rfv; struct flowi6 fl6; struct ipcm6_cookie ipc6; int addr_len = msg->msg_namelen; int hdrincl; u16 proto; int err; /* Rough check on arithmetic overflow, better check is made in ip6_append_data(). */ if (len > INT_MAX) return -EMSGSIZE; /* Mirror BSD error message compatibility */ if (msg->msg_flags & MSG_OOB) return -EOPNOTSUPP; hdrincl = inet_test_bit(HDRINCL, sk); ipcm6_init_sk(&ipc6, sk); /* * Get and verify the address. */ memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_mark = ipc6.sockc.mark; fl6.flowi6_uid = sk_uid(sk); if (sin6) { if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; if (sin6->sin6_family && sin6->sin6_family != AF_INET6) return -EAFNOSUPPORT; /* port is the proto value [0..255] carried in nexthdr */ proto = ntohs(sin6->sin6_port); if (!proto) proto = inet->inet_num; else if (proto != inet->inet_num && inet->inet_num != IPPROTO_RAW) return -EINVAL; if (proto > 255) return -EINVAL; daddr = &sin6->sin6_addr; if (inet6_test_bit(SNDFLOW, sk)) { fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (IS_ERR(flowlabel)) return -EINVAL; } } /* * Otherwise it will be difficult to maintain * sk->sk_dst_cache. */ if (sk->sk_state == TCP_ESTABLISHED && ipv6_addr_equal(daddr, &sk->sk_v6_daddr)) daddr = &sk->sk_v6_daddr; if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && __ipv6_addr_needs_scope_id(__ipv6_addr_type(daddr))) fl6.flowi6_oif = sin6->sin6_scope_id; } else { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; proto = inet->inet_num; daddr = &sk->sk_v6_daddr; fl6.flowlabel = np->flow_label; } if (fl6.flowi6_oif == 0) fl6.flowi6_oif = sk->sk_bound_dev_if; if (msg->msg_controllen) { opt = &opt_space; memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(struct ipv6_txoptions); ipc6.opt = opt; err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6, &ipc6); if (err < 0) { fl6_sock_release(flowlabel); return err; } if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (IS_ERR(flowlabel)) return -EINVAL; } if (!(opt->opt_nflen|opt->opt_flen)) opt = NULL; } if (!opt) { opt = txopt_get(np); opt_to_free = opt; } if (flowlabel) opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); fl6.flowi6_proto = proto; fl6.flowi6_mark = ipc6.sockc.mark; if (!hdrincl) { rfv.msg = msg; rfv.hlen = 0; err = rawv6_probe_proto_opt(&rfv, &fl6); if (err) goto out; } if (!ipv6_addr_any(daddr)) fl6.daddr = *daddr; else fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */ if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr)) fl6.saddr = np->saddr; final_p = fl6_update_dst(&fl6, opt, &final); if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) fl6.flowi6_oif = READ_ONCE(np->mcast_oif); else if (!fl6.flowi6_oif) fl6.flowi6_oif = READ_ONCE(np->ucast_oif); security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); if (hdrincl) fl6.flowi6_flags |= FLOWI_FLAG_KNOWN_NH; fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto out; } if (ipc6.hlimit < 0) ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; back_from_confirm: if (hdrincl) err = rawv6_send_hdrinc(sk, msg, len, &fl6, &dst, msg->msg_flags, &ipc6.sockc); else { ipc6.opt = opt; lock_sock(sk); err = ip6_append_data(sk, raw6_getfrag, &rfv, len, 0, &ipc6, &fl6, dst_rt6_info(dst), msg->msg_flags); if (err) ip6_flush_pending_frames(sk); else if (!(msg->msg_flags & MSG_MORE)) err = rawv6_push_pending_frames(sk, &fl6, rp); release_sock(sk); } done: dst_release(dst); out: fl6_sock_release(flowlabel); txopt_put(opt_to_free); return err < 0 ? err : len; do_confirm: if (msg->msg_flags & MSG_PROBE) dst_confirm_neigh(dst, &fl6.daddr); if (!(msg->msg_flags & MSG_PROBE) || len) goto back_from_confirm; err = 0; goto done; } static int rawv6_seticmpfilter(struct sock *sk, int optname, sockptr_t optval, int optlen) { switch (optname) { case ICMPV6_FILTER: if (optlen > sizeof(struct icmp6_filter)) optlen = sizeof(struct icmp6_filter); if (copy_from_sockptr(&raw6_sk(sk)->filter, optval, optlen)) return -EFAULT; return 0; default: return -ENOPROTOOPT; } return 0; } static int rawv6_geticmpfilter(struct sock *sk, int optname, char __user *optval, int __user *optlen) { int len; switch (optname) { case ICMPV6_FILTER: if (get_user(len, optlen)) return -EFAULT; if (len < 0) return -EINVAL; if (len > sizeof(struct icmp6_filter)) len = sizeof(struct icmp6_filter); if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &raw6_sk(sk)->filter, len)) return -EFAULT; return 0; default: return -ENOPROTOOPT; } return 0; } static int do_rawv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) { struct raw6_sock *rp = raw6_sk(sk); int val; if (optlen < sizeof(val)) return -EINVAL; if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; switch (optname) { case IPV6_HDRINCL: if (sk->sk_type != SOCK_RAW) return -EINVAL; inet_assign_bit(HDRINCL, sk, val); return 0; case IPV6_CHECKSUM: if (inet_sk(sk)->inet_num == IPPROTO_ICMPV6 && level == IPPROTO_IPV6) { /* * RFC3542 tells that IPV6_CHECKSUM socket * option in the IPPROTO_IPV6 level is not * allowed on ICMPv6 sockets. * If you want to set it, use IPPROTO_RAW * level IPV6_CHECKSUM socket option * (Linux extension). */ return -EINVAL; } /* You may get strange result with a positive odd offset; RFC2292bis agrees with me. */ if (val > 0 && (val&1)) return -EINVAL; if (val < 0) { rp->checksum = 0; } else { rp->checksum = 1; rp->offset = val; } return 0; default: return -ENOPROTOOPT; } } static int rawv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, unsigned int optlen) { switch (level) { case SOL_RAW: break; case SOL_ICMPV6: if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) return -EOPNOTSUPP; return rawv6_seticmpfilter(sk, optname, optval, optlen); case SOL_IPV6: if (optname == IPV6_CHECKSUM || optname == IPV6_HDRINCL) break; fallthrough; default: return ipv6_setsockopt(sk, level, optname, optval, optlen); } return do_rawv6_setsockopt(sk, level, optname, optval, optlen); } static int do_rawv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { struct raw6_sock *rp = raw6_sk(sk); int val, len; if (get_user(len, optlen)) return -EFAULT; switch (optname) { case IPV6_HDRINCL: val = inet_test_bit(HDRINCL, sk); break; case IPV6_CHECKSUM: /* * We allow getsockopt() for IPPROTO_IPV6-level * IPV6_CHECKSUM socket option on ICMPv6 sockets * since RFC3542 is silent about it. */ if (rp->checksum == 0) val = -1; else val = rp->offset; break; default: return -ENOPROTOOPT; } len = min_t(unsigned int, sizeof(int), len); if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &val, len)) return -EFAULT; return 0; } static int rawv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { switch (level) { case SOL_RAW: break; case SOL_ICMPV6: if (inet_sk(sk)->inet_num != IPPROTO_ICMPV6) return -EOPNOTSUPP; return rawv6_geticmpfilter(sk, optname, optval, optlen); case SOL_IPV6: if (optname == IPV6_CHECKSUM || optname == IPV6_HDRINCL) break; fallthrough; default: return ipv6_getsockopt(sk, level, optname, optval, optlen); } return do_rawv6_getsockopt(sk, level, optname, optval, optlen); } static int rawv6_ioctl(struct sock *sk, int cmd, int *karg) { switch (cmd) { case SIOCOUTQ: { *karg = sk_wmem_alloc_get(sk); return 0; } case SIOCINQ: { struct sk_buff *skb; spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); if (skb) *karg = skb->len; else *karg = 0; spin_unlock_bh(&sk->sk_receive_queue.lock); return 0; } default: #ifdef CONFIG_IPV6_MROUTE return ip6mr_ioctl(sk, cmd, karg); #else return -ENOIOCTLCMD; #endif } } #ifdef CONFIG_COMPAT static int compat_rawv6_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg) { switch (cmd) { case SIOCOUTQ: case SIOCINQ: return -ENOIOCTLCMD; default: #ifdef CONFIG_IPV6_MROUTE return ip6mr_compat_ioctl(sk, cmd, compat_ptr(arg)); #else return -ENOIOCTLCMD; #endif } } #endif static void rawv6_close(struct sock *sk, long timeout) { if (inet_sk(sk)->inet_num == IPPROTO_RAW) ip6_ra_control(sk, -1); ip6mr_sk_done(sk); sk_common_release(sk); } static void raw6_destroy(struct sock *sk) { lock_sock(sk); ip6_flush_pending_frames(sk); release_sock(sk); } static int rawv6_init_sk(struct sock *sk) { struct raw6_sock *rp = raw6_sk(sk); sk->sk_drop_counters = &rp->drop_counters; switch (inet_sk(sk)->inet_num) { case IPPROTO_ICMPV6: rp->checksum = 1; rp->offset = 2; break; case IPPROTO_MH: rp->checksum = 1; rp->offset = 4; break; default: break; } return 0; } struct proto rawv6_prot = { .name = "RAWv6", .owner = THIS_MODULE, .close = rawv6_close, .destroy = raw6_destroy, .connect = ip6_datagram_connect_v6_only, .disconnect = __udp_disconnect, .ioctl = rawv6_ioctl, .init = rawv6_init_sk, .setsockopt = rawv6_setsockopt, .getsockopt = rawv6_getsockopt, .sendmsg = rawv6_sendmsg, .recvmsg = rawv6_recvmsg, .bind = rawv6_bind, .backlog_rcv = rawv6_rcv_skb, .hash = raw_hash_sk, .unhash = raw_unhash_sk, .obj_size = sizeof(struct raw6_sock), .ipv6_pinfo_offset = offsetof(struct raw6_sock, inet6), .useroffset = offsetof(struct raw6_sock, filter), .usersize = sizeof_field(struct raw6_sock, filter), .h.raw_hash = &raw_v6_hashinfo, #ifdef CONFIG_COMPAT .compat_ioctl = compat_rawv6_ioctl, #endif .diag_destroy = raw_abort, }; #ifdef CONFIG_PROC_FS static int raw6_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) { seq_puts(seq, IPV6_SEQ_DGRAM_HEADER); } else { struct sock *sp = v; __u16 srcp = inet_sk(sp)->inet_num; ip6_dgram_sock_seq_show(seq, v, srcp, 0, raw_seq_private(seq)->bucket); } return 0; } static const struct seq_operations raw6_seq_ops = { .start = raw_seq_start, .next = raw_seq_next, .stop = raw_seq_stop, .show = raw6_seq_show, }; static int __net_init raw6_init_net(struct net *net) { if (!proc_create_net_data("raw6", 0444, net->proc_net, &raw6_seq_ops, sizeof(struct raw_iter_state), &raw_v6_hashinfo)) return -ENOMEM; return 0; } static void __net_exit raw6_exit_net(struct net *net) { remove_proc_entry("raw6", net->proc_net); } static struct pernet_operations raw6_net_ops = { .init = raw6_init_net, .exit = raw6_exit_net, }; int __init raw6_proc_init(void) { return register_pernet_subsys(&raw6_net_ops); } void raw6_proc_exit(void) { unregister_pernet_subsys(&raw6_net_ops); } #endif /* CONFIG_PROC_FS */ /* Same as inet6_dgram_ops, sans udp_poll. */ const struct proto_ops inet6_sockraw_ops = { .family = PF_INET6, .owner = THIS_MODULE, .release = inet6_release, .bind = inet6_bind, .connect = inet_dgram_connect, /* ok */ .socketpair = sock_no_socketpair, /* a do nothing */ .accept = sock_no_accept, /* a do nothing */ .getname = inet6_getname, .poll = datagram_poll, /* ok */ .ioctl = inet6_ioctl, /* must change */ .gettstamp = sock_gettstamp, .listen = sock_no_listen, /* ok */ .shutdown = inet_shutdown, /* ok */ .setsockopt = sock_common_setsockopt, /* ok */ .getsockopt = sock_common_getsockopt, /* ok */ .sendmsg = inet_sendmsg, /* ok */ .recvmsg = sock_common_recvmsg, /* ok */ .mmap = sock_no_mmap, #ifdef CONFIG_COMPAT .compat_ioctl = inet6_compat_ioctl, #endif }; static struct inet_protosw rawv6_protosw = { .type = SOCK_RAW, .protocol = IPPROTO_IP, /* wild card */ .prot = &rawv6_prot, .ops = &inet6_sockraw_ops, .flags = INET_PROTOSW_REUSE, }; int __init rawv6_init(void) { return inet6_register_protosw(&rawv6_protosw); } void rawv6_exit(void) { inet6_unregister_protosw(&rawv6_protosw); }
1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 // SPDX-License-Identifier: GPL-2.0-or-later /* * HID driver for EVision devices * For now, only ignore bogus consumer reports * sent after the keyboard has been configured * * Copyright (c) 2022 Philippe Valembois */ #include <linux/device.h> #include <linux/input.h> #include <linux/hid.h> #include <linux/module.h> #include "hid-ids.h" static int evision_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { /* mapping only applies to USB_DEVICE_ID_EVISION_ICL01 */ if (hdev->product != USB_DEVICE_ID_EVISION_ICL01) return 0; if ((usage->hid & HID_USAGE_PAGE) != HID_UP_CONSUMER) return 0; /* Ignore key down event */ if ((usage->hid & HID_USAGE) >> 8 == 0x05) return -1; /* Ignore key up event */ if ((usage->hid & HID_USAGE) >> 8 == 0x06) return -1; switch (usage->hid & HID_USAGE) { /* Ignore configuration saved event */ case 0x0401: return -1; /* Ignore reset event */ case 0x0402: return -1; } return 0; } #define REP_DSC_SIZE 236 #define USAGE_MAX_INDEX 59 static const __u8 *evision_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { if (hdev->product == USB_DEVICE_ID_EV_TELINK_RECEIVER && *rsize == REP_DSC_SIZE && rdesc[USAGE_MAX_INDEX] == 0x29 && rdesc[USAGE_MAX_INDEX + 1] == 3) { hid_info(hdev, "fixing EVision:TeLink Receiver report descriptor\n"); rdesc[USAGE_MAX_INDEX + 1] = 5; // change usage max from 3 to 5 } return rdesc; } static const struct hid_device_id evision_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_EVISION, USB_DEVICE_ID_EVISION_ICL01) }, { HID_USB_DEVICE(USB_VENDOR_ID_EVISION, USB_DEVICE_ID_EV_TELINK_RECEIVER) }, { } }; MODULE_DEVICE_TABLE(hid, evision_devices); static struct hid_driver evision_driver = { .name = "evision", .id_table = evision_devices, .input_mapping = evision_input_mapping, .report_fixup = evision_report_fixup, }; module_hid_driver(evision_driver); MODULE_DESCRIPTION("HID driver for EVision devices"); MODULE_LICENSE("GPL");
41 41 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 // SPDX-License-Identifier: GPL-2.0-only /* * OF helpers for network devices. * * Initially copied out of arch/powerpc/kernel/prom_parse.c */ #include <linux/etherdevice.h> #include <linux/kernel.h> #include <linux/of_net.h> #include <linux/of_platform.h> #include <linux/platform_device.h> #include <linux/phy.h> #include <linux/export.h> #include <linux/device.h> #include <linux/nvmem-consumer.h> /** * of_get_phy_mode - Get phy mode for given device_node * @np: Pointer to the given device_node * @interface: Pointer to the result * * The function gets phy interface string from property 'phy-mode' or * 'phy-connection-type'. The index in phy_modes table is set in * interface and 0 returned. In case of error interface is set to * PHY_INTERFACE_MODE_NA and an errno is returned, e.g. -ENODEV. */ int of_get_phy_mode(struct device_node *np, phy_interface_t *interface) { const char *pm; int err, i; *interface = PHY_INTERFACE_MODE_NA; err = of_property_read_string(np, "phy-mode", &pm); if (err < 0) err = of_property_read_string(np, "phy-connection-type", &pm); if (err < 0) return err; for (i = 0; i < PHY_INTERFACE_MODE_MAX; i++) if (!strcasecmp(pm, phy_modes(i))) { *interface = i; return 0; } return -ENODEV; } EXPORT_SYMBOL_GPL(of_get_phy_mode); static int of_get_mac_addr(struct device_node *np, const char *name, u8 *addr) { struct property *pp = of_find_property(np, name, NULL); if (pp && pp->length == ETH_ALEN && is_valid_ether_addr(pp->value)) { memcpy(addr, pp->value, ETH_ALEN); return 0; } return -ENODEV; } int of_get_mac_address_nvmem(struct device_node *np, u8 *addr) { struct platform_device *pdev = of_find_device_by_node(np); struct nvmem_cell *cell; const void *mac; size_t len; int ret; /* Try lookup by device first, there might be a nvmem_cell_lookup * associated with a given device. */ if (pdev) { ret = nvmem_get_mac_address(&pdev->dev, addr); put_device(&pdev->dev); return ret; } cell = of_nvmem_cell_get(np, "mac-address"); if (IS_ERR(cell)) return PTR_ERR(cell); mac = nvmem_cell_read(cell, &len); nvmem_cell_put(cell); if (IS_ERR(mac)) return PTR_ERR(mac); if (len != ETH_ALEN || !is_valid_ether_addr(mac)) { kfree(mac); return -EINVAL; } memcpy(addr, mac, ETH_ALEN); kfree(mac); return 0; } EXPORT_SYMBOL(of_get_mac_address_nvmem); /** * of_get_mac_address() * @np: Caller's Device Node * @addr: Pointer to a six-byte array for the result * * Search the device tree for the best MAC address to use. 'mac-address' is * checked first, because that is supposed to contain to "most recent" MAC * address. If that isn't set, then 'local-mac-address' is checked next, * because that is the default address. If that isn't set, then the obsolete * 'address' is checked, just in case we're using an old device tree. If any * of the above isn't set, then try to get MAC address from nvmem cell named * 'mac-address'. * * Note that the 'address' property is supposed to contain a virtual address of * the register set, but some DTS files have redefined that property to be the * MAC address. * * All-zero MAC addresses are rejected, because those could be properties that * exist in the device tree, but were not set by U-Boot. For example, the * DTS could define 'mac-address' and 'local-mac-address', with zero MAC * addresses. Some older U-Boots only initialized 'local-mac-address'. In * this case, the real MAC is in 'local-mac-address', and 'mac-address' exists * but is all zeros. * * Return: 0 on success and errno in case of error. */ int of_get_mac_address(struct device_node *np, u8 *addr) { int ret; if (!np) return -ENODEV; ret = of_get_mac_addr(np, "mac-address", addr); if (!ret) return 0; ret = of_get_mac_addr(np, "local-mac-address", addr); if (!ret) return 0; ret = of_get_mac_addr(np, "address", addr); if (!ret) return 0; return of_get_mac_address_nvmem(np, addr); } EXPORT_SYMBOL(of_get_mac_address); /** * of_get_ethdev_address() * @np: Caller's Device Node * @dev: Pointer to netdevice which address will be updated * * Search the device tree for the best MAC address to use. * If found set @dev->dev_addr to that address. * * See documentation of of_get_mac_address() for more information on how * the best address is determined. * * Return: 0 on success and errno in case of error. */ int of_get_ethdev_address(struct device_node *np, struct net_device *dev) { u8 addr[ETH_ALEN]; int ret; ret = of_get_mac_address(np, addr); if (!ret) eth_hw_addr_set(dev, addr); return ret; } EXPORT_SYMBOL(of_get_ethdev_address);
130 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 // SPDX-License-Identifier: GPL-2.0-or-later /* * Virtio PCI driver - common functionality for all device versions * * This module allows virtio devices to be used over a virtual PCI device. * This can be used with QEMU based VMMs like KVM or Xen. * * Copyright IBM Corp. 2007 * Copyright Red Hat, Inc. 2014 * * Authors: * Anthony Liguori <aliguori@us.ibm.com> * Rusty Russell <rusty@rustcorp.com.au> * Michael S. Tsirkin <mst@redhat.com> */ #include "virtio_pci_common.h" static bool force_legacy = false; #if IS_ENABLED(CONFIG_VIRTIO_PCI_LEGACY) module_param(force_legacy, bool, 0444); MODULE_PARM_DESC(force_legacy, "Force legacy mode for transitional virtio 1 devices"); #endif bool vp_is_avq(struct virtio_device *vdev, unsigned int index) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ)) return false; return index == vp_dev->admin_vq.vq_index; } /* wait for pending irq handlers */ void vp_synchronize_vectors(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); int i; if (vp_dev->intx_enabled) synchronize_irq(vp_dev->pci_dev->irq); for (i = 0; i < vp_dev->msix_vectors; ++i) synchronize_irq(pci_irq_vector(vp_dev->pci_dev, i)); } /* the notify function used when creating a virt queue */ bool vp_notify(struct virtqueue *vq) { /* we write the queue's selector into the notification register to * signal the other end */ iowrite16(vq->index, (void __iomem *)vq->priv); return true; } /* Notify all slow path virtqueues on an interrupt. */ static void vp_vring_slow_path_interrupt(int irq, struct virtio_pci_device *vp_dev) { struct virtio_pci_vq_info *info; unsigned long flags; spin_lock_irqsave(&vp_dev->lock, flags); list_for_each_entry(info, &vp_dev->slow_virtqueues, node) vring_interrupt(irq, info->vq); spin_unlock_irqrestore(&vp_dev->lock, flags); } /* Handle a configuration change: Tell driver if it wants to know. */ static irqreturn_t vp_config_changed(int irq, void *opaque) { struct virtio_pci_device *vp_dev = opaque; virtio_config_changed(&vp_dev->vdev); vp_vring_slow_path_interrupt(irq, vp_dev); return IRQ_HANDLED; } /* Notify all virtqueues on an interrupt. */ static irqreturn_t vp_vring_interrupt(int irq, void *opaque) { struct virtio_pci_device *vp_dev = opaque; struct virtio_pci_vq_info *info; irqreturn_t ret = IRQ_NONE; unsigned long flags; spin_lock_irqsave(&vp_dev->lock, flags); list_for_each_entry(info, &vp_dev->virtqueues, node) { if (vring_interrupt(irq, info->vq) == IRQ_HANDLED) ret = IRQ_HANDLED; } spin_unlock_irqrestore(&vp_dev->lock, flags); return ret; } /* A small wrapper to also acknowledge the interrupt when it's handled. * I really need an EIO hook for the vring so I can ack the interrupt once we * know that we'll be handling the IRQ but before we invoke the callback since * the callback may notify the host which results in the host attempting to * raise an interrupt that we would then mask once we acknowledged the * interrupt. */ static irqreturn_t vp_interrupt(int irq, void *opaque) { struct virtio_pci_device *vp_dev = opaque; u8 isr; /* reading the ISR has the effect of also clearing it so it's very * important to save off the value. */ isr = ioread8(vp_dev->isr); /* It's definitely not us if the ISR was not high */ if (!isr) return IRQ_NONE; /* Configuration change? Tell driver if it wants to know. */ if (isr & VIRTIO_PCI_ISR_CONFIG) vp_config_changed(irq, opaque); return vp_vring_interrupt(irq, opaque); } static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, bool per_vq_vectors, struct irq_affinity *desc) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); const char *name = dev_name(&vp_dev->vdev.dev); unsigned int flags = PCI_IRQ_MSIX; unsigned int i, v; int err = -ENOMEM; vp_dev->msix_vectors = nvectors; vp_dev->msix_names = kmalloc_array(nvectors, sizeof(*vp_dev->msix_names), GFP_KERNEL); if (!vp_dev->msix_names) goto error; vp_dev->msix_affinity_masks = kcalloc(nvectors, sizeof(*vp_dev->msix_affinity_masks), GFP_KERNEL); if (!vp_dev->msix_affinity_masks) goto error; for (i = 0; i < nvectors; ++i) if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i], GFP_KERNEL)) goto error; if (!per_vq_vectors) desc = NULL; if (desc) { flags |= PCI_IRQ_AFFINITY; desc->pre_vectors++; /* virtio config vector */ } err = pci_alloc_irq_vectors_affinity(vp_dev->pci_dev, nvectors, nvectors, flags, desc); if (err < 0) goto error; vp_dev->msix_enabled = 1; /* Set the vector used for configuration */ v = vp_dev->msix_used_vectors; snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, "%s-config", name); err = request_irq(pci_irq_vector(vp_dev->pci_dev, v), vp_config_changed, 0, vp_dev->msix_names[v], vp_dev); if (err) goto error; ++vp_dev->msix_used_vectors; v = vp_dev->config_vector(vp_dev, v); /* Verify we had enough resources to assign the vector */ if (v == VIRTIO_MSI_NO_VECTOR) { err = -EBUSY; goto error; } if (!per_vq_vectors) { /* Shared vector for all VQs */ v = vp_dev->msix_used_vectors; snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, "%s-virtqueues", name); err = request_irq(pci_irq_vector(vp_dev->pci_dev, v), vp_vring_interrupt, 0, vp_dev->msix_names[v], vp_dev); if (err) goto error; ++vp_dev->msix_used_vectors; } return 0; error: return err; } static bool vp_is_slow_path_vector(u16 msix_vec) { return msix_vec == VP_MSIX_CONFIG_VECTOR; } static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned int index, void (*callback)(struct virtqueue *vq), const char *name, bool ctx, u16 msix_vec, struct virtio_pci_vq_info **p_info) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_vq_info *info = kmalloc(sizeof *info, GFP_KERNEL); struct virtqueue *vq; unsigned long flags; /* fill out our structure that represents an active queue */ if (!info) return ERR_PTR(-ENOMEM); vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, ctx, msix_vec); if (IS_ERR(vq)) goto out_info; info->vq = vq; if (callback) { spin_lock_irqsave(&vp_dev->lock, flags); if (!vp_is_slow_path_vector(msix_vec)) list_add(&info->node, &vp_dev->virtqueues); else list_add(&info->node, &vp_dev->slow_virtqueues); spin_unlock_irqrestore(&vp_dev->lock, flags); } else { INIT_LIST_HEAD(&info->node); } *p_info = info; return vq; out_info: kfree(info); return vq; } static void vp_del_vq(struct virtqueue *vq, struct virtio_pci_vq_info *info) { struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); unsigned long flags; /* * If it fails during re-enable reset vq. This way we won't rejoin * info->node to the queue. Prevent unexpected irqs. */ if (!vq->reset) { spin_lock_irqsave(&vp_dev->lock, flags); list_del(&info->node); spin_unlock_irqrestore(&vp_dev->lock, flags); } vp_dev->del_vq(info); kfree(info); } /* the config->del_vqs() implementation */ void vp_del_vqs(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_vq_info *info; struct virtqueue *vq, *n; int i; list_for_each_entry_safe(vq, n, &vdev->vqs, list) { info = vp_is_avq(vdev, vq->index) ? vp_dev->admin_vq.info : vp_dev->vqs[vq->index]; if (vp_dev->per_vq_vectors) { int v = info->msix_vector; if (v != VIRTIO_MSI_NO_VECTOR && !vp_is_slow_path_vector(v)) { int irq = pci_irq_vector(vp_dev->pci_dev, v); irq_update_affinity_hint(irq, NULL); free_irq(irq, vq); } } vp_del_vq(vq, info); } vp_dev->per_vq_vectors = false; if (vp_dev->intx_enabled) { free_irq(vp_dev->pci_dev->irq, vp_dev); vp_dev->intx_enabled = 0; } for (i = 0; i < vp_dev->msix_used_vectors; ++i) free_irq(pci_irq_vector(vp_dev->pci_dev, i), vp_dev); if (vp_dev->msix_affinity_masks) { for (i = 0; i < vp_dev->msix_vectors; i++) free_cpumask_var(vp_dev->msix_affinity_masks[i]); } if (vp_dev->msix_enabled) { /* Disable the vector used for configuration */ vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR); pci_free_irq_vectors(vp_dev->pci_dev); vp_dev->msix_enabled = 0; } vp_dev->msix_vectors = 0; vp_dev->msix_used_vectors = 0; kfree(vp_dev->msix_names); vp_dev->msix_names = NULL; kfree(vp_dev->msix_affinity_masks); vp_dev->msix_affinity_masks = NULL; kfree(vp_dev->vqs); vp_dev->vqs = NULL; } enum vp_vq_vector_policy { VP_VQ_VECTOR_POLICY_EACH, VP_VQ_VECTOR_POLICY_SHARED_SLOW, VP_VQ_VECTOR_POLICY_SHARED, }; static struct virtqueue * vp_find_one_vq_msix(struct virtio_device *vdev, int queue_idx, vq_callback_t *callback, const char *name, bool ctx, bool slow_path, int *allocated_vectors, enum vp_vq_vector_policy vector_policy, struct virtio_pci_vq_info **p_info) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtqueue *vq; u16 msix_vec; int err; if (!callback) msix_vec = VIRTIO_MSI_NO_VECTOR; else if (vector_policy == VP_VQ_VECTOR_POLICY_EACH || (vector_policy == VP_VQ_VECTOR_POLICY_SHARED_SLOW && !slow_path)) msix_vec = (*allocated_vectors)++; else if (vector_policy != VP_VQ_VECTOR_POLICY_EACH && slow_path) msix_vec = VP_MSIX_CONFIG_VECTOR; else msix_vec = VP_MSIX_VQ_VECTOR; vq = vp_setup_vq(vdev, queue_idx, callback, name, ctx, msix_vec, p_info); if (IS_ERR(vq)) return vq; if (vector_policy == VP_VQ_VECTOR_POLICY_SHARED || msix_vec == VIRTIO_MSI_NO_VECTOR || vp_is_slow_path_vector(msix_vec)) return vq; /* allocate per-vq irq if available and necessary */ snprintf(vp_dev->msix_names[msix_vec], sizeof(*vp_dev->msix_names), "%s-%s", dev_name(&vp_dev->vdev.dev), name); err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec), vring_interrupt, 0, vp_dev->msix_names[msix_vec], vq); if (err) { vp_del_vq(vq, *p_info); return ERR_PTR(err); } return vq; } static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], struct virtqueue_info vqs_info[], enum vp_vq_vector_policy vector_policy, struct irq_affinity *desc) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_admin_vq *avq = &vp_dev->admin_vq; struct virtqueue_info *vqi; int i, err, nvectors, allocated_vectors, queue_idx = 0; struct virtqueue *vq; bool per_vq_vectors; u16 avq_num = 0; vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL); if (!vp_dev->vqs) return -ENOMEM; if (vp_dev->avq_index) { err = vp_dev->avq_index(vdev, &avq->vq_index, &avq_num); if (err) goto error_find; } per_vq_vectors = vector_policy != VP_VQ_VECTOR_POLICY_SHARED; if (per_vq_vectors) { /* Best option: one for change interrupt, one per vq. */ nvectors = 1; for (i = 0; i < nvqs; ++i) { vqi = &vqs_info[i]; if (vqi->name && vqi->callback) ++nvectors; } if (avq_num && vector_policy == VP_VQ_VECTOR_POLICY_EACH) ++nvectors; } else { /* Second best: one for change, shared for all vqs. */ nvectors = 2; } err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors, desc); if (err) goto error_find; vp_dev->per_vq_vectors = per_vq_vectors; allocated_vectors = vp_dev->msix_used_vectors; for (i = 0; i < nvqs; ++i) { vqi = &vqs_info[i]; if (!vqi->name) { vqs[i] = NULL; continue; } vqs[i] = vp_find_one_vq_msix(vdev, queue_idx++, vqi->callback, vqi->name, vqi->ctx, false, &allocated_vectors, vector_policy, &vp_dev->vqs[i]); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); goto error_find; } } if (!avq_num) return 0; sprintf(avq->name, "avq.%u", avq->vq_index); vq = vp_find_one_vq_msix(vdev, avq->vq_index, vp_modern_avq_done, avq->name, false, true, &allocated_vectors, vector_policy, &vp_dev->admin_vq.info); if (IS_ERR(vq)) { err = PTR_ERR(vq); goto error_find; } return 0; error_find: vp_del_vqs(vdev); return err; } static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], struct virtqueue_info vqs_info[]) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_admin_vq *avq = &vp_dev->admin_vq; int i, err, queue_idx = 0; struct virtqueue *vq; u16 avq_num = 0; vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL); if (!vp_dev->vqs) return -ENOMEM; if (vp_dev->avq_index) { err = vp_dev->avq_index(vdev, &avq->vq_index, &avq_num); if (err) goto out_del_vqs; } err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED, dev_name(&vdev->dev), vp_dev); if (err) goto out_del_vqs; vp_dev->intx_enabled = 1; vp_dev->per_vq_vectors = false; for (i = 0; i < nvqs; ++i) { struct virtqueue_info *vqi = &vqs_info[i]; if (!vqi->name) { vqs[i] = NULL; continue; } vqs[i] = vp_setup_vq(vdev, queue_idx++, vqi->callback, vqi->name, vqi->ctx, VIRTIO_MSI_NO_VECTOR, &vp_dev->vqs[i]); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); goto out_del_vqs; } } if (!avq_num) return 0; sprintf(avq->name, "avq.%u", avq->vq_index); vq = vp_setup_vq(vdev, queue_idx++, vp_modern_avq_done, avq->name, false, VIRTIO_MSI_NO_VECTOR, &vp_dev->admin_vq.info); if (IS_ERR(vq)) { err = PTR_ERR(vq); goto out_del_vqs; } return 0; out_del_vqs: vp_del_vqs(vdev); return err; } /* the config->find_vqs() implementation */ int vp_find_vqs(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], struct virtqueue_info vqs_info[], struct irq_affinity *desc) { int err; /* Try MSI-X with one vector per queue. */ err = vp_find_vqs_msix(vdev, nvqs, vqs, vqs_info, VP_VQ_VECTOR_POLICY_EACH, desc); if (!err) return 0; /* Fallback: MSI-X with one shared vector for config and * slow path queues, one vector per queue for the rest. */ err = vp_find_vqs_msix(vdev, nvqs, vqs, vqs_info, VP_VQ_VECTOR_POLICY_SHARED_SLOW, desc); if (!err) return 0; /* Fallback: MSI-X with one vector for config, one shared for queues. */ err = vp_find_vqs_msix(vdev, nvqs, vqs, vqs_info, VP_VQ_VECTOR_POLICY_SHARED, desc); if (!err) return 0; /* Is there an interrupt? If not give up. */ if (!(to_vp_device(vdev)->pci_dev->irq)) return err; /* Finally fall back to regular interrupts. */ return vp_find_vqs_intx(vdev, nvqs, vqs, vqs_info); } const char *vp_bus_name(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); return pci_name(vp_dev->pci_dev); } /* Setup the affinity for a virtqueue: * - force the affinity for per vq vector * - OR over all affinities for shared MSI * - ignore the affinity request if we're using INTX */ int vp_set_vq_affinity(struct virtqueue *vq, const struct cpumask *cpu_mask) { struct virtio_device *vdev = vq->vdev; struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index]; struct cpumask *mask; unsigned int irq; if (!vq->callback) return -EINVAL; if (vp_dev->msix_enabled) { mask = vp_dev->msix_affinity_masks[info->msix_vector]; irq = pci_irq_vector(vp_dev->pci_dev, info->msix_vector); if (!cpu_mask) irq_update_affinity_hint(irq, NULL); else { cpumask_copy(mask, cpu_mask); irq_set_affinity_and_hint(irq, mask); } } return 0; } const struct cpumask *vp_get_vq_affinity(struct virtio_device *vdev, int index) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); if (!vp_dev->per_vq_vectors || vp_dev->vqs[index]->msix_vector == VIRTIO_MSI_NO_VECTOR || vp_is_slow_path_vector(vp_dev->vqs[index]->msix_vector)) return NULL; return pci_irq_get_affinity(vp_dev->pci_dev, vp_dev->vqs[index]->msix_vector); } #ifdef CONFIG_PM_SLEEP static int virtio_pci_freeze(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); int ret; ret = virtio_device_freeze(&vp_dev->vdev); if (!ret) pci_disable_device(pci_dev); return ret; } static int virtio_pci_restore(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); int ret; ret = pci_enable_device(pci_dev); if (ret) return ret; pci_set_master(pci_dev); return virtio_device_restore(&vp_dev->vdev); } static bool vp_supports_pm_no_reset(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); u16 pmcsr; if (!pci_dev->pm_cap) return false; pci_read_config_word(pci_dev, pci_dev->pm_cap + PCI_PM_CTRL, &pmcsr); if (PCI_POSSIBLE_ERROR(pmcsr)) { dev_err(dev, "Unable to query pmcsr"); return false; } return pmcsr & PCI_PM_CTRL_NO_SOFT_RESET; } static int virtio_pci_suspend(struct device *dev) { return vp_supports_pm_no_reset(dev) ? 0 : virtio_pci_freeze(dev); } static int virtio_pci_resume(struct device *dev) { return vp_supports_pm_no_reset(dev) ? 0 : virtio_pci_restore(dev); } static const struct dev_pm_ops virtio_pci_pm_ops = { .suspend = virtio_pci_suspend, .resume = virtio_pci_resume, .freeze = virtio_pci_freeze, .thaw = virtio_pci_restore, .poweroff = virtio_pci_freeze, .restore = virtio_pci_restore, }; #endif /* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */ static const struct pci_device_id virtio_pci_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_REDHAT_QUMRANET, PCI_ANY_ID) }, { 0 } }; MODULE_DEVICE_TABLE(pci, virtio_pci_id_table); static void virtio_pci_release_dev(struct device *_d) { struct virtio_device *vdev = dev_to_virtio(_d); struct virtio_pci_device *vp_dev = to_vp_device(vdev); /* As struct device is a kobject, it's not safe to * free the memory (including the reference counter itself) * until it's release callback. */ kfree(vp_dev); } static int virtio_pci_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) { struct virtio_pci_device *vp_dev, *reg_dev = NULL; int rc; /* allocate our structure and fill it out */ vp_dev = kzalloc(sizeof(struct virtio_pci_device), GFP_KERNEL); if (!vp_dev) return -ENOMEM; pci_set_drvdata(pci_dev, vp_dev); vp_dev->vdev.dev.parent = &pci_dev->dev; vp_dev->vdev.dev.release = virtio_pci_release_dev; vp_dev->pci_dev = pci_dev; INIT_LIST_HEAD(&vp_dev->virtqueues); INIT_LIST_HEAD(&vp_dev->slow_virtqueues); spin_lock_init(&vp_dev->lock); /* enable the device */ rc = pci_enable_device(pci_dev); if (rc) goto err_enable_device; if (force_legacy) { rc = virtio_pci_legacy_probe(vp_dev); /* Also try modern mode if we can't map BAR0 (no IO space). */ if (rc == -ENODEV || rc == -ENOMEM) rc = virtio_pci_modern_probe(vp_dev); if (rc) goto err_probe; } else { rc = virtio_pci_modern_probe(vp_dev); if (rc == -ENODEV) rc = virtio_pci_legacy_probe(vp_dev); if (rc) goto err_probe; } pci_set_master(pci_dev); rc = register_virtio_device(&vp_dev->vdev); reg_dev = vp_dev; if (rc) goto err_register; return 0; err_register: if (vp_dev->is_legacy) virtio_pci_legacy_remove(vp_dev); else virtio_pci_modern_remove(vp_dev); err_probe: pci_disable_device(pci_dev); err_enable_device: if (reg_dev) put_device(&vp_dev->vdev.dev); else kfree(vp_dev); return rc; } static void virtio_pci_remove(struct pci_dev *pci_dev) { struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); struct device *dev = get_device(&vp_dev->vdev.dev); /* * Device is marked broken on surprise removal so that virtio upper * layers can abort any ongoing operation. */ if (!pci_device_is_present(pci_dev)) virtio_break_device(&vp_dev->vdev); pci_disable_sriov(pci_dev); unregister_virtio_device(&vp_dev->vdev); if (vp_dev->is_legacy) virtio_pci_legacy_remove(vp_dev); else virtio_pci_modern_remove(vp_dev); pci_disable_device(pci_dev); put_device(dev); } static int virtio_pci_sriov_configure(struct pci_dev *pci_dev, int num_vfs) { struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); struct virtio_device *vdev = &vp_dev->vdev; int ret; if (!(vdev->config->get_status(vdev) & VIRTIO_CONFIG_S_DRIVER_OK)) return -EBUSY; if (!__virtio_test_bit(vdev, VIRTIO_F_SR_IOV)) return -EINVAL; if (pci_vfs_assigned(pci_dev)) return -EPERM; if (num_vfs == 0) { pci_disable_sriov(pci_dev); return 0; } ret = pci_enable_sriov(pci_dev, num_vfs); if (ret < 0) return ret; return num_vfs; } static void virtio_pci_reset_prepare(struct pci_dev *pci_dev) { struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); int ret = 0; ret = virtio_device_reset_prepare(&vp_dev->vdev); if (ret) { if (ret != -EOPNOTSUPP) dev_warn(&pci_dev->dev, "Reset prepare failure: %d", ret); return; } if (pci_is_enabled(pci_dev)) pci_disable_device(pci_dev); } static void virtio_pci_reset_done(struct pci_dev *pci_dev) { struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); int ret; if (pci_is_enabled(pci_dev)) return; ret = pci_enable_device(pci_dev); if (!ret) { pci_set_master(pci_dev); ret = virtio_device_reset_done(&vp_dev->vdev); } if (ret && ret != -EOPNOTSUPP) dev_warn(&pci_dev->dev, "Reset done failure: %d", ret); } static const struct pci_error_handlers virtio_pci_err_handler = { .reset_prepare = virtio_pci_reset_prepare, .reset_done = virtio_pci_reset_done, }; static struct pci_driver virtio_pci_driver = { .name = "virtio-pci", .id_table = virtio_pci_id_table, .probe = virtio_pci_probe, .remove = virtio_pci_remove, #ifdef CONFIG_PM_SLEEP .driver.pm = &virtio_pci_pm_ops, #endif .sriov_configure = virtio_pci_sriov_configure, .err_handler = &virtio_pci_err_handler, }; struct virtio_device *virtio_pci_vf_get_pf_dev(struct pci_dev *pdev) { struct virtio_pci_device *pf_vp_dev; pf_vp_dev = pci_iov_get_pf_drvdata(pdev, &virtio_pci_driver); if (IS_ERR(pf_vp_dev)) return NULL; return &pf_vp_dev->vdev; } module_pci_driver(virtio_pci_driver); MODULE_AUTHOR("Anthony Liguori <aliguori@us.ibm.com>"); MODULE_DESCRIPTION("virtio-pci"); MODULE_LICENSE("GPL"); MODULE_VERSION("1");
1 2 1 7 7 7 7 1 3 1 2 2 2 2 1 2 2 2 1 1 4 4 4 4 2 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 // SPDX-License-Identifier: GPL-2.0-only /* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */ /* Kernel module implementing an IP set type: the hash:ip,port type */ #include <linux/jhash.h> #include <linux/module.h> #include <linux/ip.h> #include <linux/skbuff.h> #include <linux/errno.h> #include <linux/random.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/netlink.h> #include <net/tcp.h> #include <linux/netfilter.h> #include <linux/netfilter/ipset/pfxlen.h> #include <linux/netfilter/ipset/ip_set.h> #include <linux/netfilter/ipset/ip_set_getport.h> #include <linux/netfilter/ipset/ip_set_hash.h> #define IPSET_TYPE_REV_MIN 0 /* 1 SCTP and UDPLITE support added */ /* 2 Counters support added */ /* 3 Comments support added */ /* 4 Forceadd support added */ /* 5 skbinfo support added */ /* 6 bucketsize, initval support added */ #define IPSET_TYPE_REV_MAX 7 /* bitmask support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>"); IP_SET_MODULE_DESC("hash:ip,port", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:ip,port"); /* Type specific function prefix */ #define HTYPE hash_ipport #define IP_SET_HASH_WITH_NETMASK #define IP_SET_HASH_WITH_BITMASK /* IPv4 variant */ /* Member elements */ struct hash_ipport4_elem { __be32 ip; __be16 port; u8 proto; u8 padding; }; /* Common functions */ static bool hash_ipport4_data_equal(const struct hash_ipport4_elem *ip1, const struct hash_ipport4_elem *ip2, u32 *multi) { return ip1->ip == ip2->ip && ip1->port == ip2->port && ip1->proto == ip2->proto; } static bool hash_ipport4_data_list(struct sk_buff *skb, const struct hash_ipport4_elem *data) { if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) || nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto)) goto nla_put_failure; return false; nla_put_failure: return true; } static void hash_ipport4_data_next(struct hash_ipport4_elem *next, const struct hash_ipport4_elem *d) { next->ip = d->ip; next->port = d->port; } #define MTYPE hash_ipport4 #define HOST_MASK 32 #include "ip_set_hash_gen.h" static int hash_ipport4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport4_elem e = { .ip = 0 }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); const struct MTYPE *h = set->data; if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.port, &e.proto)) return -EINVAL; ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip); e.ip &= h->bitmask.ip; if (e.ip == 0) return -EINVAL; return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { struct hash_ipport4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport4_elem e = { .ip = 0 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip, ip_to = 0, p = 0, port, port_to, i = 0; bool with_ports = false; int ret; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); if (unlikely(!tb[IPSET_ATTR_IP] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO))) return -IPSET_ERR_PROTOCOL; ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_IP], &e.ip); if (ret) return ret; ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; e.ip &= h->bitmask.ip; if (e.ip == 0) return -EINVAL; e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); if (tb[IPSET_ATTR_PROTO]) { e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); with_ports = ip_set_proto_with_ports(e.proto); if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else { return -IPSET_ERR_MISSING_PROTO; } if (!(with_ports || e.proto == IPPROTO_ICMP)) e.port = 0; if (adt == IPSET_TEST || !(tb[IPSET_ATTR_IP_TO] || tb[IPSET_ATTR_CIDR] || tb[IPSET_ATTR_PORT_TO])) { ret = adtfn(set, &e, &ext, &ext, flags); return ip_set_eexist(ret, flags) ? 0 : ret; } ip_to = ip = ntohl(e.ip); if (tb[IPSET_ATTR_IP_TO]) { ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); if (ret) return ret; if (ip > ip_to) swap(ip, ip_to); } else if (tb[IPSET_ATTR_CIDR]) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (!cidr || cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(ip, ip_to, cidr); } port_to = port = ntohs(e.port); if (with_ports && tb[IPSET_ATTR_PORT_TO]) { port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); if (port > port_to) swap(port, port_to); } if (retried) ip = ntohl(h->next.ip); for (; ip <= ip_to; ip++) { p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) : port; for (; p <= port_to; p++, i++) { e.ip = htonl(ip); e.port = htons(p); if (i > IPSET_MAX_RANGE) { hash_ipport4_data_next(&h->next, &e); return -ERANGE; } ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; ret = 0; } } return ret; } /* IPv6 variant */ struct hash_ipport6_elem { union nf_inet_addr ip; __be16 port; u8 proto; u8 padding; }; /* Common functions */ static bool hash_ipport6_data_equal(const struct hash_ipport6_elem *ip1, const struct hash_ipport6_elem *ip2, u32 *multi) { return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) && ip1->port == ip2->port && ip1->proto == ip2->proto; } static bool hash_ipport6_data_list(struct sk_buff *skb, const struct hash_ipport6_elem *data) { if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6) || nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto)) goto nla_put_failure; return false; nla_put_failure: return true; } static void hash_ipport6_data_next(struct hash_ipport6_elem *next, const struct hash_ipport6_elem *d) { next->port = d->port; } #undef MTYPE #undef HOST_MASK #define MTYPE hash_ipport6 #define HOST_MASK 128 #define IP_SET_EMIT_CREATE #include "ip_set_hash_gen.h" static int hash_ipport6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport6_elem e = { .ip = { .all = { 0 } } }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); const struct MTYPE *h = set->data; if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.port, &e.proto)) return -EINVAL; ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); nf_inet_addr_mask_inplace(&e.ip, &h->bitmask); if (ipv6_addr_any(&e.ip.in6)) return -EINVAL; return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct hash_ipport6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipport6_elem e = { .ip = { .all = { 0 } } }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port, port_to; bool with_ports = false; int ret; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); if (unlikely(!tb[IPSET_ATTR_IP] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO))) return -IPSET_ERR_PROTOCOL; if (unlikely(tb[IPSET_ATTR_IP_TO])) return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; if (unlikely(tb[IPSET_ATTR_CIDR])) { u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (cidr != HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip); if (ret) return ret; ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; nf_inet_addr_mask_inplace(&e.ip, &h->bitmask); if (ipv6_addr_any(&e.ip.in6)) return -EINVAL; e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); if (tb[IPSET_ATTR_PROTO]) { e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); with_ports = ip_set_proto_with_ports(e.proto); if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else { return -IPSET_ERR_MISSING_PROTO; } if (!(with_ports || e.proto == IPPROTO_ICMPV6)) e.port = 0; if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { ret = adtfn(set, &e, &ext, &ext, flags); return ip_set_eexist(ret, flags) ? 0 : ret; } port = ntohs(e.port); port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); if (port > port_to) swap(port, port_to); if (retried) port = ntohs(h->next.port); for (; port <= port_to; port++) { e.port = htons(port); ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; ret = 0; } return ret; } static struct ip_set_type hash_ipport_type __read_mostly = { .name = "hash:ip,port", .protocol = IPSET_PROTOCOL, .features = IPSET_TYPE_IP | IPSET_TYPE_PORT, .dimension = IPSET_DIM_TWO, .family = NFPROTO_UNSPEC, .revision_min = IPSET_TYPE_REV_MIN, .revision_max = IPSET_TYPE_REV_MAX, .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE, .create = hash_ipport_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, [IPSET_ATTR_INITVAL] = { .type = NLA_U32 }, [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 }, [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, [IPSET_ATTR_PROTO] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, [IPSET_ATTR_NETMASK] = { .type = NLA_U8 }, [IPSET_ATTR_BITMASK] = { .type = NLA_NESTED }, }, .adt_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, [IPSET_ATTR_PORT] = { .type = NLA_U16 }, [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 }, [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, [IPSET_ATTR_PROTO] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING, .len = IPSET_MAX_COMMENT_SIZE }, [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, }, .me = THIS_MODULE, }; static int __init hash_ipport_init(void) { return ip_set_type_register(&hash_ipport_type); } static void __exit hash_ipport_fini(void) { rcu_barrier(); ip_set_type_unregister(&hash_ipport_type); } module_init(hash_ipport_init); module_exit(hash_ipport_fini);
3 3 3 3 5 5 4 1 5 5 5 5 5 5 4 1 6 2 12 12 8 4 8 3 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2012 Bjørn Mork <bjorn@mork.no> * * The probing code is heavily inspired by cdc_ether, which is: * Copyright (C) 2003-2005 by David Brownell * Copyright (C) 2006 by Ole Andre Vadla Ravnas (ActiveSync) */ #include <linux/module.h> #include <linux/sched/signal.h> #include <linux/netdevice.h> #include <linux/ethtool.h> #include <linux/etherdevice.h> #include <linux/if_arp.h> #include <linux/kstrtox.h> #include <linux/mii.h> #include <linux/rtnetlink.h> #include <linux/usb.h> #include <linux/usb/cdc.h> #include <linux/usb/usbnet.h> #include <linux/usb/cdc-wdm.h> #include <linux/u64_stats_sync.h> /* This driver supports wwan (3G/LTE/?) devices using a vendor * specific management protocol called Qualcomm MSM Interface (QMI) - * in addition to the more common AT commands over serial interface * management * * QMI is wrapped in CDC, using CDC encapsulated commands on the * control ("master") interface of a two-interface CDC Union * resembling standard CDC ECM. The devices do not use the control * interface for any other CDC messages. Most likely because the * management protocol is used in place of the standard CDC * notifications NOTIFY_NETWORK_CONNECTION and NOTIFY_SPEED_CHANGE * * Alternatively, control and data functions can be combined in a * single USB interface. * * Handling a protocol like QMI is out of the scope for any driver. * It is exported as a character device using the cdc-wdm driver as * a subdriver, enabling userspace applications ("modem managers") to * handle it. * * These devices may alternatively/additionally be configured using AT * commands on a serial interface */ /* driver specific data */ struct qmi_wwan_state { struct usb_driver *subdriver; atomic_t pmcount; unsigned long flags; struct usb_interface *control; struct usb_interface *data; }; enum qmi_wwan_flags { QMI_WWAN_FLAG_RAWIP = 1 << 0, QMI_WWAN_FLAG_MUX = 1 << 1, QMI_WWAN_FLAG_PASS_THROUGH = 1 << 2, }; enum qmi_wwan_quirks { QMI_WWAN_QUIRK_DTR = 1 << 0, /* needs "set DTR" request */ }; struct qmimux_hdr { u8 pad; u8 mux_id; __be16 pkt_len; }; struct qmimux_priv { struct net_device *real_dev; u8 mux_id; }; static int qmimux_open(struct net_device *dev) { struct qmimux_priv *priv = netdev_priv(dev); struct net_device *real_dev = priv->real_dev; if (!(priv->real_dev->flags & IFF_UP)) return -ENETDOWN; if (netif_carrier_ok(real_dev)) netif_carrier_on(dev); return 0; } static int qmimux_stop(struct net_device *dev) { netif_carrier_off(dev); return 0; } static netdev_tx_t qmimux_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct qmimux_priv *priv = netdev_priv(dev); unsigned int len = skb->len; struct qmimux_hdr *hdr; netdev_tx_t ret; hdr = skb_push(skb, sizeof(struct qmimux_hdr)); hdr->pad = 0; hdr->mux_id = priv->mux_id; hdr->pkt_len = cpu_to_be16(len); skb->dev = priv->real_dev; ret = dev_queue_xmit(skb); if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) dev_sw_netstats_tx_add(dev, 1, len); else dev->stats.tx_dropped++; return ret; } static const struct net_device_ops qmimux_netdev_ops = { .ndo_open = qmimux_open, .ndo_stop = qmimux_stop, .ndo_start_xmit = qmimux_start_xmit, }; static void qmimux_setup(struct net_device *dev) { dev->header_ops = NULL; /* No header */ dev->type = ARPHRD_NONE; dev->hard_header_len = 0; dev->addr_len = 0; dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; dev->netdev_ops = &qmimux_netdev_ops; dev->mtu = 1500; dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; dev->needs_free_netdev = true; } static struct net_device *qmimux_find_dev(struct usbnet *dev, u8 mux_id) { struct qmimux_priv *priv; struct list_head *iter; struct net_device *ldev; rcu_read_lock(); netdev_for_each_upper_dev_rcu(dev->net, ldev, iter) { priv = netdev_priv(ldev); if (priv->mux_id == mux_id) { rcu_read_unlock(); return ldev; } } rcu_read_unlock(); return NULL; } static bool qmimux_has_slaves(struct usbnet *dev) { return !list_empty(&dev->net->adj_list.upper); } static int qmimux_rx_fixup(struct usbnet *dev, struct sk_buff *skb) { unsigned int len, offset = 0, pad_len, pkt_len; struct qmimux_hdr *hdr; struct net_device *net; struct sk_buff *skbn; u8 qmimux_hdr_sz = sizeof(*hdr); while (offset + qmimux_hdr_sz < skb->len) { hdr = (struct qmimux_hdr *)(skb->data + offset); len = be16_to_cpu(hdr->pkt_len); /* drop the packet, bogus length */ if (offset + len + qmimux_hdr_sz > skb->len) return 0; /* control packet, we do not know what to do */ if (hdr->pad & 0x80) goto skip; /* extract padding length and check for valid length info */ pad_len = hdr->pad & 0x3f; if (len == 0 || pad_len >= len) goto skip; pkt_len = len - pad_len; net = qmimux_find_dev(dev, hdr->mux_id); if (!net) goto skip; skbn = netdev_alloc_skb(net, pkt_len + LL_MAX_HEADER); if (!skbn) return 0; /* Raw IP packets don't have a MAC header, but other subsystems * (like xfrm) may still access MAC header offsets, so they must * be initialized. */ skb_reset_mac_header(skbn); switch (skb->data[offset + qmimux_hdr_sz] & 0xf0) { case 0x40: skbn->protocol = htons(ETH_P_IP); break; case 0x60: skbn->protocol = htons(ETH_P_IPV6); break; default: /* not ip - do not know what to do */ kfree_skb(skbn); goto skip; } skb_reserve(skbn, LL_MAX_HEADER); skb_put_data(skbn, skb->data + offset + qmimux_hdr_sz, pkt_len); if (netif_rx(skbn) != NET_RX_SUCCESS) { net->stats.rx_errors++; return 0; } else { dev_sw_netstats_rx_add(net, pkt_len); } skip: offset += len + qmimux_hdr_sz; } return 1; } static ssize_t mux_id_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_device *dev = to_net_dev(d); struct qmimux_priv *priv; priv = netdev_priv(dev); return sysfs_emit(buf, "0x%02x\n", priv->mux_id); } static DEVICE_ATTR_RO(mux_id); static struct attribute *qmi_wwan_sysfs_qmimux_attrs[] = { &dev_attr_mux_id.attr, NULL, }; static struct attribute_group qmi_wwan_sysfs_qmimux_attr_group = { .name = "qmap", .attrs = qmi_wwan_sysfs_qmimux_attrs, }; static int qmimux_register_device(struct net_device *real_dev, u8 mux_id) { struct net_device *new_dev; struct qmimux_priv *priv; int err; new_dev = alloc_netdev(sizeof(struct qmimux_priv), "qmimux%d", NET_NAME_UNKNOWN, qmimux_setup); if (!new_dev) return -ENOBUFS; dev_net_set(new_dev, dev_net(real_dev)); priv = netdev_priv(new_dev); priv->mux_id = mux_id; priv->real_dev = real_dev; new_dev->sysfs_groups[0] = &qmi_wwan_sysfs_qmimux_attr_group; err = register_netdevice(new_dev); if (err < 0) goto out_free_newdev; /* Account for reference in struct qmimux_priv_priv */ dev_hold(real_dev); err = netdev_upper_dev_link(real_dev, new_dev, NULL); if (err) goto out_unregister_netdev; netif_stacked_transfer_operstate(real_dev, new_dev); return 0; out_unregister_netdev: unregister_netdevice(new_dev); dev_put(real_dev); out_free_newdev: free_netdev(new_dev); return err; } static void qmimux_unregister_device(struct net_device *dev, struct list_head *head) { struct qmimux_priv *priv = netdev_priv(dev); struct net_device *real_dev = priv->real_dev; netdev_upper_dev_unlink(real_dev, dev); unregister_netdevice_queue(dev, head); /* Get rid of the reference to real_dev */ dev_put(real_dev); } static void qmi_wwan_netdev_setup(struct net_device *net) { struct usbnet *dev = netdev_priv(net); struct qmi_wwan_state *info = (void *)&dev->data; if (info->flags & QMI_WWAN_FLAG_RAWIP) { net->header_ops = NULL; /* No header */ net->type = ARPHRD_NONE; net->hard_header_len = 0; net->addr_len = 0; net->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; set_bit(EVENT_NO_IP_ALIGN, &dev->flags); netdev_dbg(net, "mode: raw IP\n"); } else if (!net->header_ops) { /* don't bother if already set */ ether_setup(net); /* Restoring min/max mtu values set originally by usbnet */ net->min_mtu = 0; net->max_mtu = ETH_MAX_MTU; clear_bit(EVENT_NO_IP_ALIGN, &dev->flags); netdev_dbg(net, "mode: Ethernet\n"); } /* recalculate buffers after changing hard_header_len */ usbnet_change_mtu(net, net->mtu); } static ssize_t raw_ip_show(struct device *d, struct device_attribute *attr, char *buf) { struct usbnet *dev = netdev_priv(to_net_dev(d)); struct qmi_wwan_state *info = (void *)&dev->data; return sprintf(buf, "%c\n", info->flags & QMI_WWAN_FLAG_RAWIP ? 'Y' : 'N'); } static ssize_t raw_ip_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { struct usbnet *dev = netdev_priv(to_net_dev(d)); struct qmi_wwan_state *info = (void *)&dev->data; bool enable; int ret; if (kstrtobool(buf, &enable)) return -EINVAL; /* no change? */ if (enable == (info->flags & QMI_WWAN_FLAG_RAWIP)) return len; /* ip mode cannot be cleared when pass through mode is set */ if (!enable && (info->flags & QMI_WWAN_FLAG_PASS_THROUGH)) { netdev_err(dev->net, "Cannot clear ip mode on pass through device\n"); return -EINVAL; } if (!rtnl_trylock()) return restart_syscall(); /* we don't want to modify a running netdev */ if (netif_running(dev->net)) { netdev_err(dev->net, "Cannot change a running device\n"); ret = -EBUSY; goto err; } /* let other drivers deny the change */ ret = call_netdevice_notifiers(NETDEV_PRE_TYPE_CHANGE, dev->net); ret = notifier_to_errno(ret); if (ret) { netdev_err(dev->net, "Type change was refused\n"); goto err; } if (enable) info->flags |= QMI_WWAN_FLAG_RAWIP; else info->flags &= ~QMI_WWAN_FLAG_RAWIP; qmi_wwan_netdev_setup(dev->net); call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE, dev->net); ret = len; err: rtnl_unlock(); return ret; } static ssize_t add_mux_show(struct device *d, struct device_attribute *attr, char *buf) { struct net_device *dev = to_net_dev(d); struct qmimux_priv *priv; struct list_head *iter; struct net_device *ldev; ssize_t count = 0; rcu_read_lock(); netdev_for_each_upper_dev_rcu(dev, ldev, iter) { priv = netdev_priv(ldev); count += scnprintf(&buf[count], PAGE_SIZE - count, "0x%02x\n", priv->mux_id); } rcu_read_unlock(); return count; } static ssize_t add_mux_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { struct usbnet *dev = netdev_priv(to_net_dev(d)); struct qmi_wwan_state *info = (void *)&dev->data; u8 mux_id; int ret; if (kstrtou8(buf, 0, &mux_id)) return -EINVAL; /* mux_id [1 - 254] for compatibility with ip(8) and the rmnet driver */ if (mux_id < 1 || mux_id > 254) return -EINVAL; if (!rtnl_trylock()) return restart_syscall(); if (qmimux_find_dev(dev, mux_id)) { netdev_err(dev->net, "mux_id already present\n"); ret = -EINVAL; goto err; } ret = qmimux_register_device(dev->net, mux_id); if (!ret) { info->flags |= QMI_WWAN_FLAG_MUX; ret = len; } err: rtnl_unlock(); return ret; } static ssize_t del_mux_show(struct device *d, struct device_attribute *attr, char *buf) { return add_mux_show(d, attr, buf); } static ssize_t del_mux_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { struct usbnet *dev = netdev_priv(to_net_dev(d)); struct qmi_wwan_state *info = (void *)&dev->data; struct net_device *del_dev; u8 mux_id; int ret = 0; if (kstrtou8(buf, 0, &mux_id)) return -EINVAL; if (!rtnl_trylock()) return restart_syscall(); del_dev = qmimux_find_dev(dev, mux_id); if (!del_dev) { netdev_err(dev->net, "mux_id not present\n"); ret = -EINVAL; goto err; } qmimux_unregister_device(del_dev, NULL); if (!qmimux_has_slaves(dev)) info->flags &= ~QMI_WWAN_FLAG_MUX; ret = len; err: rtnl_unlock(); return ret; } static ssize_t pass_through_show(struct device *d, struct device_attribute *attr, char *buf) { struct usbnet *dev = netdev_priv(to_net_dev(d)); struct qmi_wwan_state *info; info = (void *)&dev->data; return sprintf(buf, "%c\n", info->flags & QMI_WWAN_FLAG_PASS_THROUGH ? 'Y' : 'N'); } static ssize_t pass_through_store(struct device *d, struct device_attribute *attr, const char *buf, size_t len) { struct usbnet *dev = netdev_priv(to_net_dev(d)); struct qmi_wwan_state *info; bool enable; if (kstrtobool(buf, &enable)) return -EINVAL; info = (void *)&dev->data; /* no change? */ if (enable == (info->flags & QMI_WWAN_FLAG_PASS_THROUGH)) return len; /* pass through mode can be set for raw ip devices only */ if (!(info->flags & QMI_WWAN_FLAG_RAWIP)) { netdev_err(dev->net, "Cannot set pass through mode on non ip device\n"); return -EINVAL; } if (enable) info->flags |= QMI_WWAN_FLAG_PASS_THROUGH; else info->flags &= ~QMI_WWAN_FLAG_PASS_THROUGH; return len; } static DEVICE_ATTR_RW(raw_ip); static DEVICE_ATTR_RW(add_mux); static DEVICE_ATTR_RW(del_mux); static DEVICE_ATTR_RW(pass_through); static struct attribute *qmi_wwan_sysfs_attrs[] = { &dev_attr_raw_ip.attr, &dev_attr_add_mux.attr, &dev_attr_del_mux.attr, &dev_attr_pass_through.attr, NULL, }; static struct attribute_group qmi_wwan_sysfs_attr_group = { .name = "qmi", .attrs = qmi_wwan_sysfs_attrs, }; /* default ethernet address used by the modem */ static const u8 default_modem_addr[ETH_ALEN] = {0x02, 0x50, 0xf3}; static const u8 buggy_fw_addr[ETH_ALEN] = {0x00, 0xa0, 0xc6, 0x00, 0x00, 0x00}; /* Make up an ethernet header if the packet doesn't have one. * * A firmware bug common among several devices cause them to send raw * IP packets under some circumstances. There is no way for the * driver/host to know when this will happen. And even when the bug * hits, some packets will still arrive with an intact header. * * The supported devices are only capably of sending IPv4, IPv6 and * ARP packets on a point-to-point link. Any packet with an ethernet * header will have either our address or a broadcast/multicast * address as destination. ARP packets will always have a header. * * This means that this function will reliably add the appropriate * header iff necessary, provided our hardware address does not start * with 4 or 6. * * Another common firmware bug results in all packets being addressed * to 00:a0:c6:00:00:00 despite the host address being different. * This function will also fixup such packets. */ static int qmi_wwan_rx_fixup(struct usbnet *dev, struct sk_buff *skb) { struct qmi_wwan_state *info = (void *)&dev->data; bool rawip = info->flags & QMI_WWAN_FLAG_RAWIP; __be16 proto; /* This check is no longer done by usbnet */ if (skb->len < dev->net->hard_header_len) return 0; if (info->flags & QMI_WWAN_FLAG_MUX) return qmimux_rx_fixup(dev, skb); if (info->flags & QMI_WWAN_FLAG_PASS_THROUGH) { skb->protocol = htons(ETH_P_MAP); return 1; } switch (skb->data[0] & 0xf0) { case 0x40: proto = htons(ETH_P_IP); break; case 0x60: proto = htons(ETH_P_IPV6); break; case 0x00: if (rawip) return 0; if (is_multicast_ether_addr(skb->data)) return 1; /* possibly bogus destination - rewrite just in case */ skb_reset_mac_header(skb); goto fix_dest; default: if (rawip) return 0; /* pass along other packets without modifications */ return 1; } if (rawip) { skb_reset_mac_header(skb); skb->dev = dev->net; /* normally set by eth_type_trans */ skb->protocol = proto; return 1; } if (skb_headroom(skb) < ETH_HLEN) return 0; skb_push(skb, ETH_HLEN); skb_reset_mac_header(skb); eth_hdr(skb)->h_proto = proto; eth_zero_addr(eth_hdr(skb)->h_source); fix_dest: memcpy(eth_hdr(skb)->h_dest, dev->net->dev_addr, ETH_ALEN); return 1; } /* very simplistic detection of IPv4 or IPv6 headers */ static bool possibly_iphdr(const char *data) { return (data[0] & 0xd0) == 0x40; } /* disallow addresses which may be confused with IP headers */ static int qmi_wwan_mac_addr(struct net_device *dev, void *p) { int ret; struct sockaddr *addr = p; ret = eth_prepare_mac_addr_change(dev, p); if (ret < 0) return ret; if (possibly_iphdr(addr->sa_data)) return -EADDRNOTAVAIL; eth_commit_mac_addr_change(dev, p); return 0; } static const struct net_device_ops qmi_wwan_netdev_ops = { .ndo_open = usbnet_open, .ndo_stop = usbnet_stop, .ndo_start_xmit = usbnet_start_xmit, .ndo_tx_timeout = usbnet_tx_timeout, .ndo_change_mtu = usbnet_change_mtu, .ndo_set_mac_address = qmi_wwan_mac_addr, .ndo_validate_addr = eth_validate_addr, }; /* using a counter to merge subdriver requests with our own into a * combined state */ static int qmi_wwan_manage_power(struct usbnet *dev, int on) { struct qmi_wwan_state *info = (void *)&dev->data; int rv; dev_dbg(&dev->intf->dev, "%s() pmcount=%d, on=%d\n", __func__, atomic_read(&info->pmcount), on); if ((on && atomic_add_return(1, &info->pmcount) == 1) || (!on && atomic_dec_and_test(&info->pmcount))) { /* need autopm_get/put here to ensure the usbcore sees * the new value */ rv = usb_autopm_get_interface(dev->intf); dev->intf->needs_remote_wakeup = on; if (!rv) usb_autopm_put_interface(dev->intf); } return 0; } static int qmi_wwan_cdc_wdm_manage_power(struct usb_interface *intf, int on) { struct usbnet *dev = usb_get_intfdata(intf); /* can be called while disconnecting */ if (!dev) return 0; return qmi_wwan_manage_power(dev, on); } /* collect all three endpoints and register subdriver */ static int qmi_wwan_register_subdriver(struct usbnet *dev) { int rv; struct usb_driver *subdriver = NULL; struct qmi_wwan_state *info = (void *)&dev->data; /* collect bulk endpoints */ rv = usbnet_get_endpoints(dev, info->data); if (rv < 0) goto err; /* update status endpoint if separate control interface */ if (info->control != info->data) dev->status = &info->control->cur_altsetting->endpoint[0]; /* require interrupt endpoint for subdriver */ if (!dev->status) { rv = -EINVAL; goto err; } /* for subdriver power management */ atomic_set(&info->pmcount, 0); /* register subdriver */ subdriver = usb_cdc_wdm_register(info->control, &dev->status->desc, 4096, WWAN_PORT_QMI, &qmi_wwan_cdc_wdm_manage_power); if (IS_ERR(subdriver)) { dev_err(&info->control->dev, "subdriver registration failed\n"); rv = PTR_ERR(subdriver); goto err; } /* prevent usbnet from using status endpoint */ dev->status = NULL; /* save subdriver struct for suspend/resume wrappers */ info->subdriver = subdriver; err: return rv; } /* Send CDC SetControlLineState request, setting or clearing the DTR. * "Required for Autoconnect and 9x30 to wake up" according to the * GobiNet driver. The requirement has been verified on an MDM9230 * based Sierra Wireless MC7455 */ static int qmi_wwan_change_dtr(struct usbnet *dev, bool on) { u8 intf = dev->intf->cur_altsetting->desc.bInterfaceNumber; return usbnet_write_cmd(dev, USB_CDC_REQ_SET_CONTROL_LINE_STATE, USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE, on ? 0x01 : 0x00, intf, NULL, 0); } static int qmi_wwan_bind(struct usbnet *dev, struct usb_interface *intf) { int status; u8 *buf = intf->cur_altsetting->extra; int len = intf->cur_altsetting->extralen; struct usb_interface_descriptor *desc = &intf->cur_altsetting->desc; struct usb_cdc_union_desc *cdc_union; struct usb_cdc_ether_desc *cdc_ether; struct usb_driver *driver = driver_of(intf); struct qmi_wwan_state *info = (void *)&dev->data; struct usb_cdc_parsed_header hdr; BUILD_BUG_ON((sizeof(((struct usbnet *)0)->data) < sizeof(struct qmi_wwan_state))); /* set up initial state */ info->control = intf; info->data = intf; /* and a number of CDC descriptors */ cdc_parse_cdc_header(&hdr, intf, buf, len); cdc_union = hdr.usb_cdc_union_desc; cdc_ether = hdr.usb_cdc_ether_desc; /* Use separate control and data interfaces if we found a CDC Union */ if (cdc_union) { info->data = usb_ifnum_to_if(dev->udev, cdc_union->bSlaveInterface0); if (desc->bInterfaceNumber != cdc_union->bMasterInterface0 || !info->data) { dev_err(&intf->dev, "bogus CDC Union: master=%u, slave=%u\n", cdc_union->bMasterInterface0, cdc_union->bSlaveInterface0); /* ignore and continue... */ cdc_union = NULL; info->data = intf; } } /* errors aren't fatal - we can live with the dynamic address */ if (cdc_ether && cdc_ether->wMaxSegmentSize) { dev->hard_mtu = le16_to_cpu(cdc_ether->wMaxSegmentSize); usbnet_get_ethernet_addr(dev, cdc_ether->iMACAddress); } /* claim data interface and set it up */ if (info->control != info->data) { status = usb_driver_claim_interface(driver, info->data, dev); if (status < 0) goto err; } status = qmi_wwan_register_subdriver(dev); if (status < 0 && info->control != info->data) { usb_set_intfdata(info->data, NULL); usb_driver_release_interface(driver, info->data); } /* disabling remote wakeup on MDM9x30 devices has the same * effect as clearing DTR. The device will not respond to QMI * requests until we set DTR again. This is similar to a * QMI_CTL SYNC request, clearing a lot of firmware state * including the client ID allocations. * * Our usage model allows a session to span multiple * open/close events, so we must prevent the firmware from * clearing out state the clients might need. * * MDM9x30 is the first QMI chipset with USB3 support. Abuse * this fact to enable the quirk for all USB3 devices. * * There are also chipsets with the same "set DTR" requirement * but without USB3 support. Devices based on these chips * need a quirk flag in the device ID table. */ if (dev->driver_info->data & QMI_WWAN_QUIRK_DTR || le16_to_cpu(dev->udev->descriptor.bcdUSB) >= 0x0201) { qmi_wwan_manage_power(dev, 1); qmi_wwan_change_dtr(dev, true); } /* Never use the same address on both ends of the link, even if the * buggy firmware told us to. Or, if device is assigned the well-known * buggy firmware MAC address, replace it with a random address, */ if (ether_addr_equal(dev->net->dev_addr, default_modem_addr) || ether_addr_equal(dev->net->dev_addr, buggy_fw_addr)) eth_hw_addr_random(dev->net); /* make MAC addr easily distinguishable from an IP header */ if (possibly_iphdr(dev->net->dev_addr)) { u8 addr = dev->net->dev_addr[0]; addr |= 0x02; /* set local assignment bit */ addr &= 0xbf; /* clear "IP" bit */ dev_addr_mod(dev->net, 0, &addr, 1); } dev->net->netdev_ops = &qmi_wwan_netdev_ops; dev->net->sysfs_groups[0] = &qmi_wwan_sysfs_attr_group; err: return status; } static void qmi_wwan_unbind(struct usbnet *dev, struct usb_interface *intf) { struct qmi_wwan_state *info = (void *)&dev->data; struct usb_driver *driver = driver_of(intf); struct usb_interface *other; if (info->subdriver && info->subdriver->disconnect) info->subdriver->disconnect(info->control); /* disable MDM9x30 quirk */ if (le16_to_cpu(dev->udev->descriptor.bcdUSB) >= 0x0201) { qmi_wwan_change_dtr(dev, false); qmi_wwan_manage_power(dev, 0); } /* allow user to unbind using either control or data */ if (intf == info->control) other = info->data; else other = info->control; /* only if not shared */ if (other && intf != other) { usb_set_intfdata(other, NULL); usb_driver_release_interface(driver, other); } info->subdriver = NULL; info->data = NULL; info->control = NULL; } /* suspend/resume wrappers calling both usbnet and the cdc-wdm * subdriver if present. * * NOTE: cdc-wdm also supports pre/post_reset, but we cannot provide * wrappers for those without adding usbnet reset support first. */ static int qmi_wwan_suspend(struct usb_interface *intf, pm_message_t message) { struct usbnet *dev = usb_get_intfdata(intf); struct qmi_wwan_state *info = (void *)&dev->data; int ret; /* Both usbnet_suspend() and subdriver->suspend() MUST return 0 * in system sleep context, otherwise, the resume callback has * to recover device from previous suspend failure. */ ret = usbnet_suspend(intf, message); if (ret < 0) goto err; if (intf == info->control && info->subdriver && info->subdriver->suspend) ret = info->subdriver->suspend(intf, message); if (ret < 0) usbnet_resume(intf); err: return ret; } static int qmi_wwan_resume(struct usb_interface *intf) { struct usbnet *dev = usb_get_intfdata(intf); struct qmi_wwan_state *info = (void *)&dev->data; int ret = 0; bool callsub = (intf == info->control && info->subdriver && info->subdriver->resume); if (callsub) ret = info->subdriver->resume(intf); if (ret < 0) goto err; ret = usbnet_resume(intf); if (ret < 0 && callsub) info->subdriver->suspend(intf, PMSG_SUSPEND); err: return ret; } static const struct driver_info qmi_wwan_info = { .description = "WWAN/QMI device", .flags = FLAG_WWAN | FLAG_SEND_ZLP, .bind = qmi_wwan_bind, .unbind = qmi_wwan_unbind, .manage_power = qmi_wwan_manage_power, .rx_fixup = qmi_wwan_rx_fixup, }; static const struct driver_info qmi_wwan_info_quirk_dtr = { .description = "WWAN/QMI device", .flags = FLAG_WWAN | FLAG_SEND_ZLP, .bind = qmi_wwan_bind, .unbind = qmi_wwan_unbind, .manage_power = qmi_wwan_manage_power, .rx_fixup = qmi_wwan_rx_fixup, .data = QMI_WWAN_QUIRK_DTR, }; #define HUAWEI_VENDOR_ID 0x12D1 /* map QMI/wwan function by a fixed interface number */ #define QMI_FIXED_INTF(vend, prod, num) \ USB_DEVICE_INTERFACE_NUMBER(vend, prod, num), \ .driver_info = (unsigned long)&qmi_wwan_info /* devices requiring "set DTR" quirk */ #define QMI_QUIRK_SET_DTR(vend, prod, num) \ USB_DEVICE_INTERFACE_NUMBER(vend, prod, num), \ .driver_info = (unsigned long)&qmi_wwan_info_quirk_dtr /* Gobi 1000 QMI/wwan interface number is 3 according to qcserial */ #define QMI_GOBI1K_DEVICE(vend, prod) \ QMI_FIXED_INTF(vend, prod, 3) /* Gobi 2000/3000 QMI/wwan interface number is 0 according to qcserial */ #define QMI_GOBI_DEVICE(vend, prod) \ QMI_FIXED_INTF(vend, prod, 0) /* Many devices have QMI and DIAG functions which are distinguishable * from other vendor specific functions by class, subclass and * protocol all being 0xff. The DIAG function has exactly 2 endpoints * and is silently rejected when probed. * * This makes it possible to match dynamically numbered QMI functions * as seen on e.g. many Quectel modems. */ #define QMI_MATCH_FF_FF_FF(vend, prod) \ USB_DEVICE_AND_INTERFACE_INFO(vend, prod, USB_CLASS_VENDOR_SPEC, \ USB_SUBCLASS_VENDOR_SPEC, 0xff), \ .driver_info = (unsigned long)&qmi_wwan_info_quirk_dtr static const struct usb_device_id products[] = { /* 1. CDC ECM like devices match on the control interface */ { /* Huawei E392, E398 and possibly others sharing both device id and more... */ USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, USB_CLASS_VENDOR_SPEC, 1, 9), .driver_info = (unsigned long)&qmi_wwan_info, }, { /* Vodafone/Huawei K5005 (12d1:14c8) and similar modems */ USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, USB_CLASS_VENDOR_SPEC, 1, 57), .driver_info = (unsigned long)&qmi_wwan_info, }, { /* HUAWEI_INTERFACE_NDIS_CONTROL_QUALCOMM */ USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, USB_CLASS_VENDOR_SPEC, 0x01, 0x69), .driver_info = (unsigned long)&qmi_wwan_info, }, { /* Motorola Mapphone devices with MDM6600 */ USB_VENDOR_AND_INTERFACE_INFO(0x22b8, USB_CLASS_VENDOR_SPEC, 0xfb, 0xff), .driver_info = (unsigned long)&qmi_wwan_info, }, /* 2. Combined interface devices matching on class+protocol */ { /* Huawei E367 and possibly others in "Windows mode" */ USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, USB_CLASS_VENDOR_SPEC, 1, 7), .driver_info = (unsigned long)&qmi_wwan_info, }, { /* Huawei E392, E398 and possibly others in "Windows mode" */ USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, USB_CLASS_VENDOR_SPEC, 1, 17), .driver_info = (unsigned long)&qmi_wwan_info, }, { /* HUAWEI_NDIS_SINGLE_INTERFACE_VDF */ USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, USB_CLASS_VENDOR_SPEC, 0x01, 0x37), .driver_info = (unsigned long)&qmi_wwan_info, }, { /* HUAWEI_INTERFACE_NDIS_HW_QUALCOMM */ USB_VENDOR_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, USB_CLASS_VENDOR_SPEC, 0x01, 0x67), .driver_info = (unsigned long)&qmi_wwan_info, }, { /* Pantech UML290, P4200 and more */ USB_VENDOR_AND_INTERFACE_INFO(0x106c, USB_CLASS_VENDOR_SPEC, 0xf0, 0xff), .driver_info = (unsigned long)&qmi_wwan_info, }, { /* Pantech UML290 - newer firmware */ USB_VENDOR_AND_INTERFACE_INFO(0x106c, USB_CLASS_VENDOR_SPEC, 0xf1, 0xff), .driver_info = (unsigned long)&qmi_wwan_info, }, { /* Novatel USB551L and MC551 */ USB_DEVICE_AND_INTERFACE_INFO(0x1410, 0xb001, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&qmi_wwan_info, }, { /* Novatel E362 */ USB_DEVICE_AND_INTERFACE_INFO(0x1410, 0x9010, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&qmi_wwan_info, }, { /* Novatel Expedite E371 */ USB_DEVICE_AND_INTERFACE_INFO(0x1410, 0x9011, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&qmi_wwan_info, }, { /* Dell Wireless 5800 (Novatel E362) */ USB_DEVICE_AND_INTERFACE_INFO(0x413C, 0x8195, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&qmi_wwan_info, }, { /* Dell Wireless 5800 V2 (Novatel E362) */ USB_DEVICE_AND_INTERFACE_INFO(0x413C, 0x8196, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&qmi_wwan_info, }, { /* Dell Wireless 5804 (Novatel E371) */ USB_DEVICE_AND_INTERFACE_INFO(0x413C, 0x819b, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&qmi_wwan_info, }, { /* ADU960S */ USB_DEVICE_AND_INTERFACE_INFO(0x16d5, 0x650a, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&qmi_wwan_info, }, { /* HP lt2523 (Novatel E371) */ USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0x421d, USB_CLASS_COMM, USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&qmi_wwan_info, }, { /* HP lt4112 LTE/HSPA+ Gobi 4G Module (Huawei me906e) */ USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0x581d, USB_CLASS_VENDOR_SPEC, 1, 7), .driver_info = (unsigned long)&qmi_wwan_info, }, {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0122)}, /* Quectel RG650V */ {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0125)}, /* Quectel EC25, EC20 R2.0 Mini PCIe */ {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0306)}, /* Quectel EP06/EG06/EM06 */ {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0512)}, /* Quectel EG12/EM12 */ {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0620)}, /* Quectel EM160R-GL */ {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0800)}, /* Quectel RM500Q-GL */ {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0801)}, /* Quectel RM520N */ /* 3. Combined interface devices matching on interface number */ {QMI_FIXED_INTF(0x0408, 0xea42, 4)}, /* Yota / Megafon M100-1 */ {QMI_FIXED_INTF(0x05c6, 0x6001, 3)}, /* 4G LTE usb-modem U901 */ {QMI_FIXED_INTF(0x05c6, 0x7000, 0)}, {QMI_FIXED_INTF(0x05c6, 0x7001, 1)}, {QMI_FIXED_INTF(0x05c6, 0x7002, 1)}, {QMI_FIXED_INTF(0x05c6, 0x7101, 1)}, {QMI_FIXED_INTF(0x05c6, 0x7101, 2)}, {QMI_FIXED_INTF(0x05c6, 0x7101, 3)}, {QMI_FIXED_INTF(0x05c6, 0x7102, 1)}, {QMI_FIXED_INTF(0x05c6, 0x7102, 2)}, {QMI_FIXED_INTF(0x05c6, 0x7102, 3)}, {QMI_FIXED_INTF(0x05c6, 0x8000, 7)}, {QMI_FIXED_INTF(0x05c6, 0x8001, 6)}, {QMI_FIXED_INTF(0x05c6, 0x9000, 4)}, {QMI_FIXED_INTF(0x05c6, 0x9003, 4)}, {QMI_FIXED_INTF(0x05c6, 0x9005, 2)}, {QMI_FIXED_INTF(0x05c6, 0x900a, 4)}, {QMI_FIXED_INTF(0x05c6, 0x900b, 2)}, {QMI_FIXED_INTF(0x05c6, 0x900c, 4)}, {QMI_FIXED_INTF(0x05c6, 0x900c, 5)}, {QMI_FIXED_INTF(0x05c6, 0x900c, 6)}, {QMI_FIXED_INTF(0x05c6, 0x900d, 5)}, {QMI_FIXED_INTF(0x05c6, 0x900f, 3)}, {QMI_FIXED_INTF(0x05c6, 0x900f, 4)}, {QMI_FIXED_INTF(0x05c6, 0x900f, 5)}, {QMI_FIXED_INTF(0x05c6, 0x9010, 4)}, {QMI_FIXED_INTF(0x05c6, 0x9010, 5)}, {QMI_FIXED_INTF(0x05c6, 0x9011, 3)}, {QMI_FIXED_INTF(0x05c6, 0x9011, 4)}, {QMI_FIXED_INTF(0x05c6, 0x9021, 1)}, {QMI_FIXED_INTF(0x05c6, 0x9022, 2)}, {QMI_QUIRK_SET_DTR(0x05c6, 0x9025, 4)}, /* Alcatel-sbell ASB TL131 TDD LTE (China Mobile) */ {QMI_FIXED_INTF(0x05c6, 0x9026, 3)}, {QMI_FIXED_INTF(0x05c6, 0x902e, 5)}, {QMI_FIXED_INTF(0x05c6, 0x9031, 5)}, {QMI_FIXED_INTF(0x05c6, 0x9032, 4)}, {QMI_FIXED_INTF(0x05c6, 0x9033, 3)}, {QMI_FIXED_INTF(0x05c6, 0x9033, 4)}, {QMI_FIXED_INTF(0x05c6, 0x9033, 5)}, {QMI_FIXED_INTF(0x05c6, 0x9033, 6)}, {QMI_FIXED_INTF(0x05c6, 0x9034, 3)}, {QMI_FIXED_INTF(0x05c6, 0x9034, 4)}, {QMI_FIXED_INTF(0x05c6, 0x9034, 5)}, {QMI_FIXED_INTF(0x05c6, 0x9034, 6)}, {QMI_FIXED_INTF(0x05c6, 0x9034, 7)}, {QMI_FIXED_INTF(0x05c6, 0x9035, 4)}, {QMI_FIXED_INTF(0x05c6, 0x9036, 3)}, {QMI_FIXED_INTF(0x05c6, 0x9037, 5)}, {QMI_FIXED_INTF(0x05c6, 0x9038, 4)}, {QMI_FIXED_INTF(0x05c6, 0x903b, 7)}, {QMI_FIXED_INTF(0x05c6, 0x903c, 6)}, {QMI_FIXED_INTF(0x05c6, 0x903d, 6)}, {QMI_FIXED_INTF(0x05c6, 0x903e, 5)}, {QMI_FIXED_INTF(0x05c6, 0x9043, 3)}, {QMI_FIXED_INTF(0x05c6, 0x9046, 3)}, {QMI_FIXED_INTF(0x05c6, 0x9046, 4)}, {QMI_FIXED_INTF(0x05c6, 0x9046, 5)}, {QMI_FIXED_INTF(0x05c6, 0x9047, 2)}, {QMI_FIXED_INTF(0x05c6, 0x9047, 3)}, {QMI_FIXED_INTF(0x05c6, 0x9047, 4)}, {QMI_FIXED_INTF(0x05c6, 0x9048, 4)}, {QMI_FIXED_INTF(0x05c6, 0x9048, 5)}, {QMI_FIXED_INTF(0x05c6, 0x9048, 6)}, {QMI_FIXED_INTF(0x05c6, 0x9048, 7)}, {QMI_FIXED_INTF(0x05c6, 0x9048, 8)}, {QMI_FIXED_INTF(0x05c6, 0x904c, 5)}, {QMI_FIXED_INTF(0x05c6, 0x904c, 6)}, {QMI_FIXED_INTF(0x05c6, 0x904c, 7)}, {QMI_FIXED_INTF(0x05c6, 0x904c, 8)}, {QMI_FIXED_INTF(0x05c6, 0x9050, 3)}, {QMI_FIXED_INTF(0x05c6, 0x9052, 4)}, {QMI_FIXED_INTF(0x05c6, 0x9053, 6)}, {QMI_FIXED_INTF(0x05c6, 0x9053, 7)}, {QMI_FIXED_INTF(0x05c6, 0x9054, 5)}, {QMI_FIXED_INTF(0x05c6, 0x9054, 6)}, {QMI_FIXED_INTF(0x05c6, 0x9055, 3)}, {QMI_FIXED_INTF(0x05c6, 0x9055, 4)}, {QMI_FIXED_INTF(0x05c6, 0x9055, 5)}, {QMI_FIXED_INTF(0x05c6, 0x9055, 6)}, {QMI_FIXED_INTF(0x05c6, 0x9055, 7)}, {QMI_FIXED_INTF(0x05c6, 0x9056, 3)}, {QMI_FIXED_INTF(0x05c6, 0x9062, 2)}, {QMI_FIXED_INTF(0x05c6, 0x9062, 3)}, {QMI_FIXED_INTF(0x05c6, 0x9062, 4)}, {QMI_FIXED_INTF(0x05c6, 0x9062, 5)}, {QMI_FIXED_INTF(0x05c6, 0x9062, 6)}, {QMI_FIXED_INTF(0x05c6, 0x9062, 7)}, {QMI_FIXED_INTF(0x05c6, 0x9062, 8)}, {QMI_FIXED_INTF(0x05c6, 0x9062, 9)}, {QMI_FIXED_INTF(0x05c6, 0x9064, 3)}, {QMI_FIXED_INTF(0x05c6, 0x9065, 6)}, {QMI_FIXED_INTF(0x05c6, 0x9065, 7)}, {QMI_FIXED_INTF(0x05c6, 0x9066, 5)}, {QMI_FIXED_INTF(0x05c6, 0x9066, 6)}, {QMI_FIXED_INTF(0x05c6, 0x9067, 1)}, {QMI_FIXED_INTF(0x05c6, 0x9068, 2)}, {QMI_FIXED_INTF(0x05c6, 0x9068, 3)}, {QMI_FIXED_INTF(0x05c6, 0x9068, 4)}, {QMI_FIXED_INTF(0x05c6, 0x9068, 5)}, {QMI_FIXED_INTF(0x05c6, 0x9068, 6)}, {QMI_FIXED_INTF(0x05c6, 0x9068, 7)}, {QMI_FIXED_INTF(0x05c6, 0x9069, 5)}, {QMI_FIXED_INTF(0x05c6, 0x9069, 6)}, {QMI_FIXED_INTF(0x05c6, 0x9069, 7)}, {QMI_FIXED_INTF(0x05c6, 0x9069, 8)}, {QMI_FIXED_INTF(0x05c6, 0x9070, 4)}, {QMI_FIXED_INTF(0x05c6, 0x9070, 5)}, {QMI_FIXED_INTF(0x05c6, 0x9075, 5)}, {QMI_FIXED_INTF(0x05c6, 0x9076, 4)}, {QMI_FIXED_INTF(0x05c6, 0x9076, 5)}, {QMI_FIXED_INTF(0x05c6, 0x9076, 6)}, {QMI_FIXED_INTF(0x05c6, 0x9076, 7)}, {QMI_FIXED_INTF(0x05c6, 0x9076, 8)}, {QMI_FIXED_INTF(0x05c6, 0x9077, 3)}, {QMI_FIXED_INTF(0x05c6, 0x9077, 4)}, {QMI_FIXED_INTF(0x05c6, 0x9077, 5)}, {QMI_FIXED_INTF(0x05c6, 0x9077, 6)}, {QMI_FIXED_INTF(0x05c6, 0x9078, 3)}, {QMI_FIXED_INTF(0x05c6, 0x9079, 4)}, {QMI_FIXED_INTF(0x05c6, 0x9079, 5)}, {QMI_FIXED_INTF(0x05c6, 0x9079, 6)}, {QMI_FIXED_INTF(0x05c6, 0x9079, 7)}, {QMI_FIXED_INTF(0x05c6, 0x9079, 8)}, {QMI_FIXED_INTF(0x05c6, 0x9080, 5)}, {QMI_FIXED_INTF(0x05c6, 0x9080, 6)}, {QMI_FIXED_INTF(0x05c6, 0x9080, 7)}, {QMI_FIXED_INTF(0x05c6, 0x9080, 8)}, {QMI_FIXED_INTF(0x05c6, 0x9083, 3)}, {QMI_FIXED_INTF(0x05c6, 0x9084, 4)}, {QMI_QUIRK_SET_DTR(0x05c6, 0x9091, 2)}, /* Compal RXM-G1 */ {QMI_FIXED_INTF(0x05c6, 0x90b2, 3)}, /* ublox R410M */ {QMI_QUIRK_SET_DTR(0x05c6, 0x90db, 2)}, /* Compal RXM-G1 */ {QMI_FIXED_INTF(0x05c6, 0x920d, 0)}, {QMI_FIXED_INTF(0x05c6, 0x920d, 5)}, {QMI_QUIRK_SET_DTR(0x05c6, 0x9625, 4)}, /* YUGA CLM920-NC5 */ {QMI_FIXED_INTF(0x0846, 0x68a2, 8)}, {QMI_FIXED_INTF(0x0846, 0x68d3, 8)}, /* Netgear Aircard 779S */ {QMI_FIXED_INTF(0x12d1, 0x140c, 1)}, /* Huawei E173 */ {QMI_FIXED_INTF(0x12d1, 0x14ac, 1)}, /* Huawei E1820 */ {QMI_FIXED_INTF(0x1435, 0x0918, 3)}, /* Wistron NeWeb D16Q1 */ {QMI_FIXED_INTF(0x1435, 0x0918, 4)}, /* Wistron NeWeb D16Q1 */ {QMI_FIXED_INTF(0x1435, 0x0918, 5)}, /* Wistron NeWeb D16Q1 */ {QMI_FIXED_INTF(0x1435, 0x3185, 4)}, /* Wistron NeWeb M18Q5 */ {QMI_FIXED_INTF(0x1435, 0xd111, 4)}, /* M9615A DM11-1 D51QC */ {QMI_FIXED_INTF(0x1435, 0xd181, 3)}, /* Wistron NeWeb D18Q1 */ {QMI_FIXED_INTF(0x1435, 0xd181, 4)}, /* Wistron NeWeb D18Q1 */ {QMI_FIXED_INTF(0x1435, 0xd181, 5)}, /* Wistron NeWeb D18Q1 */ {QMI_FIXED_INTF(0x1435, 0xd182, 4)}, /* Wistron NeWeb D18 */ {QMI_FIXED_INTF(0x1435, 0xd182, 5)}, /* Wistron NeWeb D18 */ {QMI_FIXED_INTF(0x1435, 0xd191, 4)}, /* Wistron NeWeb D19Q1 */ {QMI_QUIRK_SET_DTR(0x1508, 0x1001, 4)}, /* Fibocom NL668 series */ {QMI_FIXED_INTF(0x1690, 0x7588, 4)}, /* ASKEY WWHC050 */ {QMI_FIXED_INTF(0x16d8, 0x6003, 0)}, /* CMOTech 6003 */ {QMI_FIXED_INTF(0x16d8, 0x6007, 0)}, /* CMOTech CHE-628S */ {QMI_FIXED_INTF(0x16d8, 0x6008, 0)}, /* CMOTech CMU-301 */ {QMI_FIXED_INTF(0x16d8, 0x6280, 0)}, /* CMOTech CHU-628 */ {QMI_FIXED_INTF(0x16d8, 0x7001, 0)}, /* CMOTech CHU-720S */ {QMI_FIXED_INTF(0x16d8, 0x7002, 0)}, /* CMOTech 7002 */ {QMI_FIXED_INTF(0x16d8, 0x7003, 4)}, /* CMOTech CHU-629K */ {QMI_FIXED_INTF(0x16d8, 0x7004, 3)}, /* CMOTech 7004 */ {QMI_FIXED_INTF(0x16d8, 0x7006, 5)}, /* CMOTech CGU-629 */ {QMI_FIXED_INTF(0x16d8, 0x700a, 4)}, /* CMOTech CHU-629S */ {QMI_FIXED_INTF(0x16d8, 0x7211, 0)}, /* CMOTech CHU-720I */ {QMI_FIXED_INTF(0x16d8, 0x7212, 0)}, /* CMOTech 7212 */ {QMI_FIXED_INTF(0x16d8, 0x7213, 0)}, /* CMOTech 7213 */ {QMI_FIXED_INTF(0x16d8, 0x7251, 1)}, /* CMOTech 7251 */ {QMI_FIXED_INTF(0x16d8, 0x7252, 1)}, /* CMOTech 7252 */ {QMI_FIXED_INTF(0x16d8, 0x7253, 1)}, /* CMOTech 7253 */ {QMI_FIXED_INTF(0x19d2, 0x0002, 1)}, {QMI_FIXED_INTF(0x19d2, 0x0012, 1)}, {QMI_FIXED_INTF(0x19d2, 0x0017, 3)}, {QMI_FIXED_INTF(0x19d2, 0x0019, 3)}, /* ONDA MT689DC */ {QMI_FIXED_INTF(0x19d2, 0x0021, 4)}, {QMI_FIXED_INTF(0x19d2, 0x0025, 1)}, {QMI_FIXED_INTF(0x19d2, 0x0031, 4)}, {QMI_FIXED_INTF(0x19d2, 0x0042, 4)}, {QMI_FIXED_INTF(0x19d2, 0x0049, 5)}, {QMI_FIXED_INTF(0x19d2, 0x0052, 4)}, {QMI_FIXED_INTF(0x19d2, 0x0055, 1)}, /* ZTE (Vodafone) K3520-Z */ {QMI_FIXED_INTF(0x19d2, 0x0058, 4)}, {QMI_FIXED_INTF(0x19d2, 0x0063, 4)}, /* ZTE (Vodafone) K3565-Z */ {QMI_FIXED_INTF(0x19d2, 0x0104, 4)}, /* ZTE (Vodafone) K4505-Z */ {QMI_FIXED_INTF(0x19d2, 0x0113, 5)}, {QMI_FIXED_INTF(0x19d2, 0x0118, 5)}, {QMI_FIXED_INTF(0x19d2, 0x0121, 5)}, {QMI_FIXED_INTF(0x19d2, 0x0123, 4)}, {QMI_FIXED_INTF(0x19d2, 0x0124, 5)}, {QMI_FIXED_INTF(0x19d2, 0x0125, 6)}, {QMI_FIXED_INTF(0x19d2, 0x0126, 5)}, {QMI_FIXED_INTF(0x19d2, 0x0130, 1)}, {QMI_FIXED_INTF(0x19d2, 0x0133, 3)}, {QMI_FIXED_INTF(0x19d2, 0x0141, 5)}, {QMI_FIXED_INTF(0x19d2, 0x0157, 5)}, /* ZTE MF683 */ {QMI_FIXED_INTF(0x19d2, 0x0158, 3)}, {QMI_FIXED_INTF(0x19d2, 0x0167, 4)}, /* ZTE MF820D */ {QMI_FIXED_INTF(0x19d2, 0x0168, 4)}, {QMI_FIXED_INTF(0x19d2, 0x0176, 3)}, {QMI_FIXED_INTF(0x19d2, 0x0178, 3)}, {QMI_FIXED_INTF(0x19d2, 0x0189, 4)}, /* ZTE MF290 */ {QMI_FIXED_INTF(0x19d2, 0x0191, 4)}, /* ZTE EuFi890 */ {QMI_FIXED_INTF(0x19d2, 0x0199, 1)}, /* ZTE MF820S */ {QMI_FIXED_INTF(0x19d2, 0x0200, 1)}, {QMI_FIXED_INTF(0x19d2, 0x0257, 3)}, /* ZTE MF821 */ {QMI_FIXED_INTF(0x19d2, 0x0265, 4)}, /* ONDA MT8205 4G LTE */ {QMI_FIXED_INTF(0x19d2, 0x0284, 4)}, /* ZTE MF880 */ {QMI_FIXED_INTF(0x19d2, 0x0326, 4)}, /* ZTE MF821D */ {QMI_FIXED_INTF(0x19d2, 0x0396, 3)}, /* ZTE ZM8620 */ {QMI_FIXED_INTF(0x19d2, 0x0412, 4)}, /* Telewell TW-LTE 4G */ {QMI_FIXED_INTF(0x19d2, 0x1008, 4)}, /* ZTE (Vodafone) K3570-Z */ {QMI_FIXED_INTF(0x19d2, 0x1010, 4)}, /* ZTE (Vodafone) K3571-Z */ {QMI_FIXED_INTF(0x19d2, 0x1012, 4)}, {QMI_FIXED_INTF(0x19d2, 0x1018, 3)}, /* ZTE (Vodafone) K5006-Z */ {QMI_FIXED_INTF(0x19d2, 0x1021, 2)}, {QMI_FIXED_INTF(0x19d2, 0x1245, 4)}, {QMI_FIXED_INTF(0x19d2, 0x1247, 4)}, {QMI_FIXED_INTF(0x19d2, 0x1252, 4)}, {QMI_FIXED_INTF(0x19d2, 0x1254, 4)}, {QMI_FIXED_INTF(0x19d2, 0x1255, 3)}, {QMI_FIXED_INTF(0x19d2, 0x1255, 4)}, {QMI_FIXED_INTF(0x19d2, 0x1256, 4)}, {QMI_FIXED_INTF(0x19d2, 0x1270, 5)}, /* ZTE MF667 */ {QMI_FIXED_INTF(0x19d2, 0x1275, 3)}, /* ZTE P685M */ {QMI_FIXED_INTF(0x19d2, 0x1401, 2)}, {QMI_FIXED_INTF(0x19d2, 0x1402, 2)}, /* ZTE MF60 */ {QMI_FIXED_INTF(0x19d2, 0x1424, 2)}, {QMI_FIXED_INTF(0x19d2, 0x1425, 2)}, {QMI_FIXED_INTF(0x19d2, 0x1426, 2)}, /* ZTE MF91 */ {QMI_FIXED_INTF(0x19d2, 0x1428, 2)}, /* Telewell TW-LTE 4G v2 */ {QMI_FIXED_INTF(0x19d2, 0x1432, 3)}, /* ZTE ME3620 */ {QMI_FIXED_INTF(0x19d2, 0x1485, 5)}, /* ZTE MF286D */ {QMI_FIXED_INTF(0x19d2, 0x2002, 4)}, /* ZTE (Vodafone) K3765-Z */ {QMI_FIXED_INTF(0x2001, 0x7e16, 3)}, /* D-Link DWM-221 */ {QMI_FIXED_INTF(0x2001, 0x7e19, 4)}, /* D-Link DWM-221 B1 */ {QMI_FIXED_INTF(0x2001, 0x7e35, 4)}, /* D-Link DWM-222 */ {QMI_FIXED_INTF(0x2001, 0x7e3d, 4)}, /* D-Link DWM-222 A2 */ {QMI_FIXED_INTF(0x2020, 0x2031, 4)}, /* Olicard 600 */ {QMI_FIXED_INTF(0x2020, 0x2033, 4)}, /* BroadMobi BM806U */ {QMI_QUIRK_SET_DTR(0x2020, 0x2060, 4)}, /* BroadMobi BM818 */ {QMI_FIXED_INTF(0x0f3d, 0x68a2, 8)}, /* Sierra Wireless MC7700 */ {QMI_FIXED_INTF(0x114f, 0x68a2, 8)}, /* Sierra Wireless MC7750 */ {QMI_FIXED_INTF(0x1199, 0x68a2, 8)}, /* Sierra Wireless MC7710 in QMI mode */ {QMI_FIXED_INTF(0x1199, 0x68a2, 19)}, /* Sierra Wireless MC7710 in QMI mode */ {QMI_QUIRK_SET_DTR(0x1199, 0x68c0, 8)}, /* Sierra Wireless MC7304/MC7354, WP76xx */ {QMI_QUIRK_SET_DTR(0x1199, 0x68c0, 10)},/* Sierra Wireless MC7304/MC7354 */ {QMI_FIXED_INTF(0x1199, 0x901c, 8)}, /* Sierra Wireless EM7700 */ {QMI_FIXED_INTF(0x1199, 0x901f, 8)}, /* Sierra Wireless EM7355 */ {QMI_FIXED_INTF(0x1199, 0x9041, 8)}, /* Sierra Wireless MC7305/MC7355 */ {QMI_FIXED_INTF(0x1199, 0x9041, 10)}, /* Sierra Wireless MC7305/MC7355 */ {QMI_FIXED_INTF(0x1199, 0x9051, 8)}, /* Netgear AirCard 340U */ {QMI_FIXED_INTF(0x1199, 0x9053, 8)}, /* Sierra Wireless Modem */ {QMI_FIXED_INTF(0x1199, 0x9054, 8)}, /* Sierra Wireless Modem */ {QMI_FIXED_INTF(0x1199, 0x9055, 8)}, /* Netgear AirCard 341U */ {QMI_FIXED_INTF(0x1199, 0x9056, 8)}, /* Sierra Wireless Modem */ {QMI_FIXED_INTF(0x1199, 0x9057, 8)}, {QMI_FIXED_INTF(0x1199, 0x9061, 8)}, /* Sierra Wireless Modem */ {QMI_FIXED_INTF(0x1199, 0x9063, 8)}, /* Sierra Wireless EM7305 */ {QMI_FIXED_INTF(0x1199, 0x9063, 10)}, /* Sierra Wireless EM7305 */ {QMI_QUIRK_SET_DTR(0x1199, 0x9071, 8)}, /* Sierra Wireless MC74xx */ {QMI_QUIRK_SET_DTR(0x1199, 0x9071, 10)},/* Sierra Wireless MC74xx */ {QMI_QUIRK_SET_DTR(0x1199, 0x9079, 8)}, /* Sierra Wireless EM74xx */ {QMI_QUIRK_SET_DTR(0x1199, 0x9079, 10)},/* Sierra Wireless EM74xx */ {QMI_QUIRK_SET_DTR(0x1199, 0x907b, 8)}, /* Sierra Wireless EM74xx */ {QMI_QUIRK_SET_DTR(0x1199, 0x907b, 10)},/* Sierra Wireless EM74xx */ {QMI_QUIRK_SET_DTR(0x1199, 0x9091, 8)}, /* Sierra Wireless EM7565 */ {QMI_QUIRK_SET_DTR(0x1199, 0xc081, 8)}, /* Sierra Wireless EM7590 */ {QMI_FIXED_INTF(0x1bbb, 0x011e, 4)}, /* Telekom Speedstick LTE II (Alcatel One Touch L100V LTE) */ {QMI_FIXED_INTF(0x1bbb, 0x0203, 2)}, /* Alcatel L800MA */ {QMI_FIXED_INTF(0x2357, 0x0201, 4)}, /* TP-LINK HSUPA Modem MA180 */ {QMI_FIXED_INTF(0x2357, 0x9000, 4)}, /* TP-LINK MA260 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1031, 3)}, /* Telit LE910C1-EUX */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1034, 2)}, /* Telit LE910C4-WWX */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1037, 4)}, /* Telit LE910C4-WWX */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1038, 3)}, /* Telit LE910C4-WWX */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x103a, 0)}, /* Telit LE910C4-WWX */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1040, 2)}, /* Telit LE922A */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1050, 2)}, /* Telit FN980 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1057, 2)}, /* Telit FN980 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1060, 2)}, /* Telit LN920 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1070, 2)}, /* Telit FN990A */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1077, 2)}, /* Telit FN990A w/audio */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1080, 2)}, /* Telit FE990A */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x10a0, 0)}, /* Telit FN920C04 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x10a4, 0)}, /* Telit FN920C04 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x10a9, 0)}, /* Telit FN920C04 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x10b0, 0)}, /* Telit FE990B */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x10c0, 0)}, /* Telit FE910C04 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x10c4, 0)}, /* Telit FE910C04 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x10c8, 0)}, /* Telit FE910C04 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x10d0, 0)}, /* Telit FN990B */ {QMI_FIXED_INTF(0x1bc7, 0x1100, 3)}, /* Telit ME910 */ {QMI_FIXED_INTF(0x1bc7, 0x1101, 3)}, /* Telit ME910 dual modem */ {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1201, 2)}, /* Telit LE920, LE920A4 */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1230, 2)}, /* Telit LE910Cx */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1250, 0)}, /* Telit LE910Cx */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1260, 2)}, /* Telit LE910Cx */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1261, 2)}, /* Telit LE910Cx */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x1900, 1)}, /* Telit LN940 series */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x3000, 0)}, /* Telit FN912 series */ {QMI_QUIRK_SET_DTR(0x1bc7, 0x3001, 0)}, /* Telit FN912 series */ {QMI_FIXED_INTF(0x1c9e, 0x9801, 3)}, /* Telewell TW-3G HSPA+ */ {QMI_FIXED_INTF(0x1c9e, 0x9803, 4)}, /* Telewell TW-3G HSPA+ */ {QMI_FIXED_INTF(0x1c9e, 0x9b01, 3)}, /* XS Stick W100-2 from 4G Systems */ {QMI_QUIRK_SET_DTR(0x1c9e, 0x9b05, 4)}, /* Longsung U8300 */ {QMI_QUIRK_SET_DTR(0x1c9e, 0x9b3c, 4)}, /* Longsung U9300 */ {QMI_FIXED_INTF(0x0b3c, 0xc000, 4)}, /* Olivetti Olicard 100 */ {QMI_FIXED_INTF(0x0b3c, 0xc001, 4)}, /* Olivetti Olicard 120 */ {QMI_FIXED_INTF(0x0b3c, 0xc002, 4)}, /* Olivetti Olicard 140 */ {QMI_FIXED_INTF(0x0b3c, 0xc004, 6)}, /* Olivetti Olicard 155 */ {QMI_FIXED_INTF(0x0b3c, 0xc005, 6)}, /* Olivetti Olicard 200 */ {QMI_FIXED_INTF(0x0b3c, 0xc00a, 6)}, /* Olivetti Olicard 160 */ {QMI_FIXED_INTF(0x0b3c, 0xc00b, 4)}, /* Olivetti Olicard 500 */ {QMI_FIXED_INTF(0x1e2d, 0x0060, 4)}, /* Cinterion PLxx */ {QMI_QUIRK_SET_DTR(0x1e2d, 0x006f, 8)}, /* Cinterion PLS83/PLS63 */ {QMI_FIXED_INTF(0x1e2d, 0x0053, 4)}, /* Cinterion PHxx,PXxx */ {QMI_FIXED_INTF(0x1e2d, 0x0063, 10)}, /* Cinterion ALASxx (1 RmNet) */ {QMI_FIXED_INTF(0x1e2d, 0x0082, 4)}, /* Cinterion PHxx,PXxx (2 RmNet) */ {QMI_FIXED_INTF(0x1e2d, 0x0082, 5)}, /* Cinterion PHxx,PXxx (2 RmNet) */ {QMI_FIXED_INTF(0x1e2d, 0x0083, 4)}, /* Cinterion PHxx,PXxx (1 RmNet + USB Audio)*/ {QMI_QUIRK_SET_DTR(0x1e2d, 0x00b0, 4)}, /* Cinterion CLS8 */ {QMI_FIXED_INTF(0x1e2d, 0x00b7, 0)}, /* Cinterion MV31 RmNet */ {QMI_FIXED_INTF(0x1e2d, 0x00b9, 0)}, /* Cinterion MV31 RmNet based on new baseline */ {QMI_FIXED_INTF(0x1e2d, 0x00f3, 0)}, /* Cinterion MV32-W-A RmNet */ {QMI_FIXED_INTF(0x1e2d, 0x00f4, 0)}, /* Cinterion MV32-W-B RmNet */ {QMI_FIXED_INTF(0x413c, 0x81a2, 8)}, /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */ {QMI_FIXED_INTF(0x413c, 0x81a3, 8)}, /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */ {QMI_FIXED_INTF(0x413c, 0x81a4, 8)}, /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */ {QMI_FIXED_INTF(0x413c, 0x81a8, 8)}, /* Dell Wireless 5808 Gobi(TM) 4G LTE Mobile Broadband Card */ {QMI_FIXED_INTF(0x413c, 0x81a9, 8)}, /* Dell Wireless 5808e Gobi(TM) 4G LTE Mobile Broadband Card */ {QMI_FIXED_INTF(0x413c, 0x81b1, 8)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card */ {QMI_FIXED_INTF(0x413c, 0x81b3, 8)}, /* Dell Wireless 5809e Gobi(TM) 4G LTE Mobile Broadband Card (rev3) */ {QMI_FIXED_INTF(0x413c, 0x81b6, 8)}, /* Dell Wireless 5811e */ {QMI_FIXED_INTF(0x413c, 0x81b6, 10)}, /* Dell Wireless 5811e */ {QMI_FIXED_INTF(0x413c, 0x81c2, 8)}, /* Dell Wireless 5811e */ {QMI_FIXED_INTF(0x413c, 0x81cc, 8)}, /* Dell Wireless 5816e */ {QMI_FIXED_INTF(0x413c, 0x81d7, 0)}, /* Dell Wireless 5821e */ {QMI_FIXED_INTF(0x413c, 0x81d7, 1)}, /* Dell Wireless 5821e preproduction config */ {QMI_FIXED_INTF(0x413c, 0x81e0, 0)}, /* Dell Wireless 5821e with eSIM support*/ {QMI_FIXED_INTF(0x413c, 0x81e4, 0)}, /* Dell Wireless 5829e with eSIM support*/ {QMI_FIXED_INTF(0x413c, 0x81e6, 0)}, /* Dell Wireless 5829e */ {QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)}, /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */ {QMI_FIXED_INTF(0x03f0, 0x9d1d, 1)}, /* HP lt4120 Snapdragon X5 LTE */ {QMI_QUIRK_SET_DTR(0x22de, 0x9051, 2)}, /* Hucom Wireless HM-211S/K */ {QMI_FIXED_INTF(0x22de, 0x9061, 3)}, /* WeTelecom WPD-600N */ {QMI_QUIRK_SET_DTR(0x1e0e, 0x9001, 5)}, /* SIMCom 7100E, 7230E, 7600E ++ */ {QMI_QUIRK_SET_DTR(0x1e0e, 0x9071, 3)}, /* SIMCom 8230C ++ */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0121, 4)}, /* Quectel EC21 Mini PCIe */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0191, 4)}, /* Quectel EG91 */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0195, 4)}, /* Quectel EG95 */ {QMI_FIXED_INTF(0x2c7c, 0x0296, 4)}, /* Quectel BG96 */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x030e, 4)}, /* Quectel EM05GV2 */ {QMI_QUIRK_SET_DTR(0x2c7c, 0x0316, 3)}, /* Quectel RG255C */ {QMI_QUIRK_SET_DTR(0x2cb7, 0x0104, 4)}, /* Fibocom NL678 series */ {QMI_QUIRK_SET_DTR(0x2cb7, 0x0112, 0)}, /* Fibocom FG132 */ {QMI_FIXED_INTF(0x0489, 0xe0b4, 0)}, /* Foxconn T77W968 LTE */ {QMI_FIXED_INTF(0x0489, 0xe0b5, 0)}, /* Foxconn T77W968 LTE with eSIM support*/ {QMI_FIXED_INTF(0x2692, 0x9025, 4)}, /* Cellient MPL200 (rebranded Qualcomm 05c6:9025) */ {QMI_QUIRK_SET_DTR(0x1546, 0x1312, 4)}, /* u-blox LARA-R6 01B */ {QMI_QUIRK_SET_DTR(0x1546, 0x1342, 4)}, /* u-blox LARA-L6 */ {QMI_QUIRK_SET_DTR(0x33f8, 0x0104, 4)}, /* Rolling RW101 RMNET */ {QMI_FIXED_INTF(0x2dee, 0x4d22, 5)}, /* MeiG Smart SRM825L */ /* 4. Gobi 1000 devices */ {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */ {QMI_GOBI1K_DEVICE(0x03f0, 0x1f1d)}, /* HP un2400 Gobi Modem Device */ {QMI_GOBI1K_DEVICE(0x04da, 0x250d)}, /* Panasonic Gobi Modem device */ {QMI_GOBI1K_DEVICE(0x413c, 0x8172)}, /* Dell Gobi Modem device */ {QMI_GOBI1K_DEVICE(0x1410, 0xa001)}, /* Novatel/Verizon USB-1000 */ {QMI_GOBI1K_DEVICE(0x1410, 0xa002)}, /* Novatel Gobi Modem device */ {QMI_GOBI1K_DEVICE(0x1410, 0xa003)}, /* Novatel Gobi Modem device */ {QMI_GOBI1K_DEVICE(0x1410, 0xa004)}, /* Novatel Gobi Modem device */ {QMI_GOBI1K_DEVICE(0x1410, 0xa005)}, /* Novatel Gobi Modem device */ {QMI_GOBI1K_DEVICE(0x1410, 0xa006)}, /* Novatel Gobi Modem device */ {QMI_GOBI1K_DEVICE(0x1410, 0xa007)}, /* Novatel Gobi Modem device */ {QMI_GOBI1K_DEVICE(0x0b05, 0x1776)}, /* Asus Gobi Modem device */ {QMI_GOBI1K_DEVICE(0x19d2, 0xfff3)}, /* ONDA Gobi Modem device */ {QMI_GOBI1K_DEVICE(0x05c6, 0x9001)}, /* Generic Gobi Modem device */ {QMI_GOBI1K_DEVICE(0x05c6, 0x9002)}, /* Generic Gobi Modem device */ {QMI_GOBI1K_DEVICE(0x05c6, 0x9202)}, /* Generic Gobi Modem device */ {QMI_GOBI1K_DEVICE(0x05c6, 0x9203)}, /* Generic Gobi Modem device */ {QMI_GOBI1K_DEVICE(0x05c6, 0x9222)}, /* Generic Gobi Modem device */ {QMI_GOBI1K_DEVICE(0x05c6, 0x9009)}, /* Generic Gobi Modem device */ /* 5. Gobi 2000 and 3000 devices */ {QMI_GOBI_DEVICE(0x413c, 0x8186)}, /* Dell Gobi 2000 Modem device (N0218, VU936) */ {QMI_GOBI_DEVICE(0x413c, 0x8194)}, /* Dell Gobi 3000 Composite */ {QMI_GOBI_DEVICE(0x05c6, 0x920b)}, /* Generic Gobi 2000 Modem device */ {QMI_GOBI_DEVICE(0x05c6, 0x9225)}, /* Sony Gobi 2000 Modem device (N0279, VU730) */ {QMI_GOBI_DEVICE(0x05c6, 0x9245)}, /* Samsung Gobi 2000 Modem device (VL176) */ {QMI_GOBI_DEVICE(0x03f0, 0x251d)}, /* HP Gobi 2000 Modem device (VP412) */ {QMI_GOBI_DEVICE(0x05c6, 0x9215)}, /* Acer Gobi 2000 Modem device (VP413) */ {QMI_FIXED_INTF(0x05c6, 0x9215, 4)}, /* Quectel EC20 Mini PCIe */ {QMI_GOBI_DEVICE(0x05c6, 0x9265)}, /* Asus Gobi 2000 Modem device (VR305) */ {QMI_GOBI_DEVICE(0x05c6, 0x9235)}, /* Top Global Gobi 2000 Modem device (VR306) */ {QMI_GOBI_DEVICE(0x05c6, 0x9275)}, /* iRex Technologies Gobi 2000 Modem device (VR307) */ {QMI_GOBI_DEVICE(0x0af0, 0x8120)}, /* Option GTM681W */ {QMI_GOBI_DEVICE(0x1199, 0x68a5)}, /* Sierra Wireless Modem */ {QMI_GOBI_DEVICE(0x1199, 0x68a9)}, /* Sierra Wireless Modem */ {QMI_GOBI_DEVICE(0x1199, 0x9001)}, /* Sierra Wireless Gobi 2000 Modem device (VT773) */ {QMI_GOBI_DEVICE(0x1199, 0x9002)}, /* Sierra Wireless Gobi 2000 Modem device (VT773) */ {QMI_GOBI_DEVICE(0x1199, 0x9003)}, /* Sierra Wireless Gobi 2000 Modem device (VT773) */ {QMI_GOBI_DEVICE(0x1199, 0x9004)}, /* Sierra Wireless Gobi 2000 Modem device (VT773) */ {QMI_GOBI_DEVICE(0x1199, 0x9005)}, /* Sierra Wireless Gobi 2000 Modem device (VT773) */ {QMI_GOBI_DEVICE(0x1199, 0x9006)}, /* Sierra Wireless Gobi 2000 Modem device (VT773) */ {QMI_GOBI_DEVICE(0x1199, 0x9007)}, /* Sierra Wireless Gobi 2000 Modem device (VT773) */ {QMI_GOBI_DEVICE(0x1199, 0x9008)}, /* Sierra Wireless Gobi 2000 Modem device (VT773) */ {QMI_GOBI_DEVICE(0x1199, 0x9009)}, /* Sierra Wireless Gobi 2000 Modem device (VT773) */ {QMI_GOBI_DEVICE(0x1199, 0x900a)}, /* Sierra Wireless Gobi 2000 Modem device (VT773) */ {QMI_GOBI_DEVICE(0x1199, 0x9011)}, /* Sierra Wireless Gobi 2000 Modem device (MC8305) */ {QMI_GOBI_DEVICE(0x16d8, 0x8002)}, /* CMDTech Gobi 2000 Modem device (VU922) */ {QMI_GOBI_DEVICE(0x05c6, 0x9205)}, /* Gobi 2000 Modem device */ {QMI_GOBI_DEVICE(0x1199, 0x9013)}, /* Sierra Wireless Gobi 3000 Modem device (MC8355) */ {QMI_GOBI_DEVICE(0x03f0, 0x371d)}, /* HP un2430 Mobile Broadband Module */ {QMI_GOBI_DEVICE(0x1199, 0x9015)}, /* Sierra Wireless Gobi 3000 Modem device */ {QMI_GOBI_DEVICE(0x1199, 0x9019)}, /* Sierra Wireless Gobi 3000 Modem device */ {QMI_GOBI_DEVICE(0x1199, 0x901b)}, /* Sierra Wireless MC7770 */ {QMI_GOBI_DEVICE(0x12d1, 0x14f1)}, /* Sony Gobi 3000 Composite */ {QMI_GOBI_DEVICE(0x1410, 0xa021)}, /* Foxconn Gobi 3000 Modem device (Novatel E396) */ { } /* END */ }; MODULE_DEVICE_TABLE(usb, products); static bool quectel_ec20_detected(struct usb_interface *intf) { struct usb_device *dev = interface_to_usbdev(intf); if (dev->actconfig && le16_to_cpu(dev->descriptor.idVendor) == 0x05c6 && le16_to_cpu(dev->descriptor.idProduct) == 0x9215 && dev->actconfig->desc.bNumInterfaces == 5) return true; return false; } static int qmi_wwan_probe(struct usb_interface *intf, const struct usb_device_id *prod) { struct usb_device_id *id = (struct usb_device_id *)prod; struct usb_interface_descriptor *desc = &intf->cur_altsetting->desc; /* Workaround to enable dynamic IDs. This disables usbnet * blacklisting functionality. Which, if required, can be * reimplemented here by using a magic "blacklist" value * instead of 0 in the static device id table */ if (!id->driver_info) { dev_dbg(&intf->dev, "setting defaults for dynamic device id\n"); id->driver_info = (unsigned long)&qmi_wwan_info; } /* There are devices where the same interface number can be * configured as different functions. We should only bind to * vendor specific functions when matching on interface number */ if (id->match_flags & USB_DEVICE_ID_MATCH_INT_NUMBER && desc->bInterfaceClass != USB_CLASS_VENDOR_SPEC) { dev_dbg(&intf->dev, "Rejecting interface number match for class %02x\n", desc->bInterfaceClass); return -ENODEV; } /* Quectel EC20 quirk where we've QMI on interface 4 instead of 0 */ if (quectel_ec20_detected(intf) && desc->bInterfaceNumber == 0) { dev_dbg(&intf->dev, "Quectel EC20 quirk, skipping interface 0\n"); return -ENODEV; } /* Several Quectel modems supports dynamic interface configuration, so * we need to match on class/subclass/protocol. These values are * identical for the diagnostic- and QMI-interface, but bNumEndpoints is * different. Ignore the current interface if the number of endpoints * equals the number for the diag interface (two). */ if (desc->bNumEndpoints == 2) return -ENODEV; return usbnet_probe(intf, id); } static void qmi_wwan_disconnect(struct usb_interface *intf) { struct usbnet *dev = usb_get_intfdata(intf); struct qmi_wwan_state *info; struct list_head *iter; struct net_device *ldev; LIST_HEAD(list); /* called twice if separate control and data intf */ if (!dev) return; info = (void *)&dev->data; if (info->flags & QMI_WWAN_FLAG_MUX) { if (!rtnl_trylock()) { restart_syscall(); return; } rcu_read_lock(); netdev_for_each_upper_dev_rcu(dev->net, ldev, iter) qmimux_unregister_device(ldev, &list); rcu_read_unlock(); unregister_netdevice_many(&list); rtnl_unlock(); info->flags &= ~QMI_WWAN_FLAG_MUX; } usbnet_disconnect(intf); } static struct usb_driver qmi_wwan_driver = { .name = "qmi_wwan", .id_table = products, .probe = qmi_wwan_probe, .disconnect = qmi_wwan_disconnect, .suspend = qmi_wwan_suspend, .resume = qmi_wwan_resume, .reset_resume = qmi_wwan_resume, .supports_autosuspend = 1, .disable_hub_initiated_lpm = 1, }; module_usb_driver(qmi_wwan_driver); MODULE_AUTHOR("Bjørn Mork <bjorn@mork.no>"); MODULE_DESCRIPTION("Qualcomm MSM Interface (QMI) WWAN driver"); MODULE_LICENSE("GPL");
1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 // SPDX-License-Identifier: GPL-2.0 /* ATM ioctl handling */ /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ /* 2003 John Levon <levon@movementarian.org> */ #define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ #include <linux/module.h> #include <linux/kmod.h> #include <linux/net.h> /* struct socket, struct proto_ops */ #include <linux/atm.h> /* ATM stuff */ #include <linux/atmdev.h> #include <linux/atmclip.h> /* CLIP_*ENCAP */ #include <linux/atmarp.h> /* manifest constants */ #include <linux/capability.h> #include <linux/sonet.h> /* for ioctls */ #include <linux/atmsvc.h> #include <linux/atmmpc.h> #include <net/atmclip.h> #include <linux/atmlec.h> #include <linux/mutex.h> #include <asm/ioctls.h> #include <net/compat.h> #include "resources.h" #include "signaling.h" /* for WAITING and sigd_attach */ #include "common.h" static DEFINE_MUTEX(ioctl_mutex); static LIST_HEAD(ioctl_list); void register_atm_ioctl(struct atm_ioctl *ioctl) { mutex_lock(&ioctl_mutex); list_add_tail(&ioctl->list, &ioctl_list); mutex_unlock(&ioctl_mutex); } EXPORT_SYMBOL(register_atm_ioctl); void deregister_atm_ioctl(struct atm_ioctl *ioctl) { mutex_lock(&ioctl_mutex); list_del(&ioctl->list); mutex_unlock(&ioctl_mutex); } EXPORT_SYMBOL(deregister_atm_ioctl); static int do_vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg, int compat) { struct sock *sk = sock->sk; struct atm_vcc *vcc; int error; struct list_head *pos; void __user *argp = (void __user *)arg; void __user *buf; int __user *len; vcc = ATM_SD(sock); switch (cmd) { case SIOCOUTQ: if (sock->state != SS_CONNECTED || !test_bit(ATM_VF_READY, &vcc->flags)) { error = -EINVAL; goto done; } error = put_user(sk->sk_sndbuf - sk_wmem_alloc_get(sk), (int __user *)argp); goto done; case SIOCINQ: { struct sk_buff *skb; int amount; if (sock->state != SS_CONNECTED) { error = -EINVAL; goto done; } spin_lock_irq(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); amount = skb ? skb->len : 0; spin_unlock_irq(&sk->sk_receive_queue.lock); error = put_user(amount, (int __user *)argp); goto done; } case ATM_SETSC: net_warn_ratelimited("ATM_SETSC is obsolete; used by %s:%d\n", current->comm, task_pid_nr(current)); error = 0; goto done; case ATMSIGD_CTRL: if (!capable(CAP_NET_ADMIN)) { error = -EPERM; goto done; } /* * The user/kernel protocol for exchanging signalling * info uses kernel pointers as opaque references, * so the holder of the file descriptor can scribble * on the kernel... so we should make sure that we * have the same privileges that /proc/kcore needs */ if (!capable(CAP_SYS_RAWIO)) { error = -EPERM; goto done; } #ifdef CONFIG_COMPAT /* WTF? I don't even want to _think_ about making this work for 32-bit userspace. TBH I don't really want to think about it at all. dwmw2. */ if (compat) { net_warn_ratelimited("32-bit task cannot be atmsigd\n"); error = -EINVAL; goto done; } #endif error = sigd_attach(vcc); if (!error) sock->state = SS_CONNECTED; goto done; case ATM_SETBACKEND: case ATM_NEWBACKENDIF: { atm_backend_t backend; error = get_user(backend, (atm_backend_t __user *)argp); if (error) goto done; switch (backend) { case ATM_BACKEND_PPP: request_module("pppoatm"); break; case ATM_BACKEND_BR2684: request_module("br2684"); break; } break; } case ATMMPC_CTRL: case ATMMPC_DATA: request_module("mpoa"); break; case ATMARPD_CTRL: request_module("clip"); break; case ATMLEC_CTRL: request_module("lec"); break; } error = -ENOIOCTLCMD; mutex_lock(&ioctl_mutex); list_for_each(pos, &ioctl_list) { struct atm_ioctl *ic = list_entry(pos, struct atm_ioctl, list); if (try_module_get(ic->owner)) { error = ic->ioctl(sock, cmd, arg); module_put(ic->owner); if (error != -ENOIOCTLCMD) break; } } mutex_unlock(&ioctl_mutex); if (error != -ENOIOCTLCMD) goto done; if (cmd == ATM_GETNAMES) { if (IS_ENABLED(CONFIG_COMPAT) && compat) { #ifdef CONFIG_COMPAT struct compat_atm_iobuf __user *ciobuf = argp; compat_uptr_t cbuf; len = &ciobuf->length; if (get_user(cbuf, &ciobuf->buffer)) return -EFAULT; buf = compat_ptr(cbuf); #endif } else { struct atm_iobuf __user *iobuf = argp; len = &iobuf->length; if (get_user(buf, &iobuf->buffer)) return -EFAULT; } error = atm_getnames(buf, len); } else { int number; if (IS_ENABLED(CONFIG_COMPAT) && compat) { #ifdef CONFIG_COMPAT struct compat_atmif_sioc __user *csioc = argp; compat_uptr_t carg; len = &csioc->length; if (get_user(carg, &csioc->arg)) return -EFAULT; buf = compat_ptr(carg); if (get_user(number, &csioc->number)) return -EFAULT; #endif } else { struct atmif_sioc __user *sioc = argp; len = &sioc->length; if (get_user(buf, &sioc->arg)) return -EFAULT; if (get_user(number, &sioc->number)) return -EFAULT; } error = atm_dev_ioctl(cmd, buf, len, number, compat); } done: return error; } int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { return do_vcc_ioctl(sock, cmd, arg, 0); } #ifdef CONFIG_COMPAT /* * FIXME: * The compat_ioctl handling is duplicated, using both these conversion * routines and the compat argument to the actual handlers. Both * versions are somewhat incomplete and should be merged, e.g. by * moving the ioctl number translation into the actual handlers and * killing the conversion code. * * -arnd, November 2009 */ #define ATM_GETLINKRATE32 _IOW('a', ATMIOC_ITF+1, struct compat_atmif_sioc) #define ATM_GETNAMES32 _IOW('a', ATMIOC_ITF+3, struct compat_atm_iobuf) #define ATM_GETTYPE32 _IOW('a', ATMIOC_ITF+4, struct compat_atmif_sioc) #define ATM_GETESI32 _IOW('a', ATMIOC_ITF+5, struct compat_atmif_sioc) #define ATM_GETADDR32 _IOW('a', ATMIOC_ITF+6, struct compat_atmif_sioc) #define ATM_RSTADDR32 _IOW('a', ATMIOC_ITF+7, struct compat_atmif_sioc) #define ATM_ADDADDR32 _IOW('a', ATMIOC_ITF+8, struct compat_atmif_sioc) #define ATM_DELADDR32 _IOW('a', ATMIOC_ITF+9, struct compat_atmif_sioc) #define ATM_GETCIRANGE32 _IOW('a', ATMIOC_ITF+10, struct compat_atmif_sioc) #define ATM_SETCIRANGE32 _IOW('a', ATMIOC_ITF+11, struct compat_atmif_sioc) #define ATM_SETESI32 _IOW('a', ATMIOC_ITF+12, struct compat_atmif_sioc) #define ATM_SETESIF32 _IOW('a', ATMIOC_ITF+13, struct compat_atmif_sioc) #define ATM_GETSTAT32 _IOW('a', ATMIOC_SARCOM+0, struct compat_atmif_sioc) #define ATM_GETSTATZ32 _IOW('a', ATMIOC_SARCOM+1, struct compat_atmif_sioc) #define ATM_GETLOOP32 _IOW('a', ATMIOC_SARCOM+2, struct compat_atmif_sioc) #define ATM_SETLOOP32 _IOW('a', ATMIOC_SARCOM+3, struct compat_atmif_sioc) #define ATM_QUERYLOOP32 _IOW('a', ATMIOC_SARCOM+4, struct compat_atmif_sioc) static struct { unsigned int cmd32; unsigned int cmd; } atm_ioctl_map[] = { { ATM_GETLINKRATE32, ATM_GETLINKRATE }, { ATM_GETNAMES32, ATM_GETNAMES }, { ATM_GETTYPE32, ATM_GETTYPE }, { ATM_GETESI32, ATM_GETESI }, { ATM_GETADDR32, ATM_GETADDR }, { ATM_RSTADDR32, ATM_RSTADDR }, { ATM_ADDADDR32, ATM_ADDADDR }, { ATM_DELADDR32, ATM_DELADDR }, { ATM_GETCIRANGE32, ATM_GETCIRANGE }, { ATM_SETCIRANGE32, ATM_SETCIRANGE }, { ATM_SETESI32, ATM_SETESI }, { ATM_SETESIF32, ATM_SETESIF }, { ATM_GETSTAT32, ATM_GETSTAT }, { ATM_GETSTATZ32, ATM_GETSTATZ }, { ATM_GETLOOP32, ATM_GETLOOP }, { ATM_SETLOOP32, ATM_SETLOOP }, { ATM_QUERYLOOP32, ATM_QUERYLOOP }, }; #define NR_ATM_IOCTL ARRAY_SIZE(atm_ioctl_map) static int do_atm_iobuf(struct socket *sock, unsigned int cmd, unsigned long arg) { struct compat_atm_iobuf __user *iobuf32 = compat_ptr(arg); u32 data; if (get_user(data, &iobuf32->buffer)) return -EFAULT; return atm_getnames(&iobuf32->length, compat_ptr(data)); } static int do_atmif_sioc(struct socket *sock, unsigned int cmd, unsigned long arg) { struct compat_atmif_sioc __user *sioc32 = compat_ptr(arg); int number; u32 data; if (get_user(data, &sioc32->arg) || get_user(number, &sioc32->number)) return -EFAULT; return atm_dev_ioctl(cmd, compat_ptr(data), &sioc32->length, number, 0); } static int do_atm_ioctl(struct socket *sock, unsigned int cmd32, unsigned long arg) { int i; unsigned int cmd = 0; switch (cmd32) { case SONET_GETSTAT: case SONET_GETSTATZ: case SONET_GETDIAG: case SONET_SETDIAG: case SONET_CLRDIAG: case SONET_SETFRAMING: case SONET_GETFRAMING: case SONET_GETFRSENSE: return do_atmif_sioc(sock, cmd32, arg); } for (i = 0; i < NR_ATM_IOCTL; i++) { if (cmd32 == atm_ioctl_map[i].cmd32) { cmd = atm_ioctl_map[i].cmd; break; } } if (i == NR_ATM_IOCTL) return -EINVAL; switch (cmd) { case ATM_GETNAMES: return do_atm_iobuf(sock, cmd, arg); case ATM_GETLINKRATE: case ATM_GETTYPE: case ATM_GETESI: case ATM_GETADDR: case ATM_RSTADDR: case ATM_ADDADDR: case ATM_DELADDR: case ATM_GETCIRANGE: case ATM_SETCIRANGE: case ATM_SETESI: case ATM_SETESIF: case ATM_GETSTAT: case ATM_GETSTATZ: case ATM_GETLOOP: case ATM_SETLOOP: case ATM_QUERYLOOP: return do_atmif_sioc(sock, cmd, arg); } return -EINVAL; } int vcc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { int ret; ret = do_vcc_ioctl(sock, cmd, arg, 1); if (ret != -ENOIOCTLCMD) return ret; return do_atm_ioctl(sock, cmd, arg); } #endif
25 25 7 27 1 1 11 1 2 8 5 1 1 6 1 1 1 2 1 1 1 1 1 1 1 3 1 1 1 1 1 1 2 1 1 1 25 2 1 22 11 2 1 3 1 1 1 1 1 8 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 // SPDX-License-Identifier: GPL-2.0-or-later /* * (C) 2012 by Pablo Neira Ayuso <pablo@netfilter.org> * (C) 2012 by Vyatta Inc. <http://www.vyatta.com> */ #include <linux/init.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/rculist.h> #include <linux/rculist_nulls.h> #include <linux/types.h> #include <linux/timer.h> #include <linux/security.h> #include <linux/skbuff.h> #include <linux/errno.h> #include <linux/netlink.h> #include <linux/spinlock.h> #include <linux/interrupt.h> #include <linux/slab.h> #include <linux/netfilter.h> #include <net/netlink.h> #include <net/netns/generic.h> #include <net/sock.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_tuple.h> #include <net/netfilter/nf_conntrack_timeout.h> #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_cttimeout.h> static unsigned int nfct_timeout_id __read_mostly; struct ctnl_timeout { struct list_head head; struct list_head free_head; struct rcu_head rcu_head; refcount_t refcnt; char name[CTNL_TIMEOUT_NAME_MAX]; /* must be at the end */ struct nf_ct_timeout timeout; }; struct nfct_timeout_pernet { struct list_head nfct_timeout_list; struct list_head nfct_timeout_freelist; }; MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); MODULE_DESCRIPTION("cttimeout: Extended Netfilter Connection Tracking timeout tuning"); static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = { [CTA_TIMEOUT_NAME] = { .type = NLA_NUL_STRING, .len = CTNL_TIMEOUT_NAME_MAX - 1}, [CTA_TIMEOUT_L3PROTO] = { .type = NLA_U16 }, [CTA_TIMEOUT_L4PROTO] = { .type = NLA_U8 }, [CTA_TIMEOUT_DATA] = { .type = NLA_NESTED }, }; static struct nfct_timeout_pernet *nfct_timeout_pernet(struct net *net) { return net_generic(net, nfct_timeout_id); } static int ctnl_timeout_parse_policy(void *timeout, const struct nf_conntrack_l4proto *l4proto, struct net *net, const struct nlattr *attr) { struct nlattr **tb; int ret = 0; tb = kcalloc(l4proto->ctnl_timeout.nlattr_max + 1, sizeof(*tb), GFP_KERNEL); if (!tb) return -ENOMEM; ret = nla_parse_nested_deprecated(tb, l4proto->ctnl_timeout.nlattr_max, attr, l4proto->ctnl_timeout.nla_policy, NULL); if (ret < 0) goto err; ret = l4proto->ctnl_timeout.nlattr_to_obj(tb, net, timeout); err: kfree(tb); return ret; } static int cttimeout_new_timeout(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const cda[]) { struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(info->net); __u16 l3num; __u8 l4num; const struct nf_conntrack_l4proto *l4proto; struct ctnl_timeout *timeout, *matching = NULL; char *name; int ret; if (!cda[CTA_TIMEOUT_NAME] || !cda[CTA_TIMEOUT_L3PROTO] || !cda[CTA_TIMEOUT_L4PROTO] || !cda[CTA_TIMEOUT_DATA]) return -EINVAL; name = nla_data(cda[CTA_TIMEOUT_NAME]); l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO])); l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]); list_for_each_entry(timeout, &pernet->nfct_timeout_list, head) { if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) continue; if (info->nlh->nlmsg_flags & NLM_F_EXCL) return -EEXIST; matching = timeout; break; } if (matching) { if (info->nlh->nlmsg_flags & NLM_F_REPLACE) { /* You cannot replace one timeout policy by another of * different kind, sorry. */ if (matching->timeout.l3num != l3num || matching->timeout.l4proto->l4proto != l4num) return -EINVAL; return ctnl_timeout_parse_policy(&matching->timeout.data, matching->timeout.l4proto, info->net, cda[CTA_TIMEOUT_DATA]); } return -EBUSY; } l4proto = nf_ct_l4proto_find(l4num); /* This protocol is not supportted, skip. */ if (l4proto->l4proto != l4num) { ret = -EOPNOTSUPP; goto err_proto_put; } timeout = kzalloc(sizeof(struct ctnl_timeout) + l4proto->ctnl_timeout.obj_size, GFP_KERNEL); if (timeout == NULL) { ret = -ENOMEM; goto err_proto_put; } ret = ctnl_timeout_parse_policy(&timeout->timeout.data, l4proto, info->net, cda[CTA_TIMEOUT_DATA]); if (ret < 0) goto err; strcpy(timeout->name, nla_data(cda[CTA_TIMEOUT_NAME])); timeout->timeout.l3num = l3num; timeout->timeout.l4proto = l4proto; refcount_set(&timeout->refcnt, 1); __module_get(THIS_MODULE); list_add_tail_rcu(&timeout->head, &pernet->nfct_timeout_list); return 0; err: kfree(timeout); err_proto_put: return ret; } static int ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, int event, struct ctnl_timeout *timeout) { struct nlmsghdr *nlh; unsigned int flags = portid ? NLM_F_MULTI : 0; const struct nf_conntrack_l4proto *l4proto = timeout->timeout.l4proto; struct nlattr *nest_parms; int ret; event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event); nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC, NFNETLINK_V0, 0); if (!nlh) goto nlmsg_failure; if (nla_put_string(skb, CTA_TIMEOUT_NAME, timeout->name) || nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(timeout->timeout.l3num)) || nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, l4proto->l4proto) || nla_put_be32(skb, CTA_TIMEOUT_USE, htonl(refcount_read(&timeout->refcnt)))) goto nla_put_failure; nest_parms = nla_nest_start(skb, CTA_TIMEOUT_DATA); if (!nest_parms) goto nla_put_failure; ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, &timeout->timeout.data); if (ret < 0) goto nla_put_failure; nla_nest_end(skb, nest_parms); nlmsg_end(skb, nlh); return skb->len; nlmsg_failure: nla_put_failure: nlmsg_cancel(skb, nlh); return -1; } static int ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct nfct_timeout_pernet *pernet; struct net *net = sock_net(skb->sk); struct ctnl_timeout *cur, *last; if (cb->args[2]) return 0; last = (struct ctnl_timeout *)cb->args[1]; if (cb->args[1]) cb->args[1] = 0; rcu_read_lock(); pernet = nfct_timeout_pernet(net); list_for_each_entry_rcu(cur, &pernet->nfct_timeout_list, head) { if (last) { if (cur != last) continue; last = NULL; } if (ctnl_timeout_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFNL_MSG_TYPE(cb->nlh->nlmsg_type), IPCTNL_MSG_TIMEOUT_NEW, cur) < 0) { cb->args[1] = (unsigned long)cur; break; } } if (!cb->args[1]) cb->args[2] = 1; rcu_read_unlock(); return skb->len; } static int cttimeout_get_timeout(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const cda[]) { struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(info->net); int ret = -ENOENT; char *name; struct ctnl_timeout *cur; if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = ctnl_timeout_dump, }; return netlink_dump_start(info->sk, skb, info->nlh, &c); } if (!cda[CTA_TIMEOUT_NAME]) return -EINVAL; name = nla_data(cda[CTA_TIMEOUT_NAME]); list_for_each_entry(cur, &pernet->nfct_timeout_list, head) { struct sk_buff *skb2; if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) continue; skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (skb2 == NULL) { ret = -ENOMEM; break; } ret = ctnl_timeout_fill_info(skb2, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, NFNL_MSG_TYPE(info->nlh->nlmsg_type), IPCTNL_MSG_TIMEOUT_NEW, cur); if (ret <= 0) { kfree_skb(skb2); break; } ret = nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid); break; } return ret; } /* try to delete object, fail if it is still in use. */ static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout) { int ret = 0; /* We want to avoid races with ctnl_timeout_put. So only when the * current refcnt is 1, we decrease it to 0. */ if (refcount_dec_if_one(&timeout->refcnt)) { /* We are protected by nfnl mutex. */ list_del_rcu(&timeout->head); nf_ct_untimeout(net, &timeout->timeout); kfree_rcu(timeout, rcu_head); } else { ret = -EBUSY; } return ret; } static int cttimeout_del_timeout(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const cda[]) { struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(info->net); struct ctnl_timeout *cur, *tmp; int ret = -ENOENT; char *name; if (!cda[CTA_TIMEOUT_NAME]) { list_for_each_entry_safe(cur, tmp, &pernet->nfct_timeout_list, head) ctnl_timeout_try_del(info->net, cur); return 0; } name = nla_data(cda[CTA_TIMEOUT_NAME]); list_for_each_entry(cur, &pernet->nfct_timeout_list, head) { if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) continue; ret = ctnl_timeout_try_del(info->net, cur); if (ret < 0) return ret; break; } return ret; } static int cttimeout_default_set(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const cda[]) { const struct nf_conntrack_l4proto *l4proto; __u8 l4num; int ret; if (!cda[CTA_TIMEOUT_L4PROTO] || !cda[CTA_TIMEOUT_DATA]) return -EINVAL; l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]); l4proto = nf_ct_l4proto_find(l4num); /* This protocol is not supported, skip. */ if (l4proto->l4proto != l4num) { ret = -EOPNOTSUPP; goto err; } ret = ctnl_timeout_parse_policy(NULL, l4proto, info->net, cda[CTA_TIMEOUT_DATA]); if (ret < 0) goto err; return 0; err: return ret; } static int cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid, u32 seq, u32 type, int event, u16 l3num, const struct nf_conntrack_l4proto *l4proto, const unsigned int *timeouts) { struct nlmsghdr *nlh; unsigned int flags = portid ? NLM_F_MULTI : 0; struct nlattr *nest_parms; int ret; event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event); nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC, NFNETLINK_V0, 0); if (!nlh) goto nlmsg_failure; if (nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(l3num)) || nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, l4proto->l4proto)) goto nla_put_failure; nest_parms = nla_nest_start(skb, CTA_TIMEOUT_DATA); if (!nest_parms) goto nla_put_failure; ret = l4proto->ctnl_timeout.obj_to_nlattr(skb, timeouts); if (ret < 0) goto nla_put_failure; nla_nest_end(skb, nest_parms); nlmsg_end(skb, nlh); return skb->len; nlmsg_failure: nla_put_failure: nlmsg_cancel(skb, nlh); return -1; } static int cttimeout_default_get(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const cda[]) { const struct nf_conntrack_l4proto *l4proto; unsigned int *timeouts = NULL; struct sk_buff *skb2; __u16 l3num; __u8 l4num; int ret; if (!cda[CTA_TIMEOUT_L3PROTO] || !cda[CTA_TIMEOUT_L4PROTO]) return -EINVAL; l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO])); l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]); l4proto = nf_ct_l4proto_find(l4num); if (l4proto->l4proto != l4num) return -EOPNOTSUPP; switch (l4proto->l4proto) { case IPPROTO_ICMP: timeouts = &nf_icmp_pernet(info->net)->timeout; break; case IPPROTO_TCP: timeouts = nf_tcp_pernet(info->net)->timeouts; break; case IPPROTO_UDP: case IPPROTO_UDPLITE: timeouts = nf_udp_pernet(info->net)->timeouts; break; case IPPROTO_ICMPV6: timeouts = &nf_icmpv6_pernet(info->net)->timeout; break; case IPPROTO_SCTP: #ifdef CONFIG_NF_CT_PROTO_SCTP timeouts = nf_sctp_pernet(info->net)->timeouts; #endif break; case IPPROTO_GRE: #ifdef CONFIG_NF_CT_PROTO_GRE timeouts = nf_gre_pernet(info->net)->timeouts; #endif break; case 255: timeouts = &nf_generic_pernet(info->net)->timeout; break; default: WARN_ONCE(1, "Missing timeouts for proto %d", l4proto->l4proto); break; } if (!timeouts) return -EOPNOTSUPP; skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb2) return -ENOMEM; ret = cttimeout_default_fill_info(info->net, skb2, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, NFNL_MSG_TYPE(info->nlh->nlmsg_type), IPCTNL_MSG_TIMEOUT_DEFAULT_SET, l3num, l4proto, timeouts); if (ret <= 0) { kfree_skb(skb2); return -ENOMEM; } return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid); } static struct nf_ct_timeout *ctnl_timeout_find_get(struct net *net, const char *name) { struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net); struct ctnl_timeout *timeout, *matching = NULL; list_for_each_entry_rcu(timeout, &pernet->nfct_timeout_list, head) { if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) continue; if (!refcount_inc_not_zero(&timeout->refcnt)) goto err; matching = timeout; break; } err: return matching ? &matching->timeout : NULL; } static void ctnl_timeout_put(struct nf_ct_timeout *t) { struct ctnl_timeout *timeout = container_of(t, struct ctnl_timeout, timeout); if (refcount_dec_and_test(&timeout->refcnt)) { kfree_rcu(timeout, rcu_head); module_put(THIS_MODULE); } } static const struct nfnl_callback cttimeout_cb[IPCTNL_MSG_TIMEOUT_MAX] = { [IPCTNL_MSG_TIMEOUT_NEW] = { .call = cttimeout_new_timeout, .type = NFNL_CB_MUTEX, .attr_count = CTA_TIMEOUT_MAX, .policy = cttimeout_nla_policy }, [IPCTNL_MSG_TIMEOUT_GET] = { .call = cttimeout_get_timeout, .type = NFNL_CB_MUTEX, .attr_count = CTA_TIMEOUT_MAX, .policy = cttimeout_nla_policy }, [IPCTNL_MSG_TIMEOUT_DELETE] = { .call = cttimeout_del_timeout, .type = NFNL_CB_MUTEX, .attr_count = CTA_TIMEOUT_MAX, .policy = cttimeout_nla_policy }, [IPCTNL_MSG_TIMEOUT_DEFAULT_SET] = { .call = cttimeout_default_set, .type = NFNL_CB_MUTEX, .attr_count = CTA_TIMEOUT_MAX, .policy = cttimeout_nla_policy }, [IPCTNL_MSG_TIMEOUT_DEFAULT_GET] = { .call = cttimeout_default_get, .type = NFNL_CB_MUTEX, .attr_count = CTA_TIMEOUT_MAX, .policy = cttimeout_nla_policy }, }; static const struct nfnetlink_subsystem cttimeout_subsys = { .name = "conntrack_timeout", .subsys_id = NFNL_SUBSYS_CTNETLINK_TIMEOUT, .cb_count = IPCTNL_MSG_TIMEOUT_MAX, .cb = cttimeout_cb, }; MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_TIMEOUT); static int __net_init cttimeout_net_init(struct net *net) { struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net); INIT_LIST_HEAD(&pernet->nfct_timeout_list); INIT_LIST_HEAD(&pernet->nfct_timeout_freelist); return 0; } static void __net_exit cttimeout_net_pre_exit(struct net *net) { struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net); struct ctnl_timeout *cur, *tmp; list_for_each_entry_safe(cur, tmp, &pernet->nfct_timeout_list, head) { list_del_rcu(&cur->head); list_add(&cur->free_head, &pernet->nfct_timeout_freelist); } /* core calls synchronize_rcu() after this */ } static void __net_exit cttimeout_net_exit(struct net *net) { struct nfct_timeout_pernet *pernet = nfct_timeout_pernet(net); struct ctnl_timeout *cur, *tmp; if (list_empty(&pernet->nfct_timeout_freelist)) return; nf_ct_untimeout(net, NULL); list_for_each_entry_safe(cur, tmp, &pernet->nfct_timeout_freelist, free_head) { list_del(&cur->free_head); if (refcount_dec_and_test(&cur->refcnt)) kfree_rcu(cur, rcu_head); } } static struct pernet_operations cttimeout_ops = { .init = cttimeout_net_init, .pre_exit = cttimeout_net_pre_exit, .exit = cttimeout_net_exit, .id = &nfct_timeout_id, .size = sizeof(struct nfct_timeout_pernet), }; static const struct nf_ct_timeout_hooks hooks = { .timeout_find_get = ctnl_timeout_find_get, .timeout_put = ctnl_timeout_put, }; static int __init cttimeout_init(void) { int ret; ret = register_pernet_subsys(&cttimeout_ops); if (ret < 0) return ret; ret = nfnetlink_subsys_register(&cttimeout_subsys); if (ret < 0) { pr_err("cttimeout_init: cannot register cttimeout with " "nfnetlink.\n"); goto err_out; } RCU_INIT_POINTER(nf_ct_timeout_hook, &hooks); return 0; err_out: unregister_pernet_subsys(&cttimeout_ops); return ret; } static int untimeout(struct nf_conn *ct, void *timeout) { struct nf_conn_timeout *timeout_ext = nf_ct_timeout_find(ct); if (timeout_ext) RCU_INIT_POINTER(timeout_ext->timeout, NULL); return 0; } static void __exit cttimeout_exit(void) { nfnetlink_subsys_unregister(&cttimeout_subsys); unregister_pernet_subsys(&cttimeout_ops); RCU_INIT_POINTER(nf_ct_timeout_hook, NULL); nf_ct_iterate_destroy(untimeout, NULL); } module_init(cttimeout_init); module_exit(cttimeout_exit);
9735 9716 234 233 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 // SPDX-License-Identifier: GPL-2.0-only /* Common code for 32 and 64-bit NUMA */ #include <linux/acpi.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/of.h> #include <linux/string.h> #include <linux/init.h> #include <linux/memblock.h> #include <linux/mmzone.h> #include <linux/ctype.h> #include <linux/nodemask.h> #include <linux/sched.h> #include <linux/topology.h> #include <linux/sort.h> #include <linux/numa_memblks.h> #include <asm/e820/api.h> #include <asm/proto.h> #include <asm/dma.h> #include <asm/numa.h> #include <asm/amd/nb.h> #include "mm_internal.h" int numa_off; static __init int numa_setup(char *opt) { if (!opt) return -EINVAL; if (!strncmp(opt, "off", 3)) numa_off = 1; if (!strncmp(opt, "fake=", 5)) return numa_emu_cmdline(opt + 5); if (!strncmp(opt, "noacpi", 6)) disable_srat(); if (!strncmp(opt, "nohmat", 6)) disable_hmat(); return 0; } early_param("numa", numa_setup); /* * apicid, cpu, node mappings */ s16 __apicid_to_node[MAX_LOCAL_APIC] = { [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE }; int numa_cpu_node(int cpu) { u32 apicid = early_per_cpu(x86_cpu_to_apicid, cpu); if (apicid != BAD_APICID) return __apicid_to_node[apicid]; return NUMA_NO_NODE; } cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; EXPORT_SYMBOL(node_to_cpumask_map); /* * Map cpu index to node index */ DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); void numa_set_node(int cpu, int node) { int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); /* early setting, no percpu area yet */ if (cpu_to_node_map) { cpu_to_node_map[cpu] = node; return; } #ifdef CONFIG_DEBUG_PER_CPU_MAPS if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) { printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu); dump_stack(); return; } #endif per_cpu(x86_cpu_to_node_map, cpu) = node; set_cpu_numa_node(cpu, node); } void numa_clear_node(int cpu) { numa_set_node(cpu, NUMA_NO_NODE); } /* * Allocate node_to_cpumask_map based on number of available nodes * Requires node_possible_map to be valid. * * Note: cpumask_of_node() is not valid until after this is done. * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.) */ void __init setup_node_to_cpumask_map(void) { unsigned int node; /* setup nr_node_ids if not done yet */ if (nr_node_ids == MAX_NUMNODES) setup_nr_node_ids(); /* allocate the map */ for (node = 0; node < nr_node_ids; node++) alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]); /* cpumask_of_node() will now work */ pr_debug("Node to cpumask map for %u nodes\n", nr_node_ids); } static int __init numa_register_nodes(void) { int nid; if (!memblock_validate_numa_coverage(SZ_1M)) return -EINVAL; /* Finally register nodes. */ for_each_node_mask(nid, node_possible_map) { unsigned long start_pfn, end_pfn; /* * Note, get_pfn_range_for_nid() depends on * memblock_set_node() having already happened */ get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); if (start_pfn >= end_pfn) continue; alloc_node_data(nid); node_set_online(nid); } /* Dump memblock with node info and return. */ memblock_dump_all(); return 0; } /* * There are unfortunately some poorly designed mainboards around that * only connect memory to a single CPU. This breaks the 1:1 cpu->node * mapping. To avoid this fill in the mapping for all possible CPUs, * as the number of CPUs is not known yet. We round robin the existing * nodes. */ static void __init numa_init_array(void) { int rr, i; rr = first_node(node_online_map); for (i = 0; i < nr_cpu_ids; i++) { if (early_cpu_to_node(i) != NUMA_NO_NODE) continue; numa_set_node(i, rr); rr = next_node_in(rr, node_online_map); } } static int __init numa_init(int (*init_func)(void)) { int i; int ret; for (i = 0; i < MAX_LOCAL_APIC; i++) set_apicid_to_node(i, NUMA_NO_NODE); ret = numa_memblks_init(init_func, /* memblock_force_top_down */ true); if (ret < 0) return ret; ret = numa_register_nodes(); if (ret < 0) return ret; for (i = 0; i < nr_cpu_ids; i++) { int nid = early_cpu_to_node(i); if (nid == NUMA_NO_NODE) continue; if (!node_online(nid)) numa_clear_node(i); } numa_init_array(); return 0; } /** * dummy_numa_init - Fallback dummy NUMA init * * Used if there's no underlying NUMA architecture, NUMA initialization * fails, or NUMA is disabled on the command line. * * Must online at least one node and add memory blocks that cover all * allowed memory. This function must not fail. */ static int __init dummy_numa_init(void) { printk(KERN_INFO "%s\n", numa_off ? "NUMA turned off" : "No NUMA configuration found"); printk(KERN_INFO "Faking a node at [mem %#018Lx-%#018Lx]\n", 0LLU, PFN_PHYS(max_pfn) - 1); node_set(0, numa_nodes_parsed); numa_add_memblk(0, 0, PFN_PHYS(max_pfn)); return 0; } /** * x86_numa_init - Initialize NUMA * * Try each configured NUMA initialization method until one succeeds. The * last fallback is dummy single node config encompassing whole memory and * never fails. */ void __init x86_numa_init(void) { if (!numa_off) { #ifdef CONFIG_ACPI_NUMA if (!numa_init(x86_acpi_numa_init)) return; #endif #ifdef CONFIG_AMD_NUMA if (!numa_init(amd_numa_init)) return; #endif if (acpi_disabled && !numa_init(of_numa_init)) return; } numa_init(dummy_numa_init); } /* * A node may exist which has one or more Generic Initiators but no CPUs and no * memory. * * This function must be called after init_cpu_to_node(), to ensure that any * memoryless CPU nodes have already been brought online, and before the * node_data[nid] is needed for zone list setup in build_all_zonelists(). * * When this function is called, any nodes containing either memory and/or CPUs * will already be online and there is no need to do anything extra, even if * they also contain one or more Generic Initiators. */ void __init init_gi_nodes(void) { int nid; /* * Exclude this node from * bringup_nonboot_cpus * cpu_up * __try_online_node * register_node * because node_subsys is not initialized yet. * TODO remove dependency on node_online */ for_each_node_state(nid, N_GENERIC_INITIATOR) if (!node_online(nid)) node_set_online(nid); } /* * Setup early cpu_to_node. * * Populate cpu_to_node[] only if x86_cpu_to_apicid[], * and apicid_to_node[] tables have valid entries for a CPU. * This means we skip cpu_to_node[] initialisation for NUMA * emulation and faking node case (when running a kernel compiled * for NUMA on a non NUMA box), which is OK as cpu_to_node[] * is already initialized in a round robin manner at numa_init_array, * prior to this call, and this initialization is good enough * for the fake NUMA cases. * * Called before the per_cpu areas are setup. */ void __init init_cpu_to_node(void) { int cpu; u32 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); BUG_ON(cpu_to_apicid == NULL); for_each_possible_cpu(cpu) { int node = numa_cpu_node(cpu); if (node == NUMA_NO_NODE) continue; /* * Exclude this node from * bringup_nonboot_cpus * cpu_up * __try_online_node * register_node * because node_subsys is not initialized yet. * TODO remove dependency on node_online */ if (!node_online(node)) node_set_online(node); numa_set_node(cpu, node); } } #ifndef CONFIG_DEBUG_PER_CPU_MAPS # ifndef CONFIG_NUMA_EMU void numa_add_cpu(unsigned int cpu) { cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); } void numa_remove_cpu(unsigned int cpu) { cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); } # endif /* !CONFIG_NUMA_EMU */ #else /* !CONFIG_DEBUG_PER_CPU_MAPS */ int __cpu_to_node(int cpu) { if (early_per_cpu_ptr(x86_cpu_to_node_map)) { printk(KERN_WARNING "cpu_to_node(%d): usage too early!\n", cpu); dump_stack(); return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; } return per_cpu(x86_cpu_to_node_map, cpu); } EXPORT_SYMBOL(__cpu_to_node); /* * Same function as cpu_to_node() but used if called before the * per_cpu areas are setup. */ int early_cpu_to_node(int cpu) { if (early_per_cpu_ptr(x86_cpu_to_node_map)) return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; if (!cpu_possible(cpu)) { printk(KERN_WARNING "early_cpu_to_node(%d): no per_cpu area!\n", cpu); dump_stack(); return NUMA_NO_NODE; } return per_cpu(x86_cpu_to_node_map, cpu); } void debug_cpumask_set_cpu(unsigned int cpu, int node, bool enable) { struct cpumask *mask; if (node == NUMA_NO_NODE) { /* early_cpu_to_node() already emits a warning and trace */ return; } mask = node_to_cpumask_map[node]; if (!cpumask_available(mask)) { pr_err("node_to_cpumask_map[%i] NULL\n", node); dump_stack(); return; } if (enable) cpumask_set_cpu(cpu, mask); else cpumask_clear_cpu(cpu, mask); printk(KERN_DEBUG "%s cpu %d node %d: mask now %*pbl\n", enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, cpumask_pr_args(mask)); return; } # ifndef CONFIG_NUMA_EMU static void numa_set_cpumask(int cpu, bool enable) { debug_cpumask_set_cpu(cpu, early_cpu_to_node(cpu), enable); } void numa_add_cpu(unsigned int cpu) { numa_set_cpumask(cpu, true); } void numa_remove_cpu(unsigned int cpu) { numa_set_cpumask(cpu, false); } # endif /* !CONFIG_NUMA_EMU */ /* * Returns a pointer to the bitmask of CPUs on Node 'node'. */ const struct cpumask *cpumask_of_node(int node) { if ((unsigned)node >= nr_node_ids) { printk(KERN_WARNING "cpumask_of_node(%d): (unsigned)node >= nr_node_ids(%u)\n", node, nr_node_ids); dump_stack(); return cpu_none_mask; } if (!cpumask_available(node_to_cpumask_map[node])) { printk(KERN_WARNING "cpumask_of_node(%d): no node_to_cpumask_map!\n", node); dump_stack(); return cpu_online_mask; } return node_to_cpumask_map[node]; } EXPORT_SYMBOL(cpumask_of_node); #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ #ifdef CONFIG_NUMA_EMU void __init numa_emu_update_cpu_to_node(int *emu_nid_to_phys, unsigned int nr_emu_nids) { int i, j; /* * Transform __apicid_to_node table to use emulated nids by * reverse-mapping phys_nid. The maps should always exist but fall * back to zero just in case. */ for (i = 0; i < ARRAY_SIZE(__apicid_to_node); i++) { if (__apicid_to_node[i] == NUMA_NO_NODE) continue; for (j = 0; j < nr_emu_nids; j++) if (__apicid_to_node[i] == emu_nid_to_phys[j]) break; __apicid_to_node[i] = j < nr_emu_nids ? j : 0; } } u64 __init numa_emu_dma_end(void) { return PFN_PHYS(MAX_DMA32_PFN); } #endif /* CONFIG_NUMA_EMU */
78 3015 1674 1155 1158 1323 23 235 28 28 209 2360 1270 1268 1996 151 1 1 11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_NS_COMMON_H #define _LINUX_NS_COMMON_H #include <linux/ns/ns_common_types.h> #include <linux/refcount.h> #include <linux/vfsdebug.h> #include <uapi/linux/sched.h> #include <uapi/linux/nsfs.h> bool is_current_namespace(struct ns_common *ns); int __ns_common_init(struct ns_common *ns, u32 ns_type, const struct proc_ns_operations *ops, int inum); void __ns_common_free(struct ns_common *ns); struct ns_common *__must_check ns_owner(struct ns_common *ns); static __always_inline bool is_ns_init_inum(const struct ns_common *ns) { VFS_WARN_ON_ONCE(ns->inum == 0); return unlikely(in_range(ns->inum, MNT_NS_INIT_INO, IPC_NS_INIT_INO - MNT_NS_INIT_INO + 1)); } static __always_inline bool is_ns_init_id(const struct ns_common *ns) { VFS_WARN_ON_ONCE(ns->ns_id == 0); return ns->ns_id <= NS_LAST_INIT_ID; } #define NS_COMMON_INIT(nsname) \ { \ .ns_type = ns_common_type(&nsname), \ .ns_id = ns_init_id(&nsname), \ .inum = ns_init_inum(&nsname), \ .ops = to_ns_operations(&nsname), \ .stashed = NULL, \ .__ns_ref = REFCOUNT_INIT(1), \ .__ns_ref_active = ATOMIC_INIT(1), \ .ns_unified_node.ns_list_entry = LIST_HEAD_INIT(nsname.ns.ns_unified_node.ns_list_entry), \ .ns_tree_node.ns_list_entry = LIST_HEAD_INIT(nsname.ns.ns_tree_node.ns_list_entry), \ .ns_owner_node.ns_list_entry = LIST_HEAD_INIT(nsname.ns.ns_owner_node.ns_list_entry), \ .ns_owner_root.ns_list_head = LIST_HEAD_INIT(nsname.ns.ns_owner_root.ns_list_head), \ } #define ns_common_init(__ns) \ __ns_common_init(to_ns_common(__ns), \ ns_common_type(__ns), \ to_ns_operations(__ns), \ (((__ns) == ns_init_ns(__ns)) ? ns_init_inum(__ns) : 0)) #define ns_common_init_inum(__ns, __inum) \ __ns_common_init(to_ns_common(__ns), \ ns_common_type(__ns), \ to_ns_operations(__ns), \ __inum) #define ns_common_free(__ns) __ns_common_free(to_ns_common((__ns))) static __always_inline __must_check int __ns_ref_active_read(const struct ns_common *ns) { return atomic_read(&ns->__ns_ref_active); } static __always_inline __must_check int __ns_ref_read(const struct ns_common *ns) { return refcount_read(&ns->__ns_ref); } static __always_inline __must_check bool __ns_ref_put(struct ns_common *ns) { if (is_ns_init_id(ns)) { VFS_WARN_ON_ONCE(__ns_ref_read(ns) != 1); VFS_WARN_ON_ONCE(__ns_ref_active_read(ns) != 1); return false; } if (refcount_dec_and_test(&ns->__ns_ref)) { VFS_WARN_ON_ONCE(__ns_ref_active_read(ns)); return true; } return false; } static __always_inline __must_check bool __ns_ref_get(struct ns_common *ns) { if (is_ns_init_id(ns)) { VFS_WARN_ON_ONCE(__ns_ref_read(ns) != 1); VFS_WARN_ON_ONCE(__ns_ref_active_read(ns) != 1); return true; } if (refcount_inc_not_zero(&ns->__ns_ref)) return true; VFS_WARN_ON_ONCE(__ns_ref_active_read(ns)); return false; } static __always_inline void __ns_ref_inc(struct ns_common *ns) { if (is_ns_init_id(ns)) { VFS_WARN_ON_ONCE(__ns_ref_read(ns) != 1); VFS_WARN_ON_ONCE(__ns_ref_active_read(ns) != 1); return; } refcount_inc(&ns->__ns_ref); } static __always_inline __must_check bool __ns_ref_dec_and_lock(struct ns_common *ns, spinlock_t *ns_lock) { if (is_ns_init_id(ns)) { VFS_WARN_ON_ONCE(__ns_ref_read(ns) != 1); VFS_WARN_ON_ONCE(__ns_ref_active_read(ns) != 1); return false; } return refcount_dec_and_lock(&ns->__ns_ref, ns_lock); } #define ns_ref_read(__ns) __ns_ref_read(to_ns_common((__ns))) #define ns_ref_inc(__ns) \ do { if (__ns) __ns_ref_inc(to_ns_common((__ns))); } while (0) #define ns_ref_get(__ns) \ ((__ns) ? __ns_ref_get(to_ns_common((__ns))) : false) #define ns_ref_put(__ns) \ ((__ns) ? __ns_ref_put(to_ns_common((__ns))) : false) #define ns_ref_put_and_lock(__ns, __ns_lock) \ ((__ns) ? __ns_ref_dec_and_lock(to_ns_common((__ns)), __ns_lock) : false) #define ns_ref_active_read(__ns) \ ((__ns) ? __ns_ref_active_read(to_ns_common(__ns)) : 0) void __ns_ref_active_put(struct ns_common *ns); #define ns_ref_active_put(__ns) \ do { if (__ns) __ns_ref_active_put(to_ns_common(__ns)); } while (0) static __always_inline struct ns_common *__must_check ns_get_unless_inactive(struct ns_common *ns) { if (!__ns_ref_active_read(ns)) { VFS_WARN_ON_ONCE(is_ns_init_id(ns)); return NULL; } if (!__ns_ref_get(ns)) return NULL; return ns; } void __ns_ref_active_get(struct ns_common *ns); #define ns_ref_active_get(__ns) \ do { if (__ns) __ns_ref_active_get(to_ns_common(__ns)); } while (0) #endif
34 1 1 2 22 12 5 5 28 30 32 32 32 32 15 21 20 7 9 27 31 16 6 30 8 10 10 9 1 28 9 20 29 21 29 2 31 28 5 32 4 28 14 19 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 // SPDX-License-Identifier: GPL-2.0-or-later /* mpi-pow.c - MPI functions * Copyright (C) 1994, 1996, 1998, 2000 Free Software Foundation, Inc. * * This file is part of GnuPG. * * Note: This code is heavily based on the GNU MP Library. * Actually it's the same code with only minor changes in the * way the data is stored; this is to support the abstraction * of an optional secure memory allocation which may be used * to avoid revealing of sensitive data due to paging etc. * The GNU MP Library itself is published under the LGPL; * however I decided to publish this code under the plain GPL. */ #include <linux/export.h> #include <linux/sched.h> #include <linux/string.h> #include "mpi-internal.h" #include "longlong.h" /**************** * RES = BASE ^ EXP mod MOD */ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod) { mpi_ptr_t mp_marker = NULL, bp_marker = NULL, ep_marker = NULL; struct karatsuba_ctx karactx = {}; mpi_ptr_t xp_marker = NULL; mpi_ptr_t tspace = NULL; mpi_ptr_t rp, ep, mp, bp; mpi_size_t esize, msize, bsize, rsize; int msign, bsign, rsign; mpi_size_t size; int mod_shift_cnt; int negative_result; int assign_rp = 0; mpi_size_t tsize = 0; /* to avoid compiler warning */ /* fixme: we should check that the warning is void */ int rc = -ENOMEM; esize = exp->nlimbs; msize = mod->nlimbs; size = 2 * msize; msign = mod->sign; rp = res->d; ep = exp->d; if (!msize) return -EINVAL; if (!esize) { /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0 * depending on if MOD equals 1. */ res->nlimbs = (msize == 1 && mod->d[0] == 1) ? 0 : 1; if (res->nlimbs) { if (mpi_resize(res, 1) < 0) goto enomem; rp = res->d; rp[0] = 1; } res->sign = 0; goto leave; } /* Normalize MOD (i.e. make its most significant bit set) as required by * mpn_divrem. This will make the intermediate values in the calculation * slightly larger, but the correct result is obtained after a final * reduction using the original MOD value. */ mp = mp_marker = mpi_alloc_limb_space(msize); if (!mp) goto enomem; mod_shift_cnt = count_leading_zeros(mod->d[msize - 1]); if (mod_shift_cnt) mpihelp_lshift(mp, mod->d, msize, mod_shift_cnt); else MPN_COPY(mp, mod->d, msize); bsize = base->nlimbs; bsign = base->sign; if (bsize > msize) { /* The base is larger than the module. Reduce it. */ /* Allocate (BSIZE + 1) with space for remainder and quotient. * (The quotient is (bsize - msize + 1) limbs.) */ bp = bp_marker = mpi_alloc_limb_space(bsize + 1); if (!bp) goto enomem; MPN_COPY(bp, base->d, bsize); /* We don't care about the quotient, store it above the remainder, * at BP + MSIZE. */ mpihelp_divrem(bp + msize, 0, bp, bsize, mp, msize); bsize = msize; /* Canonicalize the base, since we are going to multiply with it * quite a few times. */ MPN_NORMALIZE(bp, bsize); } else bp = base->d; if (!bsize) { res->nlimbs = 0; res->sign = 0; goto leave; } if (res->alloced < size) { /* We have to allocate more space for RES. If any of the input * parameters are identical to RES, defer deallocation of the old * space. */ if (rp == ep || rp == mp || rp == bp) { rp = mpi_alloc_limb_space(size); if (!rp) goto enomem; assign_rp = 1; } else { if (mpi_resize(res, size) < 0) goto enomem; rp = res->d; } } else { /* Make BASE, EXP and MOD not overlap with RES. */ if (rp == bp) { /* RES and BASE are identical. Allocate temp. space for BASE. */ BUG_ON(bp_marker); bp = bp_marker = mpi_alloc_limb_space(bsize); if (!bp) goto enomem; MPN_COPY(bp, rp, bsize); } if (rp == ep) { /* RES and EXP are identical. Allocate temp. space for EXP. */ ep = ep_marker = mpi_alloc_limb_space(esize); if (!ep) goto enomem; MPN_COPY(ep, rp, esize); } if (rp == mp) { /* RES and MOD are identical. Allocate temporary space for MOD. */ BUG_ON(mp_marker); mp = mp_marker = mpi_alloc_limb_space(msize); if (!mp) goto enomem; MPN_COPY(mp, rp, msize); } } MPN_COPY(rp, bp, bsize); rsize = bsize; rsign = bsign; { mpi_size_t i; mpi_ptr_t xp; int c; mpi_limb_t e; mpi_limb_t carry_limb; xp = xp_marker = mpi_alloc_limb_space(2 * (msize + 1)); if (!xp) goto enomem; negative_result = (ep[0] & 1) && base->sign; i = esize - 1; e = ep[i]; c = count_leading_zeros(e); e = (e << c) << 1; /* shift the exp bits to the left, lose msb */ c = BITS_PER_MPI_LIMB - 1 - c; /* Main loop. * * Make the result be pointed to alternately by XP and RP. This * helps us avoid block copying, which would otherwise be necessary * with the overlap restrictions of mpihelp_divmod. With 50% probability * the result after this loop will be in the area originally pointed * by RP (==RES->d), and with 50% probability in the area originally * pointed to by XP. */ for (;;) { while (c) { mpi_size_t xsize; /*if (mpihelp_mul_n(xp, rp, rp, rsize) < 0) goto enomem */ if (rsize < KARATSUBA_THRESHOLD) mpih_sqr_n_basecase(xp, rp, rsize); else { if (!tspace) { tsize = 2 * rsize; tspace = mpi_alloc_limb_space(tsize); if (!tspace) goto enomem; } else if (tsize < (2 * rsize)) { mpi_free_limb_space(tspace); tsize = 2 * rsize; tspace = mpi_alloc_limb_space(tsize); if (!tspace) goto enomem; } mpih_sqr_n(xp, rp, rsize, tspace); } xsize = 2 * rsize; if (xsize > msize) { mpihelp_divrem(xp + msize, 0, xp, xsize, mp, msize); xsize = msize; } swap(rp, xp); rsize = xsize; if ((mpi_limb_signed_t) e < 0) { /*mpihelp_mul( xp, rp, rsize, bp, bsize ); */ if (bsize < KARATSUBA_THRESHOLD) { mpi_limb_t tmp; if (mpihelp_mul (xp, rp, rsize, bp, bsize, &tmp) < 0) goto enomem; } else { if (mpihelp_mul_karatsuba_case (xp, rp, rsize, bp, bsize, &karactx) < 0) goto enomem; } xsize = rsize + bsize; if (xsize > msize) { mpihelp_divrem(xp + msize, 0, xp, xsize, mp, msize); xsize = msize; } swap(rp, xp); rsize = xsize; } e <<= 1; c--; cond_resched(); } i--; if (i < 0) break; e = ep[i]; c = BITS_PER_MPI_LIMB; } /* We shifted MOD, the modulo reduction argument, left MOD_SHIFT_CNT * steps. Adjust the result by reducing it with the original MOD. * * Also make sure the result is put in RES->d (where it already * might be, see above). */ if (mod_shift_cnt) { carry_limb = mpihelp_lshift(res->d, rp, rsize, mod_shift_cnt); rp = res->d; if (carry_limb) { rp[rsize] = carry_limb; rsize++; } } else { MPN_COPY(res->d, rp, rsize); rp = res->d; } if (rsize >= msize) { mpihelp_divrem(rp + msize, 0, rp, rsize, mp, msize); rsize = msize; } /* Remove any leading zero words from the result. */ if (mod_shift_cnt) mpihelp_rshift(rp, rp, rsize, mod_shift_cnt); MPN_NORMALIZE(rp, rsize); } if (negative_result && rsize) { if (mod_shift_cnt) mpihelp_rshift(mp, mp, msize, mod_shift_cnt); mpihelp_sub(rp, mp, msize, rp, rsize); rsize = msize; rsign = msign; MPN_NORMALIZE(rp, rsize); } res->nlimbs = rsize; res->sign = rsign; leave: rc = 0; enomem: mpihelp_release_karatsuba_ctx(&karactx); if (assign_rp) mpi_assign_limb_space(res, rp, size); if (mp_marker) mpi_free_limb_space(mp_marker); if (bp_marker) mpi_free_limb_space(bp_marker); if (ep_marker) mpi_free_limb_space(ep_marker); if (xp_marker) mpi_free_limb_space(xp_marker); if (tspace) mpi_free_limb_space(tspace); return rc; } EXPORT_SYMBOL_GPL(mpi_powm);
1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 // SPDX-License-Identifier: GPL-2.0-only #include <linux/phy.h> #include <linux/ethtool_netlink.h> #include "netlink.h" #include "common.h" struct plca_req_info { struct ethnl_req_info base; }; struct plca_reply_data { struct ethnl_reply_data base; struct phy_plca_cfg plca_cfg; struct phy_plca_status plca_st; }; // Helpers ------------------------------------------------------------------ // #define PLCA_REPDATA(__reply_base) \ container_of(__reply_base, struct plca_reply_data, base) // PLCA get configuration message ------------------------------------------- // const struct nla_policy ethnl_plca_get_cfg_policy[] = { [ETHTOOL_A_PLCA_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy_phy), }; static void plca_update_sint(int *dst, struct nlattr **tb, u32 attrid, bool *mod) { const struct nlattr *attr = tb[attrid]; if (!attr || WARN_ON_ONCE(attrid >= ARRAY_SIZE(ethnl_plca_set_cfg_policy))) return; switch (ethnl_plca_set_cfg_policy[attrid].type) { case NLA_U8: *dst = nla_get_u8(attr); break; case NLA_U32: *dst = nla_get_u32(attr); break; default: WARN_ON_ONCE(1); } *mod = true; } static int plca_get_cfg_prepare_data(const struct ethnl_req_info *req_base, struct ethnl_reply_data *reply_base, const struct genl_info *info) { struct plca_reply_data *data = PLCA_REPDATA(reply_base); struct net_device *dev = reply_base->dev; const struct ethtool_phy_ops *ops; struct nlattr **tb = info->attrs; struct phy_device *phydev; int ret; phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_PLCA_HEADER, info->extack); // check that the PHY device is available and connected if (IS_ERR_OR_NULL(phydev)) { ret = -EOPNOTSUPP; goto out; } // note: rtnl_lock is held already by ethnl_default_doit ops = ethtool_phy_ops; if (!ops || !ops->get_plca_cfg) { ret = -EOPNOTSUPP; goto out; } ret = ethnl_ops_begin(dev); if (ret < 0) goto out; memset(&data->plca_cfg, 0xff, sizeof_field(struct plca_reply_data, plca_cfg)); ret = ops->get_plca_cfg(phydev, &data->plca_cfg); ethnl_ops_complete(dev); out: return ret; } static int plca_get_cfg_reply_size(const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { return nla_total_size(sizeof(u16)) + /* _VERSION */ nla_total_size(sizeof(u8)) + /* _ENABLED */ nla_total_size(sizeof(u32)) + /* _NODE_CNT */ nla_total_size(sizeof(u32)) + /* _NODE_ID */ nla_total_size(sizeof(u32)) + /* _TO_TIMER */ nla_total_size(sizeof(u32)) + /* _BURST_COUNT */ nla_total_size(sizeof(u32)); /* _BURST_TIMER */ } static int plca_get_cfg_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct plca_reply_data *data = PLCA_REPDATA(reply_base); const struct phy_plca_cfg *plca = &data->plca_cfg; if ((plca->version >= 0 && nla_put_u16(skb, ETHTOOL_A_PLCA_VERSION, plca->version)) || (plca->enabled >= 0 && nla_put_u8(skb, ETHTOOL_A_PLCA_ENABLED, !!plca->enabled)) || (plca->node_id >= 0 && nla_put_u32(skb, ETHTOOL_A_PLCA_NODE_ID, plca->node_id)) || (plca->node_cnt >= 0 && nla_put_u32(skb, ETHTOOL_A_PLCA_NODE_CNT, plca->node_cnt)) || (plca->to_tmr >= 0 && nla_put_u32(skb, ETHTOOL_A_PLCA_TO_TMR, plca->to_tmr)) || (plca->burst_cnt >= 0 && nla_put_u32(skb, ETHTOOL_A_PLCA_BURST_CNT, plca->burst_cnt)) || (plca->burst_tmr >= 0 && nla_put_u32(skb, ETHTOOL_A_PLCA_BURST_TMR, plca->burst_tmr))) return -EMSGSIZE; return 0; }; // PLCA set configuration message ------------------------------------------- // const struct nla_policy ethnl_plca_set_cfg_policy[] = { [ETHTOOL_A_PLCA_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy_phy), [ETHTOOL_A_PLCA_ENABLED] = NLA_POLICY_MAX(NLA_U8, 1), [ETHTOOL_A_PLCA_NODE_ID] = NLA_POLICY_MAX(NLA_U32, 255), [ETHTOOL_A_PLCA_NODE_CNT] = NLA_POLICY_RANGE(NLA_U32, 1, 255), [ETHTOOL_A_PLCA_TO_TMR] = NLA_POLICY_MAX(NLA_U32, 255), [ETHTOOL_A_PLCA_BURST_CNT] = NLA_POLICY_MAX(NLA_U32, 255), [ETHTOOL_A_PLCA_BURST_TMR] = NLA_POLICY_MAX(NLA_U32, 255), }; static int ethnl_set_plca(struct ethnl_req_info *req_info, struct genl_info *info) { const struct ethtool_phy_ops *ops; struct nlattr **tb = info->attrs; struct phy_plca_cfg plca_cfg; struct phy_device *phydev; bool mod = false; int ret; phydev = ethnl_req_get_phydev(req_info, tb, ETHTOOL_A_PLCA_HEADER, info->extack); // check that the PHY device is available and connected if (IS_ERR_OR_NULL(phydev)) return -EOPNOTSUPP; ops = ethtool_phy_ops; if (!ops || !ops->set_plca_cfg) return -EOPNOTSUPP; memset(&plca_cfg, 0xff, sizeof(plca_cfg)); plca_update_sint(&plca_cfg.enabled, tb, ETHTOOL_A_PLCA_ENABLED, &mod); plca_update_sint(&plca_cfg.node_id, tb, ETHTOOL_A_PLCA_NODE_ID, &mod); plca_update_sint(&plca_cfg.node_cnt, tb, ETHTOOL_A_PLCA_NODE_CNT, &mod); plca_update_sint(&plca_cfg.to_tmr, tb, ETHTOOL_A_PLCA_TO_TMR, &mod); plca_update_sint(&plca_cfg.burst_cnt, tb, ETHTOOL_A_PLCA_BURST_CNT, &mod); plca_update_sint(&plca_cfg.burst_tmr, tb, ETHTOOL_A_PLCA_BURST_TMR, &mod); if (!mod) return 0; ret = ops->set_plca_cfg(phydev, &plca_cfg, info->extack); return ret < 0 ? ret : 1; } const struct ethnl_request_ops ethnl_plca_cfg_request_ops = { .request_cmd = ETHTOOL_MSG_PLCA_GET_CFG, .reply_cmd = ETHTOOL_MSG_PLCA_GET_CFG_REPLY, .hdr_attr = ETHTOOL_A_PLCA_HEADER, .req_info_size = sizeof(struct plca_req_info), .reply_data_size = sizeof(struct plca_reply_data), .prepare_data = plca_get_cfg_prepare_data, .reply_size = plca_get_cfg_reply_size, .fill_reply = plca_get_cfg_fill_reply, .set = ethnl_set_plca, .set_ntf_cmd = ETHTOOL_MSG_PLCA_NTF, }; // PLCA get status message -------------------------------------------------- // const struct nla_policy ethnl_plca_get_status_policy[] = { [ETHTOOL_A_PLCA_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy_phy), }; static int plca_get_status_prepare_data(const struct ethnl_req_info *req_base, struct ethnl_reply_data *reply_base, const struct genl_info *info) { struct plca_reply_data *data = PLCA_REPDATA(reply_base); struct net_device *dev = reply_base->dev; const struct ethtool_phy_ops *ops; struct nlattr **tb = info->attrs; struct phy_device *phydev; int ret; phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_PLCA_HEADER, info->extack); // check that the PHY device is available and connected if (IS_ERR_OR_NULL(phydev)) { ret = -EOPNOTSUPP; goto out; } // note: rtnl_lock is held already by ethnl_default_doit ops = ethtool_phy_ops; if (!ops || !ops->get_plca_status) { ret = -EOPNOTSUPP; goto out; } ret = ethnl_ops_begin(dev); if (ret < 0) goto out; memset(&data->plca_st, 0xff, sizeof_field(struct plca_reply_data, plca_st)); ret = ops->get_plca_status(phydev, &data->plca_st); ethnl_ops_complete(dev); out: return ret; } static int plca_get_status_reply_size(const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { return nla_total_size(sizeof(u8)); /* _STATUS */ } static int plca_get_status_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct plca_reply_data *data = PLCA_REPDATA(reply_base); const u8 status = data->plca_st.pst; if (nla_put_u8(skb, ETHTOOL_A_PLCA_STATUS, !!status)) return -EMSGSIZE; return 0; }; const struct ethnl_request_ops ethnl_plca_status_request_ops = { .request_cmd = ETHTOOL_MSG_PLCA_GET_STATUS, .reply_cmd = ETHTOOL_MSG_PLCA_GET_STATUS_REPLY, .hdr_attr = ETHTOOL_A_PLCA_HEADER, .req_info_size = sizeof(struct plca_req_info), .reply_data_size = sizeof(struct plca_reply_data), .prepare_data = plca_get_status_prepare_data, .reply_size = plca_get_status_reply_size, .fill_reply = plca_get_status_fill_reply, };
6 1 6 6 6 4 1 6 6 6 6 6 6 6 6 6 6 6 6 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 // SPDX-License-Identifier: GPL-2.0-or-later /* * SPCA508 chip based cameras subdriver * * Copyright (C) 2009 Jean-Francois Moine <http://moinejf.free.fr> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define MODULE_NAME "spca508" #include "gspca.h" MODULE_AUTHOR("Michel Xhaard <mxhaard@users.sourceforge.net>"); MODULE_DESCRIPTION("GSPCA/SPCA508 USB Camera Driver"); MODULE_LICENSE("GPL"); /* specific webcam descriptor */ struct sd { struct gspca_dev gspca_dev; /* !! must be the first item */ u8 subtype; #define CreativeVista 0 #define HamaUSBSightcam 1 #define HamaUSBSightcam2 2 #define IntelEasyPCCamera 3 #define MicroInnovationIC200 4 #define ViewQuestVQ110 5 }; static const struct v4l2_pix_format sif_mode[] = { {160, 120, V4L2_PIX_FMT_SPCA508, V4L2_FIELD_NONE, .bytesperline = 160, .sizeimage = 160 * 120 * 3 / 2, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 3}, {176, 144, V4L2_PIX_FMT_SPCA508, V4L2_FIELD_NONE, .bytesperline = 176, .sizeimage = 176 * 144 * 3 / 2, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 2}, {320, 240, V4L2_PIX_FMT_SPCA508, V4L2_FIELD_NONE, .bytesperline = 320, .sizeimage = 320 * 240 * 3 / 2, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 1}, {352, 288, V4L2_PIX_FMT_SPCA508, V4L2_FIELD_NONE, .bytesperline = 352, .sizeimage = 352 * 288 * 3 / 2, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 0}, }; /* Frame packet header offsets for the spca508 */ #define SPCA508_OFFSET_DATA 37 /* * Initialization data: this is the first set-up data written to the * device (before the open data). */ static const u16 spca508_init_data[][2] = { {0x0000, 0x870b}, {0x0020, 0x8112}, /* Video drop enable, ISO streaming disable */ {0x0003, 0x8111}, /* Reset compression & memory */ {0x0000, 0x8110}, /* Disable all outputs */ /* READ {0x0000, 0x8114} -> 0000: 00 */ {0x0000, 0x8114}, /* SW GPIO data */ {0x0008, 0x8110}, /* Enable charge pump output */ {0x0002, 0x8116}, /* 200 kHz pump clock */ /* UNKNOWN DIRECTION (URB_FUNCTION_SELECT_INTERFACE:) */ {0x0003, 0x8111}, /* Reset compression & memory */ {0x0000, 0x8111}, /* Normal mode (not reset) */ {0x0098, 0x8110}, /* Enable charge pump output, sync.serial,external 2x clock */ {0x000d, 0x8114}, /* SW GPIO data */ {0x0002, 0x8116}, /* 200 kHz pump clock */ {0x0020, 0x8112}, /* Video drop enable, ISO streaming disable */ /* --------------------------------------- */ {0x000f, 0x8402}, /* memory bank */ {0x0000, 0x8403}, /* ... address */ /* --------------------------------------- */ /* 0x88__ is Synchronous Serial Interface. */ /* TBD: This table could be expressed more compactly */ /* using spca508_write_i2c_vector(). */ /* TBD: Should see if the values in spca50x_i2c_data */ /* would work with the VQ110 instead of the values */ /* below. */ {0x00c0, 0x8804}, /* SSI slave addr */ {0x0008, 0x8802}, /* 375 Khz SSI clock */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, /* 375 Khz SSI clock */ {0x0012, 0x8801}, /* SSI reg addr */ {0x0080, 0x8800}, /* SSI data to write */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, /* 375 Khz SSI clock */ {0x0012, 0x8801}, /* SSI reg addr */ {0x0000, 0x8800}, /* SSI data to write */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, /* 375 Khz SSI clock */ {0x0011, 0x8801}, /* SSI reg addr */ {0x0040, 0x8800}, /* SSI data to write */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0013, 0x8801}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0014, 0x8801}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0015, 0x8801}, {0x0001, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0016, 0x8801}, {0x0003, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0017, 0x8801}, {0x0036, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0018, 0x8801}, {0x00ec, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x001a, 0x8801}, {0x0094, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x001b, 0x8801}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0027, 0x8801}, {0x00a2, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0028, 0x8801}, {0x0040, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x002a, 0x8801}, {0x0084, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x002b, 0x8801}, {0x00a8, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x002c, 0x8801}, {0x00fe, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x002d, 0x8801}, {0x0003, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0038, 0x8801}, {0x0083, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0033, 0x8801}, {0x0081, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0034, 0x8801}, {0x004a, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0039, 0x8801}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0010, 0x8801}, {0x00a8, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0006, 0x8801}, {0x0058, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0000, 0x8801}, {0x0004, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0040, 0x8801}, {0x0080, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0041, 0x8801}, {0x000c, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0042, 0x8801}, {0x000c, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0043, 0x8801}, {0x0028, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0044, 0x8801}, {0x0080, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0045, 0x8801}, {0x0020, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0046, 0x8801}, {0x0020, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0047, 0x8801}, {0x0080, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0048, 0x8801}, {0x004c, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x0049, 0x8801}, {0x0084, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x004a, 0x8801}, {0x0084, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x0008, 0x8802}, {0x004b, 0x8801}, {0x0084, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* --------------------------------------- */ {0x0012, 0x8700}, /* Clock speed 48Mhz/(2+2)/2= 6 Mhz */ {0x0000, 0x8701}, /* CKx1 clock delay adj */ {0x0000, 0x8701}, /* CKx1 clock delay adj */ {0x0001, 0x870c}, /* CKOx2 output */ /* --------------------------------------- */ {0x0080, 0x8600}, /* Line memory read counter (L) */ {0x0001, 0x8606}, /* reserved */ {0x0064, 0x8607}, /* Line memory read counter (H) 0x6480=25,728 */ {0x002a, 0x8601}, /* CDSP sharp interpolation mode, * line sel for color sep, edge enhance enab */ {0x0000, 0x8602}, /* optical black level for user settng = 0 */ {0x0080, 0x8600}, /* Line memory read counter (L) */ {0x000a, 0x8603}, /* optical black level calc mode: * auto; optical black offset = 10 */ {0x00df, 0x865b}, /* Horiz offset for valid pixels (L)=0xdf */ {0x0012, 0x865c}, /* Vert offset for valid lines (L)=0x12 */ /* The following two lines seem to be the "wrong" resolution. */ /* But perhaps these indicate the actual size of the sensor */ /* rather than the size of the current video mode. */ {0x0058, 0x865d}, /* Horiz valid pixels (*4) (L) = 352 */ {0x0048, 0x865e}, /* Vert valid lines (*4) (L) = 288 */ {0x0015, 0x8608}, /* A11 Coef ... */ {0x0030, 0x8609}, {0x00fb, 0x860a}, {0x003e, 0x860b}, {0x00ce, 0x860c}, {0x00f4, 0x860d}, {0x00eb, 0x860e}, {0x00dc, 0x860f}, {0x0039, 0x8610}, {0x0001, 0x8611}, /* R offset for white balance ... */ {0x0000, 0x8612}, {0x0001, 0x8613}, {0x0000, 0x8614}, {0x005b, 0x8651}, /* R gain for white balance ... */ {0x0040, 0x8652}, {0x0060, 0x8653}, {0x0040, 0x8654}, {0x0000, 0x8655}, {0x0001, 0x863f}, /* Fixed gamma correction enable, USB control, * lum filter disable, lum noise clip disable */ {0x00a1, 0x8656}, /* Window1 size 256x256, Windows2 size 64x64, * gamma look-up disable, * new edge enhancement enable */ {0x0018, 0x8657}, /* Edge gain high thresh */ {0x0020, 0x8658}, /* Edge gain low thresh */ {0x000a, 0x8659}, /* Edge bandwidth high threshold */ {0x0005, 0x865a}, /* Edge bandwidth low threshold */ /* -------------------------------- */ {0x0030, 0x8112}, /* Video drop enable, ISO streaming enable */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0xa908, 0x8802}, {0x0034, 0x8801}, /* SSI reg addr */ {0x00ca, 0x8800}, /* SSI data to write */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0x1f08, 0x8802}, {0x0006, 0x8801}, {0x0080, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* ----- Read back coefs we wrote earlier. */ /* READ { 0x0000, 0x8608 } -> 0000: 15 */ /* READ { 0x0000, 0x8609 } -> 0000: 30 */ /* READ { 0x0000, 0x860a } -> 0000: fb */ /* READ { 0x0000, 0x860b } -> 0000: 3e */ /* READ { 0x0000, 0x860c } -> 0000: ce */ /* READ { 0x0000, 0x860d } -> 0000: f4 */ /* READ { 0x0000, 0x860e } -> 0000: eb */ /* READ { 0x0000, 0x860f } -> 0000: dc */ /* READ { 0x0000, 0x8610 } -> 0000: 39 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 08 */ {0xb008, 0x8802}, {0x0006, 0x8801}, {0x007d, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* This chunk is seemingly redundant with */ /* earlier commands (A11 Coef...), but if I disable it, */ /* the image appears too dark. Maybe there was some kind of */ /* reset since the earlier commands, so this is necessary again. */ {0x0015, 0x8608}, {0x0030, 0x8609}, {0xfffb, 0x860a}, {0x003e, 0x860b}, {0xffce, 0x860c}, {0xfff4, 0x860d}, {0xffeb, 0x860e}, {0xffdc, 0x860f}, {0x0039, 0x8610}, {0x0018, 0x8657}, {0x0000, 0x8508}, /* Disable compression. */ /* Previous line was: {0x0021, 0x8508}, * Enable compression. */ {0x0032, 0x850b}, /* compression stuff */ {0x0003, 0x8509}, /* compression stuff */ {0x0011, 0x850a}, /* compression stuff */ {0x0021, 0x850d}, /* compression stuff */ {0x0010, 0x850c}, /* compression stuff */ {0x0003, 0x8500}, /* *** Video mode: 160x120 */ {0x0001, 0x8501}, /* Hardware-dominated snap control */ {0x0061, 0x8656}, /* Window1 size 128x128, Windows2 size 128x128, * gamma look-up disable, * new edge enhancement enable */ {0x0018, 0x8617}, /* Window1 start X (*2) */ {0x0008, 0x8618}, /* Window1 start Y (*2) */ {0x0061, 0x8656}, /* Window1 size 128x128, Windows2 size 128x128, * gamma look-up disable, * new edge enhancement enable */ {0x0058, 0x8619}, /* Window2 start X (*2) */ {0x0008, 0x861a}, /* Window2 start Y (*2) */ {0x00ff, 0x8615}, /* High lum thresh for white balance */ {0x0000, 0x8616}, /* Low lum thresh for white balance */ {0x0012, 0x8700}, /* Clock speed 48Mhz/(2+2)/2= 6 Mhz */ {0x0012, 0x8700}, /* Clock speed 48Mhz/(2+2)/2= 6 Mhz */ /* READ { 0x0000, 0x8656 } -> 0000: 61 */ {0x0028, 0x8802}, /* 375 Khz SSI clock, SSI r/w sync with VSYNC */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 28 */ {0x1f28, 0x8802}, /* 375 Khz SSI clock, SSI r/w sync with VSYNC */ {0x0010, 0x8801}, /* SSI reg addr */ {0x003e, 0x8800}, /* SSI data to write */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ {0x0028, 0x8802}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 28 */ {0x1f28, 0x8802}, {0x0000, 0x8801}, {0x001f, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ {0x0001, 0x8602}, /* optical black level for user settning = 1 */ /* Original: */ {0x0023, 0x8700}, /* Clock speed 48Mhz/(3+2)/4= 2.4 Mhz */ {0x000f, 0x8602}, /* optical black level for user settning = 15 */ {0x0028, 0x8802}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 28 */ {0x1f28, 0x8802}, {0x0010, 0x8801}, {0x007b, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ {0x002f, 0x8651}, /* R gain for white balance ... */ {0x0080, 0x8653}, /* READ { 0x0000, 0x8655 } -> 0000: 00 */ {0x0000, 0x8655}, {0x0030, 0x8112}, /* Video drop enable, ISO streaming enable */ {0x0020, 0x8112}, /* Video drop enable, ISO streaming disable */ /* UNKNOWN DIRECTION (URB_FUNCTION_SELECT_INTERFACE: (ALT=0) ) */ {} }; /* * Initialization data for Intel EasyPC Camera CS110 */ static const u16 spca508cs110_init_data[][2] = { {0x0000, 0x870b}, /* Reset CTL3 */ {0x0003, 0x8111}, /* Soft Reset compression, memory, TG & CDSP */ {0x0000, 0x8111}, /* Normal operation on reset */ {0x0090, 0x8110}, /* External Clock 2x & Synchronous Serial Interface Output */ {0x0020, 0x8112}, /* Video Drop packet enable */ {0x0000, 0x8114}, /* Software GPIO output data */ {0x0001, 0x8114}, {0x0001, 0x8114}, {0x0001, 0x8114}, {0x0003, 0x8114}, /* Initial sequence Synchronous Serial Interface */ {0x000f, 0x8402}, /* Memory bank Address */ {0x0000, 0x8403}, /* Memory bank Address */ {0x00ba, 0x8804}, /* SSI Slave address */ {0x0010, 0x8802}, /* 93.75kHz SSI Clock Two DataByte */ {0x0010, 0x8802}, /* 93.75kHz SSI Clock two DataByte */ {0x0001, 0x8801}, {0x000a, 0x8805}, /* a - NWG: Dunno what this is about */ {0x0000, 0x8800}, {0x0010, 0x8802}, {0x0002, 0x8801}, {0x0000, 0x8805}, {0x0000, 0x8800}, {0x0010, 0x8802}, {0x0003, 0x8801}, {0x0027, 0x8805}, {0x0001, 0x8800}, {0x0010, 0x8802}, {0x0004, 0x8801}, {0x0065, 0x8805}, {0x0001, 0x8800}, {0x0010, 0x8802}, {0x0005, 0x8801}, {0x0003, 0x8805}, {0x0000, 0x8800}, {0x0010, 0x8802}, {0x0006, 0x8801}, {0x001c, 0x8805}, {0x0000, 0x8800}, {0x0010, 0x8802}, {0x0007, 0x8801}, {0x002a, 0x8805}, {0x0000, 0x8800}, {0x0010, 0x8802}, {0x0002, 0x8704}, /* External input CKIx1 */ {0x0001, 0x8606}, /* 1 Line memory Read Counter (H) Result: (d)410 */ {0x009a, 0x8600}, /* Line memory Read Counter (L) */ {0x0001, 0x865b}, /* 1 Horizontal Offset for Valid Pixel(L) */ {0x0003, 0x865c}, /* 3 Vertical Offset for Valid Lines(L) */ {0x0058, 0x865d}, /* 58 Horizontal Valid Pixel Window(L) */ {0x0006, 0x8660}, /* Nibble data + input order */ {0x000a, 0x8602}, /* Optical black level set to 0x0a */ {0x0000, 0x8603}, /* Optical black level Offset */ /* {0x0000, 0x8611}, * 0 R Offset for white Balance */ /* {0x0000, 0x8612}, * 1 Gr Offset for white Balance */ /* {0x0000, 0x8613}, * 1f B Offset for white Balance */ /* {0x0000, 0x8614}, * f0 Gb Offset for white Balance */ {0x0040, 0x8651}, /* 2b BLUE gain for white balance good at all 60 */ {0x0030, 0x8652}, /* 41 Gr Gain for white Balance (L) */ {0x0035, 0x8653}, /* 26 RED gain for white balance */ {0x0035, 0x8654}, /* 40Gb Gain for white Balance (L) */ {0x0041, 0x863f}, /* Fixed Gamma correction enabled (makes colours look better) */ {0x0000, 0x8655}, /* High bits for white balance*****brightness control*** */ {} }; static const u16 spca508_sightcam_init_data[][2] = { /* This line seems to setup the frame/canvas */ {0x000f, 0x8402}, /* These 6 lines are needed to startup the webcam */ {0x0090, 0x8110}, {0x0001, 0x8114}, {0x0001, 0x8114}, {0x0001, 0x8114}, {0x0003, 0x8114}, {0x0080, 0x8804}, /* This part seems to make the pictures darker? (autobrightness?) */ {0x0001, 0x8801}, {0x0004, 0x8800}, {0x0003, 0x8801}, {0x00e0, 0x8800}, {0x0004, 0x8801}, {0x00b4, 0x8800}, {0x0005, 0x8801}, {0x0000, 0x8800}, {0x0006, 0x8801}, {0x00e0, 0x8800}, {0x0007, 0x8801}, {0x000c, 0x8800}, /* This section is just needed, it probably * does something like the previous section, * but the cam won't start if it's not included. */ {0x0014, 0x8801}, {0x0008, 0x8800}, {0x0015, 0x8801}, {0x0067, 0x8800}, {0x0016, 0x8801}, {0x0000, 0x8800}, {0x0017, 0x8801}, {0x0020, 0x8800}, {0x0018, 0x8801}, {0x0044, 0x8800}, /* Makes the picture darker - and the * cam won't start if not included */ {0x001e, 0x8801}, {0x00ea, 0x8800}, {0x001f, 0x8801}, {0x0001, 0x8800}, {0x0003, 0x8801}, {0x00e0, 0x8800}, /* seems to place the colors ontop of each other #1 */ {0x0006, 0x8704}, {0x0001, 0x870c}, {0x0016, 0x8600}, {0x0002, 0x8606}, /* if not included the pictures becomes _very_ dark */ {0x0064, 0x8607}, {0x003a, 0x8601}, {0x0000, 0x8602}, /* seems to place the colors ontop of each other #2 */ {0x0016, 0x8600}, {0x0018, 0x8617}, {0x0008, 0x8618}, {0x00a1, 0x8656}, /* webcam won't start if not included */ {0x0007, 0x865b}, {0x0001, 0x865c}, {0x0058, 0x865d}, {0x0048, 0x865e}, /* adjusts the colors */ {0x0049, 0x8651}, {0x0040, 0x8652}, {0x004c, 0x8653}, {0x0040, 0x8654}, {} }; static const u16 spca508_sightcam2_init_data[][2] = { {0x0020, 0x8112}, {0x000f, 0x8402}, {0x0000, 0x8403}, {0x0008, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x0009, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x000a, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x000b, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x000c, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x000d, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x000e, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x0007, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x000f, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x0018, 0x8660}, {0x0010, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x0011, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x0000, 0x86b0}, {0x0034, 0x86b1}, {0x0000, 0x86b2}, {0x0049, 0x86b3}, {0x0000, 0x86b4}, {0x0000, 0x86b4}, {0x0012, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x0013, 0x8201}, {0x0008, 0x8200}, {0x0001, 0x8200}, {0x0001, 0x86b0}, {0x00aa, 0x86b1}, {0x0000, 0x86b2}, {0x00e4, 0x86b3}, {0x0000, 0x86b4}, {0x0000, 0x86b4}, {0x0018, 0x8660}, {0x0090, 0x8110}, {0x0001, 0x8114}, {0x0001, 0x8114}, {0x0001, 0x8114}, {0x0003, 0x8114}, {0x0080, 0x8804}, {0x0003, 0x8801}, {0x0012, 0x8800}, {0x0004, 0x8801}, {0x0005, 0x8800}, {0x0005, 0x8801}, {0x0000, 0x8800}, {0x0006, 0x8801}, {0x0000, 0x8800}, {0x0007, 0x8801}, {0x0000, 0x8800}, {0x0008, 0x8801}, {0x0005, 0x8800}, {0x000a, 0x8700}, {0x000e, 0x8801}, {0x0004, 0x8800}, {0x0005, 0x8801}, {0x0047, 0x8800}, {0x0006, 0x8801}, {0x0000, 0x8800}, {0x0007, 0x8801}, {0x00c0, 0x8800}, {0x0008, 0x8801}, {0x0003, 0x8800}, {0x0013, 0x8801}, {0x0001, 0x8800}, {0x0009, 0x8801}, {0x0000, 0x8800}, {0x000a, 0x8801}, {0x0000, 0x8800}, {0x000b, 0x8801}, {0x0000, 0x8800}, {0x000c, 0x8801}, {0x0000, 0x8800}, {0x000e, 0x8801}, {0x0004, 0x8800}, {0x000f, 0x8801}, {0x0000, 0x8800}, {0x0010, 0x8801}, {0x0006, 0x8800}, {0x0011, 0x8801}, {0x0006, 0x8800}, {0x0012, 0x8801}, {0x0000, 0x8800}, {0x0013, 0x8801}, {0x0001, 0x8800}, {0x000a, 0x8700}, {0x0000, 0x8702}, {0x0000, 0x8703}, {0x00c2, 0x8704}, {0x0001, 0x870c}, {0x0044, 0x8600}, {0x0002, 0x8606}, {0x0064, 0x8607}, {0x003a, 0x8601}, {0x0008, 0x8602}, {0x0044, 0x8600}, {0x0018, 0x8617}, {0x0008, 0x8618}, {0x00a1, 0x8656}, {0x0004, 0x865b}, {0x0002, 0x865c}, {0x0058, 0x865d}, {0x0048, 0x865e}, {0x0012, 0x8608}, {0x002c, 0x8609}, {0x0002, 0x860a}, {0x002c, 0x860b}, {0x00db, 0x860c}, {0x00f9, 0x860d}, {0x00f1, 0x860e}, {0x00e3, 0x860f}, {0x002c, 0x8610}, {0x006c, 0x8651}, {0x0041, 0x8652}, {0x0059, 0x8653}, {0x0040, 0x8654}, {0x00fa, 0x8611}, {0x00ff, 0x8612}, {0x00f8, 0x8613}, {0x0000, 0x8614}, {0x0001, 0x863f}, {0x0000, 0x8640}, {0x0026, 0x8641}, {0x0045, 0x8642}, {0x0060, 0x8643}, {0x0075, 0x8644}, {0x0088, 0x8645}, {0x009b, 0x8646}, {0x00b0, 0x8647}, {0x00c5, 0x8648}, {0x00d2, 0x8649}, {0x00dc, 0x864a}, {0x00e5, 0x864b}, {0x00eb, 0x864c}, {0x00f0, 0x864d}, {0x00f6, 0x864e}, {0x00fa, 0x864f}, {0x00ff, 0x8650}, {0x0060, 0x8657}, {0x0010, 0x8658}, {0x0018, 0x8659}, {0x0005, 0x865a}, {0x0018, 0x8660}, {0x0003, 0x8509}, {0x0011, 0x850a}, {0x0032, 0x850b}, {0x0010, 0x850c}, {0x0021, 0x850d}, {0x0001, 0x8500}, {0x0000, 0x8508}, {0x0012, 0x8608}, {0x002c, 0x8609}, {0x0002, 0x860a}, {0x0039, 0x860b}, {0x00d0, 0x860c}, {0x00f7, 0x860d}, {0x00ed, 0x860e}, {0x00db, 0x860f}, {0x0039, 0x8610}, {0x0012, 0x8657}, {0x000c, 0x8619}, {0x0004, 0x861a}, {0x00a1, 0x8656}, {0x00c8, 0x8615}, {0x0032, 0x8616}, {0x0030, 0x8112}, {0x0020, 0x8112}, {0x0020, 0x8112}, {0x000f, 0x8402}, {0x0000, 0x8403}, {0x0090, 0x8110}, {0x0001, 0x8114}, {0x0001, 0x8114}, {0x0001, 0x8114}, {0x0003, 0x8114}, {0x0080, 0x8804}, {0x0003, 0x8801}, {0x0012, 0x8800}, {0x0004, 0x8801}, {0x0005, 0x8800}, {0x0005, 0x8801}, {0x0047, 0x8800}, {0x0006, 0x8801}, {0x0000, 0x8800}, {0x0007, 0x8801}, {0x00c0, 0x8800}, {0x0008, 0x8801}, {0x0003, 0x8800}, {0x000a, 0x8700}, {0x000e, 0x8801}, {0x0004, 0x8800}, {0x0005, 0x8801}, {0x0047, 0x8800}, {0x0006, 0x8801}, {0x0000, 0x8800}, {0x0007, 0x8801}, {0x00c0, 0x8800}, {0x0008, 0x8801}, {0x0003, 0x8800}, {0x0013, 0x8801}, {0x0001, 0x8800}, {0x0009, 0x8801}, {0x0000, 0x8800}, {0x000a, 0x8801}, {0x0000, 0x8800}, {0x000b, 0x8801}, {0x0000, 0x8800}, {0x000c, 0x8801}, {0x0000, 0x8800}, {0x000e, 0x8801}, {0x0004, 0x8800}, {0x000f, 0x8801}, {0x0000, 0x8800}, {0x0010, 0x8801}, {0x0006, 0x8800}, {0x0011, 0x8801}, {0x0006, 0x8800}, {0x0012, 0x8801}, {0x0000, 0x8800}, {0x0013, 0x8801}, {0x0001, 0x8800}, {0x000a, 0x8700}, {0x0000, 0x8702}, {0x0000, 0x8703}, {0x00c2, 0x8704}, {0x0001, 0x870c}, {0x0044, 0x8600}, {0x0002, 0x8606}, {0x0064, 0x8607}, {0x003a, 0x8601}, {0x0008, 0x8602}, {0x0044, 0x8600}, {0x0018, 0x8617}, {0x0008, 0x8618}, {0x00a1, 0x8656}, {0x0004, 0x865b}, {0x0002, 0x865c}, {0x0058, 0x865d}, {0x0048, 0x865e}, {0x0012, 0x8608}, {0x002c, 0x8609}, {0x0002, 0x860a}, {0x002c, 0x860b}, {0x00db, 0x860c}, {0x00f9, 0x860d}, {0x00f1, 0x860e}, {0x00e3, 0x860f}, {0x002c, 0x8610}, {0x006c, 0x8651}, {0x0041, 0x8652}, {0x0059, 0x8653}, {0x0040, 0x8654}, {0x00fa, 0x8611}, {0x00ff, 0x8612}, {0x00f8, 0x8613}, {0x0000, 0x8614}, {0x0001, 0x863f}, {0x0000, 0x8640}, {0x0026, 0x8641}, {0x0045, 0x8642}, {0x0060, 0x8643}, {0x0075, 0x8644}, {0x0088, 0x8645}, {0x009b, 0x8646}, {0x00b0, 0x8647}, {0x00c5, 0x8648}, {0x00d2, 0x8649}, {0x00dc, 0x864a}, {0x00e5, 0x864b}, {0x00eb, 0x864c}, {0x00f0, 0x864d}, {0x00f6, 0x864e}, {0x00fa, 0x864f}, {0x00ff, 0x8650}, {0x0060, 0x8657}, {0x0010, 0x8658}, {0x0018, 0x8659}, {0x0005, 0x865a}, {0x0018, 0x8660}, {0x0003, 0x8509}, {0x0011, 0x850a}, {0x0032, 0x850b}, {0x0010, 0x850c}, {0x0021, 0x850d}, {0x0001, 0x8500}, {0x0000, 0x8508}, {0x0012, 0x8608}, {0x002c, 0x8609}, {0x0002, 0x860a}, {0x0039, 0x860b}, {0x00d0, 0x860c}, {0x00f7, 0x860d}, {0x00ed, 0x860e}, {0x00db, 0x860f}, {0x0039, 0x8610}, {0x0012, 0x8657}, {0x0064, 0x8619}, /* This line starts it all, it is not needed here */ /* since it has been build into the driver */ /* jfm: don't start now */ /* {0x0030, 0x8112}, */ {} }; /* * Initialization data for Creative Webcam Vista */ static const u16 spca508_vista_init_data[][2] = { {0x0008, 0x8200}, /* Clear register */ {0x0000, 0x870b}, /* Reset CTL3 */ {0x0020, 0x8112}, /* Video Drop packet enable */ {0x0003, 0x8111}, /* Soft Reset compression, memory, TG & CDSP */ {0x0000, 0x8110}, /* Disable everything */ {0x0000, 0x8114}, /* Software GPIO output data */ {0x0000, 0x8114}, {0x0003, 0x8111}, {0x0000, 0x8111}, {0x0090, 0x8110}, /* Enable: SSI output, External 2X clock output */ {0x0020, 0x8112}, {0x0000, 0x8114}, {0x0001, 0x8114}, {0x0001, 0x8114}, {0x0001, 0x8114}, {0x0003, 0x8114}, {0x000f, 0x8402}, /* Memory bank Address */ {0x0000, 0x8403}, /* Memory bank Address */ {0x00ba, 0x8804}, /* SSI Slave address */ {0x0010, 0x8802}, /* 93.75kHz SSI Clock Two DataByte */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, /* Will write 2 bytes (DATA1+DATA2) */ {0x0020, 0x8801}, /* Register address for SSI read/write */ {0x0044, 0x8805}, /* DATA2 */ {0x0004, 0x8800}, /* DATA1 -> write triggered */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x0009, 0x8801}, {0x0042, 0x8805}, {0x0001, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x003c, 0x8801}, {0x0001, 0x8805}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x0001, 0x8801}, {0x000a, 0x8805}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x0002, 0x8801}, {0x0000, 0x8805}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x0003, 0x8801}, {0x0027, 0x8805}, {0x0001, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x0004, 0x8801}, {0x0065, 0x8805}, {0x0001, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x0005, 0x8801}, {0x0003, 0x8805}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x0006, 0x8801}, {0x001c, 0x8805}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x0007, 0x8801}, {0x002a, 0x8805}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x000e, 0x8801}, {0x0000, 0x8805}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x0028, 0x8801}, {0x002e, 0x8805}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x0039, 0x8801}, {0x0013, 0x8805}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x003b, 0x8801}, {0x000c, 0x8805}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x0035, 0x8801}, {0x0028, 0x8805}, {0x0000, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8803 } -> 0000: 00 */ /* READ { 0x0001, 0x8802 } -> 0000: 10 */ {0x0010, 0x8802}, {0x0009, 0x8801}, {0x0042, 0x8805}, {0x0001, 0x8800}, /* READ { 0x0001, 0x8803 } -> 0000: 00 */ {0x0050, 0x8703}, {0x0002, 0x8704}, /* External input CKIx1 */ {0x0001, 0x870c}, /* Select CKOx2 output */ {0x009a, 0x8600}, /* Line memory Read Counter (L) */ {0x0001, 0x8606}, /* 1 Line memory Read Counter (H) Result: (d)410 */ {0x0023, 0x8601}, {0x0010, 0x8602}, {0x000a, 0x8603}, {0x009a, 0x8600}, {0x0001, 0x865b}, /* 1 Horizontal Offset for Valid Pixel(L) */ {0x0003, 0x865c}, /* Vertical offset for valid lines (L) */ {0x0058, 0x865d}, /* Horizontal valid pixels window (L) */ {0x0048, 0x865e}, /* Vertical valid lines window (L) */ {0x0000, 0x865f}, {0x0006, 0x8660}, /* Enable nibble data input, select nibble input order */ {0x0013, 0x8608}, /* A11 Coeficients for color correction */ {0x0028, 0x8609}, /* Note: these values are confirmed at the end of array */ {0x0005, 0x860a}, /* ... */ {0x0025, 0x860b}, {0x00e1, 0x860c}, {0x00fa, 0x860d}, {0x00f4, 0x860e}, {0x00e8, 0x860f}, {0x0025, 0x8610}, /* A33 Coef. */ {0x00fc, 0x8611}, /* White balance offset: R */ {0x0001, 0x8612}, /* White balance offset: Gr */ {0x00fe, 0x8613}, /* White balance offset: B */ {0x0000, 0x8614}, /* White balance offset: Gb */ {0x0064, 0x8651}, /* R gain for white balance (L) */ {0x0040, 0x8652}, /* Gr gain for white balance (L) */ {0x0066, 0x8653}, /* B gain for white balance (L) */ {0x0040, 0x8654}, /* Gb gain for white balance (L) */ {0x0001, 0x863f}, /* Enable fixed gamma correction */ {0x00a1, 0x8656}, /* Size - Window1: 256x256, Window2: 128x128, * UV division: UV no change, * Enable New edge enhancement */ {0x0018, 0x8657}, /* Edge gain high threshold */ {0x0020, 0x8658}, /* Edge gain low threshold */ {0x000a, 0x8659}, /* Edge bandwidth high threshold */ {0x0005, 0x865a}, /* Edge bandwidth low threshold */ {0x0064, 0x8607}, /* UV filter enable */ {0x0016, 0x8660}, {0x0000, 0x86b0}, /* Bad pixels compensation address */ {0x00dc, 0x86b1}, /* X coord for bad pixels compensation (L) */ {0x0000, 0x86b2}, {0x0009, 0x86b3}, /* Y coord for bad pixels compensation (L) */ {0x0000, 0x86b4}, {0x0001, 0x86b0}, {0x00f5, 0x86b1}, {0x0000, 0x86b2}, {0x00c6, 0x86b3}, {0x0000, 0x86b4}, {0x0002, 0x86b0}, {0x001c, 0x86b1}, {0x0001, 0x86b2}, {0x00d7, 0x86b3}, {0x0000, 0x86b4}, {0x0003, 0x86b0}, {0x001c, 0x86b1}, {0x0001, 0x86b2}, {0x00d8, 0x86b3}, {0x0000, 0x86b4}, {0x0004, 0x86b0}, {0x001d, 0x86b1}, {0x0001, 0x86b2}, {0x00d8, 0x86b3}, {0x0000, 0x86b4}, {0x001e, 0x8660}, /* READ { 0x0000, 0x8608 } -> 0000: 13 */ /* READ { 0x0000, 0x8609 } -> 0000: 28 */ /* READ { 0x0000, 0x8610 } -> 0000: 05 */ /* READ { 0x0000, 0x8611 } -> 0000: 25 */ /* READ { 0x0000, 0x8612 } -> 0000: e1 */ /* READ { 0x0000, 0x8613 } -> 0000: fa */ /* READ { 0x0000, 0x8614 } -> 0000: f4 */ /* READ { 0x0000, 0x8615 } -> 0000: e8 */ /* READ { 0x0000, 0x8616 } -> 0000: 25 */ {} }; static int reg_write(struct gspca_dev *gspca_dev, u16 index, u16 value) { int ret; struct usb_device *dev = gspca_dev->dev; ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), 0, /* request */ USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, NULL, 0, 500); gspca_dbg(gspca_dev, D_USBO, "reg write i:0x%04x = 0x%02x\n", index, value); if (ret < 0) pr_err("reg write: error %d\n", ret); return ret; } /* read 1 byte */ /* returns: negative is error, pos or zero is data */ static int reg_read(struct gspca_dev *gspca_dev, u16 index) /* wIndex */ { int ret; ret = usb_control_msg(gspca_dev->dev, usb_rcvctrlpipe(gspca_dev->dev, 0), 0, /* register */ USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, /* value */ index, gspca_dev->usb_buf, 1, 500); /* timeout */ gspca_dbg(gspca_dev, D_USBI, "reg read i:%04x --> %02x\n", index, gspca_dev->usb_buf[0]); if (ret < 0) { pr_err("reg_read err %d\n", ret); return ret; } return gspca_dev->usb_buf[0]; } /* send 1 or 2 bytes to the sensor via the Synchronous Serial Interface */ static int ssi_w(struct gspca_dev *gspca_dev, u16 reg, u16 val) { int ret, retry; ret = reg_write(gspca_dev, 0x8802, reg >> 8); if (ret < 0) goto out; ret = reg_write(gspca_dev, 0x8801, reg & 0x00ff); if (ret < 0) goto out; if ((reg & 0xff00) == 0x1000) { /* if 2 bytes */ ret = reg_write(gspca_dev, 0x8805, val & 0x00ff); if (ret < 0) goto out; val >>= 8; } ret = reg_write(gspca_dev, 0x8800, val); if (ret < 0) goto out; /* poll until not busy */ retry = 10; for (;;) { ret = reg_read(gspca_dev, 0x8803); if (ret < 0) break; if (gspca_dev->usb_buf[0] == 0) break; if (--retry <= 0) { gspca_err(gspca_dev, "ssi_w busy %02x\n", gspca_dev->usb_buf[0]); ret = -1; break; } msleep(8); } out: return ret; } static int write_vector(struct gspca_dev *gspca_dev, const u16 (*data)[2]) { int ret = 0; while ((*data)[1] != 0) { if ((*data)[1] & 0x8000) { if ((*data)[1] == 0xdd00) /* delay */ msleep((*data)[0]); else ret = reg_write(gspca_dev, (*data)[1], (*data)[0]); } else { ret = ssi_w(gspca_dev, (*data)[1], (*data)[0]); } if (ret < 0) break; data++; } return ret; } /* this function is called at probe time */ static int sd_config(struct gspca_dev *gspca_dev, const struct usb_device_id *id) { struct sd *sd = (struct sd *) gspca_dev; struct cam *cam; const u16 (*init_data)[2]; static const u16 (*(init_data_tb[]))[2] = { spca508_vista_init_data, /* CreativeVista 0 */ spca508_sightcam_init_data, /* HamaUSBSightcam 1 */ spca508_sightcam2_init_data, /* HamaUSBSightcam2 2 */ spca508cs110_init_data, /* IntelEasyPCCamera 3 */ spca508cs110_init_data, /* MicroInnovationIC200 4 */ spca508_init_data, /* ViewQuestVQ110 5 */ }; int data1, data2; /* Read from global register the USB product and vendor IDs, just to * prove that we can communicate with the device. This works, which * confirms at we are communicating properly and that the device * is a 508. */ data1 = reg_read(gspca_dev, 0x8104); data2 = reg_read(gspca_dev, 0x8105); gspca_dbg(gspca_dev, D_PROBE, "Webcam Vendor ID: 0x%02x%02x\n", data2, data1); data1 = reg_read(gspca_dev, 0x8106); data2 = reg_read(gspca_dev, 0x8107); gspca_dbg(gspca_dev, D_PROBE, "Webcam Product ID: 0x%02x%02x\n", data2, data1); data1 = reg_read(gspca_dev, 0x8621); gspca_dbg(gspca_dev, D_PROBE, "Window 1 average luminance: %d\n", data1); cam = &gspca_dev->cam; cam->cam_mode = sif_mode; cam->nmodes = ARRAY_SIZE(sif_mode); sd->subtype = id->driver_info; init_data = init_data_tb[sd->subtype]; return write_vector(gspca_dev, init_data); } /* this function is called at probe and resume time */ static int sd_init(struct gspca_dev *gspca_dev) { return 0; } static int sd_start(struct gspca_dev *gspca_dev) { int mode; mode = gspca_dev->cam.cam_mode[gspca_dev->curr_mode].priv; reg_write(gspca_dev, 0x8500, mode); switch (mode) { case 0: case 1: reg_write(gspca_dev, 0x8700, 0x28); /* clock */ break; default: /* case 2: */ /* case 3: */ reg_write(gspca_dev, 0x8700, 0x23); /* clock */ break; } reg_write(gspca_dev, 0x8112, 0x10 | 0x20); return 0; } static void sd_stopN(struct gspca_dev *gspca_dev) { /* Video ISO disable, Video Drop Packet enable: */ reg_write(gspca_dev, 0x8112, 0x20); } static void sd_pkt_scan(struct gspca_dev *gspca_dev, u8 *data, /* isoc packet */ int len) /* iso packet length */ { switch (data[0]) { case 0: /* start of frame */ gspca_frame_add(gspca_dev, LAST_PACKET, NULL, 0); data += SPCA508_OFFSET_DATA; len -= SPCA508_OFFSET_DATA; gspca_frame_add(gspca_dev, FIRST_PACKET, data, len); break; case 0xff: /* drop */ break; default: data += 1; len -= 1; gspca_frame_add(gspca_dev, INTER_PACKET, data, len); break; } } static void setbrightness(struct gspca_dev *gspca_dev, s32 brightness) { /* MX seem contrast */ reg_write(gspca_dev, 0x8651, brightness); reg_write(gspca_dev, 0x8652, brightness); reg_write(gspca_dev, 0x8653, brightness); reg_write(gspca_dev, 0x8654, brightness); } static int sd_s_ctrl(struct v4l2_ctrl *ctrl) { struct gspca_dev *gspca_dev = container_of(ctrl->handler, struct gspca_dev, ctrl_handler); gspca_dev->usb_err = 0; if (!gspca_dev->streaming) return 0; switch (ctrl->id) { case V4L2_CID_BRIGHTNESS: setbrightness(gspca_dev, ctrl->val); break; } return gspca_dev->usb_err; } static const struct v4l2_ctrl_ops sd_ctrl_ops = { .s_ctrl = sd_s_ctrl, }; static int sd_init_controls(struct gspca_dev *gspca_dev) { struct v4l2_ctrl_handler *hdl = &gspca_dev->ctrl_handler; gspca_dev->vdev.ctrl_handler = hdl; v4l2_ctrl_handler_init(hdl, 5); v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_BRIGHTNESS, 0, 255, 1, 128); if (hdl->error) { pr_err("Could not initialize controls\n"); return hdl->error; } return 0; } /* sub-driver description */ static const struct sd_desc sd_desc = { .name = MODULE_NAME, .config = sd_config, .init = sd_init, .init_controls = sd_init_controls, .start = sd_start, .stopN = sd_stopN, .pkt_scan = sd_pkt_scan, }; /* -- module initialisation -- */ static const struct usb_device_id device_table[] = { {USB_DEVICE(0x0130, 0x0130), .driver_info = HamaUSBSightcam}, {USB_DEVICE(0x041e, 0x4018), .driver_info = CreativeVista}, {USB_DEVICE(0x0733, 0x0110), .driver_info = ViewQuestVQ110}, {USB_DEVICE(0x0af9, 0x0010), .driver_info = HamaUSBSightcam}, {USB_DEVICE(0x0af9, 0x0011), .driver_info = HamaUSBSightcam2}, {USB_DEVICE(0x8086, 0x0110), .driver_info = IntelEasyPCCamera}, {} }; MODULE_DEVICE_TABLE(usb, device_table); /* -- device connect -- */ static int sd_probe(struct usb_interface *intf, const struct usb_device_id *id) { return gspca_dev_probe(intf, id, &sd_desc, sizeof(struct sd), THIS_MODULE); } static struct usb_driver sd_driver = { .name = MODULE_NAME, .id_table = device_table, .probe = sd_probe, .disconnect = gspca_disconnect, #ifdef CONFIG_PM .suspend = gspca_suspend, .resume = gspca_resume, .reset_resume = gspca_resume, #endif }; module_usb_driver(sd_driver);
2 1 1 1 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 // SPDX-License-Identifier: GPL-2.0-only /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> */ #include <linux/types.h> #include <linux/jiffies.h> #include <linux/timer.h> #include <linux/netfilter.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_timeout.h> static const unsigned int nf_ct_generic_timeout = 600*HZ; #ifdef CONFIG_NF_CONNTRACK_TIMEOUT #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_cttimeout.h> static int generic_timeout_nlattr_to_obj(struct nlattr *tb[], struct net *net, void *data) { struct nf_generic_net *gn = nf_generic_pernet(net); unsigned int *timeout = data; if (!timeout) timeout = &gn->timeout; if (tb[CTA_TIMEOUT_GENERIC_TIMEOUT]) *timeout = ntohl(nla_get_be32(tb[CTA_TIMEOUT_GENERIC_TIMEOUT])) * HZ; else { /* Set default generic timeout. */ *timeout = gn->timeout; } return 0; } static int generic_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) { const unsigned int *timeout = data; if (nla_put_be32(skb, CTA_TIMEOUT_GENERIC_TIMEOUT, htonl(*timeout / HZ))) goto nla_put_failure; return 0; nla_put_failure: return -ENOSPC; } static const struct nla_policy generic_timeout_nla_policy[CTA_TIMEOUT_GENERIC_MAX+1] = { [CTA_TIMEOUT_GENERIC_TIMEOUT] = { .type = NLA_U32 }, }; #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ void nf_conntrack_generic_init_net(struct net *net) { struct nf_generic_net *gn = nf_generic_pernet(net); gn->timeout = nf_ct_generic_timeout; } const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic = { .l4proto = 255, .allow_clash = true, #ifdef CONFIG_NF_CONNTRACK_TIMEOUT .ctnl_timeout = { .nlattr_to_obj = generic_timeout_nlattr_to_obj, .obj_to_nlattr = generic_timeout_obj_to_nlattr, .nlattr_max = CTA_TIMEOUT_GENERIC_MAX, .obj_size = sizeof(unsigned int), .nla_policy = generic_timeout_nla_policy, }, #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ };
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 /* SPDX-License-Identifier: GPL-2.0-only */ /* * The NFC Controller Interface is the communication protocol between an * NFC Controller (NFCC) and a Device Host (DH). * * Copyright (C) 2011 Texas Instruments, Inc. * Copyright (C) 2013 Intel Corporation. All rights reserved. * Copyright (C) 2014 Marvell International Ltd. * * Written by Ilan Elias <ilane@ti.com> * * Acknowledgements: * This file is based on hci_core.h, which was written * by Maxim Krasnyansky. */ #ifndef __NCI_CORE_H #define __NCI_CORE_H #include <linux/interrupt.h> #include <linux/skbuff.h> #include <linux/tty.h> #include <net/nfc/nfc.h> #include <net/nfc/nci.h> /* NCI device flags */ enum nci_flag { NCI_INIT, NCI_UP, NCI_DATA_EXCHANGE, NCI_DATA_EXCHANGE_TO, NCI_UNREG, }; /* NCI device states */ enum nci_state { NCI_IDLE, NCI_DISCOVERY, NCI_W4_ALL_DISCOVERIES, NCI_W4_HOST_SELECT, NCI_POLL_ACTIVE, NCI_LISTEN_ACTIVE, NCI_LISTEN_SLEEP, }; /* NCI timeouts */ #define NCI_RESET_TIMEOUT 5000 #define NCI_INIT_TIMEOUT 5000 #define NCI_SET_CONFIG_TIMEOUT 5000 #define NCI_RF_DISC_TIMEOUT 5000 #define NCI_RF_DISC_SELECT_TIMEOUT 5000 #define NCI_RF_DEACTIVATE_TIMEOUT 30000 #define NCI_CMD_TIMEOUT 5000 #define NCI_DATA_TIMEOUT 3000 struct nci_dev; struct nci_driver_ops { __u16 opcode; int (*rsp)(struct nci_dev *dev, struct sk_buff *skb); int (*ntf)(struct nci_dev *dev, struct sk_buff *skb); }; struct nci_ops { int (*init)(struct nci_dev *ndev); int (*open)(struct nci_dev *ndev); int (*close)(struct nci_dev *ndev); int (*send)(struct nci_dev *ndev, struct sk_buff *skb); int (*setup)(struct nci_dev *ndev); int (*post_setup)(struct nci_dev *ndev); int (*fw_download)(struct nci_dev *ndev, const char *firmware_name); __u32 (*get_rfprotocol)(struct nci_dev *ndev, __u8 rf_protocol); int (*discover_se)(struct nci_dev *ndev); int (*disable_se)(struct nci_dev *ndev, u32 se_idx); int (*enable_se)(struct nci_dev *ndev, u32 se_idx); int (*se_io)(struct nci_dev *ndev, u32 se_idx, u8 *apdu, size_t apdu_length, se_io_cb_t cb, void *cb_context); int (*hci_load_session)(struct nci_dev *ndev); void (*hci_event_received)(struct nci_dev *ndev, u8 pipe, u8 event, struct sk_buff *skb); void (*hci_cmd_received)(struct nci_dev *ndev, u8 pipe, u8 cmd, struct sk_buff *skb); const struct nci_driver_ops *prop_ops; size_t n_prop_ops; const struct nci_driver_ops *core_ops; size_t n_core_ops; }; #define NCI_MAX_SUPPORTED_RF_INTERFACES 4 #define NCI_MAX_DISCOVERED_TARGETS 10 #define NCI_MAX_NUM_NFCEE 255 #define NCI_MAX_CONN_ID 7 #define NCI_MAX_PROPRIETARY_CMD 64 struct nci_conn_info { struct list_head list; /* NCI specification 4.4.2 Connection Creation * The combination of destination type and destination specific * parameters shall uniquely identify a single destination for the * Logical Connection */ struct dest_spec_params *dest_params; __u8 dest_type; __u8 conn_id; __u8 max_pkt_payload_len; atomic_t credits_cnt; __u8 initial_num_credits; data_exchange_cb_t data_exchange_cb; void *data_exchange_cb_context; struct sk_buff *rx_skb; }; #define NCI_INVALID_CONN_ID 0x80 #define NCI_HCI_ANY_OPEN_PIPE 0x03 /* Gates */ #define NCI_HCI_ADMIN_GATE 0x00 #define NCI_HCI_LOOPBACK_GATE 0x04 #define NCI_HCI_IDENTITY_MGMT_GATE 0x05 #define NCI_HCI_LINK_MGMT_GATE 0x06 /* Pipes */ #define NCI_HCI_LINK_MGMT_PIPE 0x00 #define NCI_HCI_ADMIN_PIPE 0x01 /* Generic responses */ #define NCI_HCI_ANY_OK 0x00 #define NCI_HCI_ANY_E_NOT_CONNECTED 0x01 #define NCI_HCI_ANY_E_CMD_PAR_UNKNOWN 0x02 #define NCI_HCI_ANY_E_NOK 0x03 #define NCI_HCI_ANY_E_PIPES_FULL 0x04 #define NCI_HCI_ANY_E_REG_PAR_UNKNOWN 0x05 #define NCI_HCI_ANY_E_PIPE_NOT_OPENED 0x06 #define NCI_HCI_ANY_E_CMD_NOT_SUPPORTED 0x07 #define NCI_HCI_ANY_E_INHIBITED 0x08 #define NCI_HCI_ANY_E_TIMEOUT 0x09 #define NCI_HCI_ANY_E_REG_ACCESS_DENIED 0x0a #define NCI_HCI_ANY_E_PIPE_ACCESS_DENIED 0x0b #define NCI_HCI_DO_NOT_OPEN_PIPE 0x81 #define NCI_HCI_INVALID_PIPE 0x80 #define NCI_HCI_INVALID_GATE 0xFF #define NCI_HCI_INVALID_HOST 0x80 #define NCI_HCI_MAX_CUSTOM_GATES 50 /* * According to specification 102 622 chapter 4.4 Pipes, * the pipe identifier is 7 bits long. */ #define NCI_HCI_MAX_PIPES 128 struct nci_hci_gate { u8 gate; u8 pipe; u8 dest_host; } __packed; struct nci_hci_pipe { u8 gate; u8 host; } __packed; struct nci_hci_init_data { u8 gate_count; struct nci_hci_gate gates[NCI_HCI_MAX_CUSTOM_GATES]; char session_id[9]; }; #define NCI_HCI_MAX_GATES 256 struct nci_hci_dev { u8 nfcee_id; struct nci_dev *ndev; struct nci_conn_info *conn_info; struct nci_hci_init_data init_data; struct nci_hci_pipe pipes[NCI_HCI_MAX_PIPES]; u8 gate2pipe[NCI_HCI_MAX_GATES]; int expected_pipes; int count_pipes; struct sk_buff_head rx_hcp_frags; struct work_struct msg_rx_work; struct sk_buff_head msg_rx_queue; }; /* NCI Core structures */ struct nci_dev { struct nfc_dev *nfc_dev; const struct nci_ops *ops; struct nci_hci_dev *hci_dev; int tx_headroom; int tx_tailroom; atomic_t state; unsigned long flags; atomic_t cmd_cnt; __u8 cur_conn_id; struct list_head conn_info_list; struct nci_conn_info *rf_conn_info; struct timer_list cmd_timer; struct timer_list data_timer; struct workqueue_struct *cmd_wq; struct work_struct cmd_work; struct workqueue_struct *rx_wq; struct work_struct rx_work; struct workqueue_struct *tx_wq; struct work_struct tx_work; struct sk_buff_head cmd_q; struct sk_buff_head rx_q; struct sk_buff_head tx_q; struct mutex req_lock; struct completion req_completion; __u32 req_status; __u32 req_result; void *driver_data; __u32 poll_prots; __u32 target_active_prot; struct nfc_target targets[NCI_MAX_DISCOVERED_TARGETS]; int n_targets; /* received during NCI_OP_CORE_RESET_RSP */ __u8 nci_ver; /* received during NCI_OP_CORE_INIT_RSP */ __u32 nfcc_features; __u8 num_supported_rf_interfaces; __u8 supported_rf_interfaces [NCI_MAX_SUPPORTED_RF_INTERFACES]; __u8 max_logical_connections; __u16 max_routing_table_size; __u8 max_ctrl_pkt_payload_len; __u16 max_size_for_large_params; __u8 manufact_id; __u32 manufact_specific_info; /* Save RF Discovery ID or NFCEE ID under conn_create */ struct dest_spec_params cur_params; /* Save destination type under conn_create */ __u8 cur_dest_type; /* stored during nci_data_exchange */ struct sk_buff *rx_data_reassembly; /* stored during intf_activated_ntf */ __u8 remote_gb[NFC_MAX_GT_LEN]; __u8 remote_gb_len; /* stored during intf_activated_ntf */ __u8 target_ats[NFC_ATS_MAXSIZE]; __u8 target_ats_len; }; /* ----- NCI Devices ----- */ struct nci_dev *nci_allocate_device(const struct nci_ops *ops, __u32 supported_protocols, int tx_headroom, int tx_tailroom); void nci_free_device(struct nci_dev *ndev); int nci_register_device(struct nci_dev *ndev); void nci_unregister_device(struct nci_dev *ndev); int nci_request(struct nci_dev *ndev, void (*req)(struct nci_dev *ndev, const void *opt), const void *opt, __u32 timeout); int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, const __u8 *payload); int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len, const __u8 *payload); int nci_core_reset(struct nci_dev *ndev); int nci_core_init(struct nci_dev *ndev); int nci_recv_frame(struct nci_dev *ndev, struct sk_buff *skb); int nci_send_frame(struct nci_dev *ndev, struct sk_buff *skb); int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, const __u8 *val); int nci_nfcee_discover(struct nci_dev *ndev, u8 action); int nci_nfcee_mode_set(struct nci_dev *ndev, u8 nfcee_id, u8 nfcee_mode); int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type, u8 number_destination_params, size_t params_len, const struct core_conn_create_dest_spec_params *params); int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id); int nci_nfcc_loopback(struct nci_dev *ndev, const void *data, size_t data_len, struct sk_buff **resp); struct nci_hci_dev *nci_hci_allocate(struct nci_dev *ndev); void nci_hci_deallocate(struct nci_dev *ndev); int nci_hci_send_event(struct nci_dev *ndev, u8 gate, u8 event, const u8 *param, size_t param_len); int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd, const u8 *param, size_t param_len, struct sk_buff **skb); int nci_hci_open_pipe(struct nci_dev *ndev, u8 pipe); int nci_hci_connect_gate(struct nci_dev *ndev, u8 dest_host, u8 dest_gate, u8 pipe); int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx, const u8 *param, size_t param_len); int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx, struct sk_buff **skb); int nci_hci_clear_all_pipes(struct nci_dev *ndev); int nci_hci_dev_session_init(struct nci_dev *ndev); static inline struct sk_buff *nci_skb_alloc(struct nci_dev *ndev, unsigned int len, gfp_t how) { struct sk_buff *skb; skb = alloc_skb(len + ndev->tx_headroom + ndev->tx_tailroom, how); if (skb) skb_reserve(skb, ndev->tx_headroom); return skb; } static inline void nci_set_parent_dev(struct nci_dev *ndev, struct device *dev) { nfc_set_parent_dev(ndev->nfc_dev, dev); } static inline void nci_set_drvdata(struct nci_dev *ndev, void *data) { ndev->driver_data = data; } static inline void *nci_get_drvdata(struct nci_dev *ndev) { return ndev->driver_data; } static inline int nci_set_vendor_cmds(struct nci_dev *ndev, const struct nfc_vendor_cmd *cmds, int n_cmds) { return nfc_set_vendor_cmds(ndev->nfc_dev, cmds, n_cmds); } void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb); void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb); int nci_prop_rsp_packet(struct nci_dev *ndev, __u16 opcode, struct sk_buff *skb); int nci_prop_ntf_packet(struct nci_dev *ndev, __u16 opcode, struct sk_buff *skb); int nci_core_rsp_packet(struct nci_dev *ndev, __u16 opcode, struct sk_buff *skb); int nci_core_ntf_packet(struct nci_dev *ndev, __u16 opcode, struct sk_buff *skb); void nci_rx_data_packet(struct nci_dev *ndev, struct sk_buff *skb); int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, const void *payload); int nci_send_data(struct nci_dev *ndev, __u8 conn_id, struct sk_buff *skb); int nci_conn_max_data_pkt_payload_size(struct nci_dev *ndev, __u8 conn_id); void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb, __u8 conn_id, int err); void nci_hci_data_received_cb(void *context, struct sk_buff *skb, int err); void nci_clear_target_list(struct nci_dev *ndev); /* ----- NCI requests ----- */ #define NCI_REQ_DONE 0 #define NCI_REQ_PEND 1 #define NCI_REQ_CANCELED 2 void nci_req_complete(struct nci_dev *ndev, int result); struct nci_conn_info *nci_get_conn_info_by_conn_id(struct nci_dev *ndev, int conn_id); int nci_get_conn_info_by_dest_type_params(struct nci_dev *ndev, u8 dest_type, const struct dest_spec_params *params); /* ----- NCI status code ----- */ int nci_to_errno(__u8 code); /* ----- NCI over SPI acknowledge modes ----- */ #define NCI_SPI_CRC_DISABLED 0x00 #define NCI_SPI_CRC_ENABLED 0x01 /* ----- NCI SPI structures ----- */ struct nci_spi { struct nci_dev *ndev; struct spi_device *spi; unsigned int xfer_udelay; /* microseconds delay between transactions */ unsigned int xfer_speed_hz; /* * SPI clock frequency * 0 => default clock */ u8 acknowledge_mode; struct completion req_completion; u8 req_result; }; /* ----- NCI SPI ----- */ struct nci_spi *nci_spi_allocate_spi(struct spi_device *spi, u8 acknowledge_mode, unsigned int delay, struct nci_dev *ndev); int nci_spi_send(struct nci_spi *nspi, struct completion *write_handshake_completion, struct sk_buff *skb); struct sk_buff *nci_spi_read(struct nci_spi *nspi); /* ----- NCI UART ---- */ /* Ioctl */ #define NCIUARTSETDRIVER _IOW('U', 0, char *) enum nci_uart_driver { NCI_UART_DRIVER_MARVELL = 0, NCI_UART_DRIVER_MAX }; struct nci_uart; struct nci_uart_ops { int (*open)(struct nci_uart *nci_uart); void (*close)(struct nci_uart *nci_uart); int (*recv)(struct nci_uart *nci_uart, struct sk_buff *skb); int (*send)(struct nci_uart *nci_uart, struct sk_buff *skb); void (*tx_start)(struct nci_uart *nci_uart); void (*tx_done)(struct nci_uart *nci_uart); }; struct nci_uart { struct module *owner; struct nci_uart_ops ops; const char *name; enum nci_uart_driver driver; /* Dynamic data */ struct nci_dev *ndev; spinlock_t rx_lock; struct work_struct write_work; struct tty_struct *tty; unsigned long tx_state; struct sk_buff_head tx_q; struct sk_buff *tx_skb; struct sk_buff *rx_skb; int rx_packet_len; void *drv_data; }; int nci_uart_register(struct nci_uart *nu); void nci_uart_unregister(struct nci_uart *nu); void nci_uart_set_config(struct nci_uart *nu, int baudrate, int flow_ctrl); #endif /* __NCI_CORE_H */
643 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM fib6 #if !defined(_TRACE_FIB6_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_FIB6_H #include <linux/in6.h> #include <net/flow.h> #include <net/ip6_fib.h> #include <linux/tracepoint.h> TRACE_EVENT(fib6_table_lookup, TP_PROTO(const struct net *net, const struct fib6_result *res, struct fib6_table *table, const struct flowi6 *flp), TP_ARGS(net, res, table, flp), TP_STRUCT__entry( __field( u32, tb_id ) __field( int, err ) __field( int, oif ) __field( int, iif ) __field( u32, flowlabel ) __field( __u8, tos ) __field( __u8, scope ) __field( __u8, flags ) __array( __u8, src, 16 ) __array( __u8, dst, 16 ) __field( u16, sport ) __field( u16, dport ) __field( u8, proto ) __field( u8, rt_type ) __array( char, name, IFNAMSIZ ) __array( __u8, gw, 16 ) ), TP_fast_assign( struct in6_addr *in6; __entry->tb_id = table->tb6_id; __entry->err = ip6_rt_type_to_error(res->fib6_type); __entry->oif = flp->flowi6_oif; __entry->iif = flp->flowi6_iif; __entry->flowlabel = ntohl(flowi6_get_flowlabel(flp)); __entry->tos = ip6_tclass(flp->flowlabel); __entry->scope = flp->flowi6_scope; __entry->flags = flp->flowi6_flags; in6 = (struct in6_addr *)__entry->src; *in6 = flp->saddr; in6 = (struct in6_addr *)__entry->dst; *in6 = flp->daddr; __entry->proto = flp->flowi6_proto; if (__entry->proto == IPPROTO_TCP || __entry->proto == IPPROTO_UDP) { __entry->sport = ntohs(flp->fl6_sport); __entry->dport = ntohs(flp->fl6_dport); } else { __entry->sport = 0; __entry->dport = 0; } if (res->nh && res->nh->fib_nh_dev) { strscpy(__entry->name, res->nh->fib_nh_dev->name, IFNAMSIZ); } else { strcpy(__entry->name, "-"); } if (res->f6i == net->ipv6.fib6_null_entry) { in6 = (struct in6_addr *)__entry->gw; *in6 = in6addr_any; } else if (res->nh) { in6 = (struct in6_addr *)__entry->gw; *in6 = res->nh->fib_nh_gw6; } ), TP_printk("table %3u oif %d iif %d proto %u %pI6c/%u -> %pI6c/%u flowlabel %#x tos %d scope %d flags %x ==> dev %s gw %pI6c err %d", __entry->tb_id, __entry->oif, __entry->iif, __entry->proto, __entry->src, __entry->sport, __entry->dst, __entry->dport, __entry->flowlabel, __entry->tos, __entry->scope, __entry->flags, __entry->name, __entry->gw, __entry->err) ); #endif /* _TRACE_FIB6_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
2 4 1 4 4 2 4 2 4 1 4 1 1 4 5 3 4 4 3 3 3 2 2 1 2 1 1 1 4 4 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 // SPDX-License-Identifier: GPL-2.0-or-later /* SCTP kernel implementation * (C) Copyright Red Hat Inc. 2017 * * This file is part of the SCTP kernel implementation * * These functions manipulate sctp stream queue/scheduling. * * Please send any bug reports or fixes you make to the * email addresched(es): * lksctp developers <linux-sctp@vger.kernel.org> * * Written or modified by: * Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> */ #include <linux/list.h> #include <net/sctp/sctp.h> #include <net/sctp/sm.h> #include <net/sctp/stream_sched.h> /* Priority handling * RFC DRAFT ndata section 3.2 */ static void sctp_sched_rr_unsched_all(struct sctp_stream *stream); static void sctp_sched_rr_next_stream(struct sctp_stream *stream) { struct list_head *pos; pos = stream->rr_next->rr_list.next; if (pos == &stream->rr_list) pos = pos->next; stream->rr_next = list_entry(pos, struct sctp_stream_out_ext, rr_list); } static void sctp_sched_rr_unsched(struct sctp_stream *stream, struct sctp_stream_out_ext *soute) { if (stream->rr_next == soute) /* Try to move to the next stream */ sctp_sched_rr_next_stream(stream); list_del_init(&soute->rr_list); /* If we have no other stream queued, clear next */ if (list_empty(&stream->rr_list)) stream->rr_next = NULL; } static void sctp_sched_rr_sched(struct sctp_stream *stream, struct sctp_stream_out_ext *soute) { if (!list_empty(&soute->rr_list)) /* Already scheduled. */ return; /* Schedule the stream */ list_add_tail(&soute->rr_list, &stream->rr_list); if (!stream->rr_next) stream->rr_next = soute; } static int sctp_sched_rr_set(struct sctp_stream *stream, __u16 sid, __u16 prio, gfp_t gfp) { return 0; } static int sctp_sched_rr_get(struct sctp_stream *stream, __u16 sid, __u16 *value) { return 0; } static int sctp_sched_rr_init(struct sctp_stream *stream) { INIT_LIST_HEAD(&stream->rr_list); stream->rr_next = NULL; return 0; } static int sctp_sched_rr_init_sid(struct sctp_stream *stream, __u16 sid, gfp_t gfp) { INIT_LIST_HEAD(&SCTP_SO(stream, sid)->ext->rr_list); return 0; } static void sctp_sched_rr_free_sid(struct sctp_stream *stream, __u16 sid) { } static void sctp_sched_rr_enqueue(struct sctp_outq *q, struct sctp_datamsg *msg) { struct sctp_stream *stream; struct sctp_chunk *ch; __u16 sid; ch = list_first_entry(&msg->chunks, struct sctp_chunk, frag_list); sid = sctp_chunk_stream_no(ch); stream = &q->asoc->stream; sctp_sched_rr_sched(stream, SCTP_SO(stream, sid)->ext); } static struct sctp_chunk *sctp_sched_rr_dequeue(struct sctp_outq *q) { struct sctp_stream *stream = &q->asoc->stream; struct sctp_stream_out_ext *soute; struct sctp_chunk *ch = NULL; /* Bail out quickly if queue is empty */ if (list_empty(&q->out_chunk_list)) goto out; /* Find which chunk is next */ if (stream->out_curr) soute = stream->out_curr->ext; else soute = stream->rr_next; ch = list_entry(soute->outq.next, struct sctp_chunk, stream_list); sctp_sched_dequeue_common(q, ch); out: return ch; } static void sctp_sched_rr_dequeue_done(struct sctp_outq *q, struct sctp_chunk *ch) { struct sctp_stream_out_ext *soute; __u16 sid; /* Last chunk on that msg, move to the next stream */ sid = sctp_chunk_stream_no(ch); soute = SCTP_SO(&q->asoc->stream, sid)->ext; sctp_sched_rr_next_stream(&q->asoc->stream); if (list_empty(&soute->outq)) sctp_sched_rr_unsched(&q->asoc->stream, soute); } static void sctp_sched_rr_sched_all(struct sctp_stream *stream) { struct sctp_association *asoc; struct sctp_stream_out_ext *soute; struct sctp_chunk *ch; asoc = container_of(stream, struct sctp_association, stream); list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) { __u16 sid; sid = sctp_chunk_stream_no(ch); soute = SCTP_SO(stream, sid)->ext; if (soute) sctp_sched_rr_sched(stream, soute); } } static void sctp_sched_rr_unsched_all(struct sctp_stream *stream) { struct sctp_stream_out_ext *soute, *tmp; list_for_each_entry_safe(soute, tmp, &stream->rr_list, rr_list) sctp_sched_rr_unsched(stream, soute); } static const struct sctp_sched_ops sctp_sched_rr = { .set = sctp_sched_rr_set, .get = sctp_sched_rr_get, .init = sctp_sched_rr_init, .init_sid = sctp_sched_rr_init_sid, .free_sid = sctp_sched_rr_free_sid, .enqueue = sctp_sched_rr_enqueue, .dequeue = sctp_sched_rr_dequeue, .dequeue_done = sctp_sched_rr_dequeue_done, .sched_all = sctp_sched_rr_sched_all, .unsched_all = sctp_sched_rr_unsched_all, }; void sctp_sched_ops_rr_init(void) { sctp_sched_ops_register(SCTP_SS_RR, &sctp_sched_rr); }
2 2 2 2 1 1 2 2 2 2 2 2 2 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 // SPDX-License-Identifier: GPL-2.0-only /* * Hauppauge HD PVR USB driver - video 4 linux 2 interface * * Copyright (C) 2008 Janne Grunau (j@jannau.net) */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/usb.h> #include <linux/mutex.h> #include <linux/videodev2.h> #include <media/v4l2-common.h> #include "hdpvr.h" int hdpvr_config_call(struct hdpvr_device *dev, uint value, u8 valbuf) { int ret; char request_type = 0x38, snd_request = 0x01; mutex_lock(&dev->usbc_mutex); dev->usbc_buf[0] = valbuf; ret = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), snd_request, 0x00 | request_type, value, CTRL_DEFAULT_INDEX, dev->usbc_buf, 1, 10000); mutex_unlock(&dev->usbc_mutex); v4l2_dbg(MSG_INFO, hdpvr_debug, &dev->v4l2_dev, "config call request for value 0x%x returned %d\n", value, ret); return ret < 0 ? ret : 0; } int get_video_info(struct hdpvr_device *dev, struct hdpvr_video_info *vidinf) { int ret; vidinf->valid = false; mutex_lock(&dev->usbc_mutex); ret = usb_control_msg(dev->udev, usb_rcvctrlpipe(dev->udev, 0), 0x81, 0x80 | 0x38, 0x1400, 0x0003, dev->usbc_buf, 5, 1000); #ifdef HDPVR_DEBUG if (hdpvr_debug & MSG_INFO) v4l2_dbg(MSG_INFO, hdpvr_debug, &dev->v4l2_dev, "get video info returned: %d, %5ph\n", ret, dev->usbc_buf); #endif mutex_unlock(&dev->usbc_mutex); if (ret < 0) return ret; vidinf->width = dev->usbc_buf[1] << 8 | dev->usbc_buf[0]; vidinf->height = dev->usbc_buf[3] << 8 | dev->usbc_buf[2]; vidinf->fps = dev->usbc_buf[4]; vidinf->valid = vidinf->width && vidinf->height && vidinf->fps; return 0; } int get_input_lines_info(struct hdpvr_device *dev) { int ret, lines; mutex_lock(&dev->usbc_mutex); ret = usb_control_msg(dev->udev, usb_rcvctrlpipe(dev->udev, 0), 0x81, 0x80 | 0x38, 0x1800, 0x0003, dev->usbc_buf, 3, 1000); #ifdef HDPVR_DEBUG if (hdpvr_debug & MSG_INFO) v4l2_dbg(MSG_INFO, hdpvr_debug, &dev->v4l2_dev, "get input lines info returned: %d, %3ph\n", ret, dev->usbc_buf); #else (void)ret; /* suppress compiler warning */ #endif lines = dev->usbc_buf[1] << 8 | dev->usbc_buf[0]; mutex_unlock(&dev->usbc_mutex); return lines; } int hdpvr_set_bitrate(struct hdpvr_device *dev) { int ret; mutex_lock(&dev->usbc_mutex); memset(dev->usbc_buf, 0, 4); dev->usbc_buf[0] = dev->options.bitrate; dev->usbc_buf[2] = dev->options.peak_bitrate; ret = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), 0x01, 0x38, CTRL_BITRATE_VALUE, CTRL_DEFAULT_INDEX, dev->usbc_buf, 4, 1000); mutex_unlock(&dev->usbc_mutex); return ret; } int hdpvr_set_audio(struct hdpvr_device *dev, u8 input, enum v4l2_mpeg_audio_encoding codec) { int ret = 0; if (dev->flags & HDPVR_FLAG_AC3_CAP) { mutex_lock(&dev->usbc_mutex); memset(dev->usbc_buf, 0, 2); dev->usbc_buf[0] = input; if (codec == V4L2_MPEG_AUDIO_ENCODING_AAC) dev->usbc_buf[1] = 0; else if (codec == V4L2_MPEG_AUDIO_ENCODING_AC3) dev->usbc_buf[1] = 1; else { mutex_unlock(&dev->usbc_mutex); v4l2_err(&dev->v4l2_dev, "invalid audio codec %d\n", codec); ret = -EINVAL; goto error; } ret = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), 0x01, 0x38, CTRL_AUDIO_INPUT_VALUE, CTRL_DEFAULT_INDEX, dev->usbc_buf, 2, 1000); mutex_unlock(&dev->usbc_mutex); if (ret == 2) ret = 0; } else ret = hdpvr_config_call(dev, CTRL_AUDIO_INPUT_VALUE, input); error: return ret; } int hdpvr_set_options(struct hdpvr_device *dev) { hdpvr_config_call(dev, CTRL_VIDEO_STD_TYPE, dev->options.video_std); hdpvr_config_call(dev, CTRL_VIDEO_INPUT_VALUE, dev->options.video_input+1); hdpvr_set_audio(dev, dev->options.audio_input+1, dev->options.audio_codec); hdpvr_set_bitrate(dev); hdpvr_config_call(dev, CTRL_BITRATE_MODE_VALUE, dev->options.bitrate_mode); hdpvr_config_call(dev, CTRL_GOP_MODE_VALUE, dev->options.gop_mode); hdpvr_config_call(dev, CTRL_BRIGHTNESS, dev->options.brightness); hdpvr_config_call(dev, CTRL_CONTRAST, dev->options.contrast); hdpvr_config_call(dev, CTRL_HUE, dev->options.hue); hdpvr_config_call(dev, CTRL_SATURATION, dev->options.saturation); hdpvr_config_call(dev, CTRL_SHARPNESS, dev->options.sharpness); return 0; }
308 42 279 570 541 76 502 225 226 225 226 225 226 434 443 501 525 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __KVM_X86_MMU_H #define __KVM_X86_MMU_H #include <linux/kvm_host.h> #include "kvm_cache_regs.h" #include "x86.h" #include "cpuid.h" extern bool __read_mostly enable_mmio_caching; #define PT_WRITABLE_SHIFT 1 #define PT_USER_SHIFT 2 #define PT_PRESENT_MASK (1ULL << 0) #define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT) #define PT_USER_MASK (1ULL << PT_USER_SHIFT) #define PT_PWT_MASK (1ULL << 3) #define PT_PCD_MASK (1ULL << 4) #define PT_ACCESSED_SHIFT 5 #define PT_ACCESSED_MASK (1ULL << PT_ACCESSED_SHIFT) #define PT_DIRTY_SHIFT 6 #define PT_DIRTY_MASK (1ULL << PT_DIRTY_SHIFT) #define PT_PAGE_SIZE_SHIFT 7 #define PT_PAGE_SIZE_MASK (1ULL << PT_PAGE_SIZE_SHIFT) #define PT_PAT_MASK (1ULL << 7) #define PT_GLOBAL_MASK (1ULL << 8) #define PT64_NX_SHIFT 63 #define PT64_NX_MASK (1ULL << PT64_NX_SHIFT) #define PT_PAT_SHIFT 7 #define PT_DIR_PAT_SHIFT 12 #define PT_DIR_PAT_MASK (1ULL << PT_DIR_PAT_SHIFT) #define PT64_ROOT_5LEVEL 5 #define PT64_ROOT_4LEVEL 4 #define PT32_ROOT_LEVEL 2 #define PT32E_ROOT_LEVEL 3 #define KVM_MMU_CR4_ROLE_BITS (X86_CR4_PSE | X86_CR4_PAE | X86_CR4_LA57 | \ X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE) #define KVM_MMU_CR0_ROLE_BITS (X86_CR0_PG | X86_CR0_WP) #define KVM_MMU_EFER_ROLE_BITS (EFER_LME | EFER_NX) static __always_inline u64 rsvd_bits(int s, int e) { BUILD_BUG_ON(__builtin_constant_p(e) && __builtin_constant_p(s) && e < s); if (__builtin_constant_p(e)) BUILD_BUG_ON(e > 63); else e &= 63; if (e < s) return 0; return ((2ULL << (e - s)) - 1) << s; } static inline gfn_t kvm_mmu_max_gfn(void) { /* * Note that this uses the host MAXPHYADDR, not the guest's. * EPT/NPT cannot support GPAs that would exceed host.MAXPHYADDR; * assuming KVM is running on bare metal, guest accesses beyond * host.MAXPHYADDR will hit a #PF(RSVD) and never cause a vmexit * (either EPT Violation/Misconfig or #NPF), and so KVM will never * install a SPTE for such addresses. If KVM is running as a VM * itself, on the other hand, it might see a MAXPHYADDR that is less * than hardware's real MAXPHYADDR. Using the host MAXPHYADDR * disallows such SPTEs entirely and simplifies the TDP MMU. */ int max_gpa_bits = likely(tdp_enabled) ? kvm_host.maxphyaddr : 52; return (1ULL << (max_gpa_bits - PAGE_SHIFT)) - 1; } u8 kvm_mmu_get_max_tdp_level(void); void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask); void kvm_mmu_set_mmio_spte_value(struct kvm *kvm, u64 mmio_value); void kvm_mmu_set_me_spte_mask(u64 me_value, u64 me_mask); void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only); void kvm_init_mmu(struct kvm_vcpu *vcpu); void kvm_init_shadow_npt_mmu(struct kvm_vcpu *vcpu, unsigned long cr0, unsigned long cr4, u64 efer, gpa_t nested_cr3); void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, int huge_page_level, bool accessed_dirty, gpa_t new_eptp); bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu); int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, u64 fault_address, char *insn, int insn_len); void __kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu); int kvm_mmu_load(struct kvm_vcpu *vcpu); void kvm_mmu_unload(struct kvm_vcpu *vcpu); void kvm_mmu_free_obsolete_roots(struct kvm_vcpu *vcpu); void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); void kvm_mmu_sync_prev_roots(struct kvm_vcpu *vcpu); void kvm_mmu_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, int bytes); static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu) { if (kvm_check_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu)) kvm_mmu_free_obsolete_roots(vcpu); /* * Checking root.hpa is sufficient even when KVM has mirror root. * We can have either: * (1) mirror_root_hpa = INVALID_PAGE, root.hpa = INVALID_PAGE * (2) mirror_root_hpa = root, root.hpa = INVALID_PAGE * (3) mirror_root_hpa = root1, root.hpa = root2 * We don't ever have: * mirror_root_hpa = INVALID_PAGE, root.hpa = root */ if (likely(vcpu->arch.mmu->root.hpa != INVALID_PAGE)) return 0; return kvm_mmu_load(vcpu); } static inline unsigned long kvm_get_pcid(struct kvm_vcpu *vcpu, gpa_t cr3) { BUILD_BUG_ON((X86_CR3_PCID_MASK & PAGE_MASK) != 0); return kvm_is_cr4_bit_set(vcpu, X86_CR4_PCIDE) ? cr3 & X86_CR3_PCID_MASK : 0; } static inline unsigned long kvm_get_active_pcid(struct kvm_vcpu *vcpu) { return kvm_get_pcid(vcpu, kvm_read_cr3(vcpu)); } static inline unsigned long kvm_get_active_cr3_lam_bits(struct kvm_vcpu *vcpu) { if (!guest_cpu_cap_has(vcpu, X86_FEATURE_LAM)) return 0; return kvm_read_cr3(vcpu) & (X86_CR3_LAM_U48 | X86_CR3_LAM_U57); } static inline void kvm_mmu_load_pgd(struct kvm_vcpu *vcpu) { u64 root_hpa = vcpu->arch.mmu->root.hpa; if (!VALID_PAGE(root_hpa)) return; kvm_x86_call(load_mmu_pgd)(vcpu, root_hpa, vcpu->arch.mmu->root_role.level); } static inline void kvm_mmu_refresh_passthrough_bits(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) { /* * When EPT is enabled, KVM may passthrough CR0.WP to the guest, i.e. * @mmu's snapshot of CR0.WP and thus all related paging metadata may * be stale. Refresh CR0.WP and the metadata on-demand when checking * for permission faults. Exempt nested MMUs, i.e. MMUs for shadowing * nEPT and nNPT, as CR0.WP is ignored in both cases. Note, KVM does * need to refresh nested_mmu, a.k.a. the walker used to translate L2 * GVAs to GPAs, as that "MMU" needs to honor L2's CR0.WP. */ if (!tdp_enabled || mmu == &vcpu->arch.guest_mmu) return; __kvm_mmu_refresh_passthrough_bits(vcpu, mmu); } /* * Check if a given access (described through the I/D, W/R and U/S bits of a * page fault error code pfec) causes a permission fault with the given PTE * access rights (in ACC_* format). * * Return zero if the access does not fault; return the page fault error code * if the access faults. */ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned pte_access, unsigned pte_pkey, u64 access) { /* strip nested paging fault error codes */ unsigned int pfec = access; unsigned long rflags = kvm_x86_call(get_rflags)(vcpu); /* * For explicit supervisor accesses, SMAP is disabled if EFLAGS.AC = 1. * For implicit supervisor accesses, SMAP cannot be overridden. * * SMAP works on supervisor accesses only, and not_smap can * be set or not set when user access with neither has any bearing * on the result. * * We put the SMAP checking bit in place of the PFERR_RSVD_MASK bit; * this bit will always be zero in pfec, but it will be one in index * if SMAP checks are being disabled. */ u64 implicit_access = access & PFERR_IMPLICIT_ACCESS; bool not_smap = ((rflags & X86_EFLAGS_AC) | implicit_access) == X86_EFLAGS_AC; int index = (pfec | (not_smap ? PFERR_RSVD_MASK : 0)) >> 1; u32 errcode = PFERR_PRESENT_MASK; bool fault; kvm_mmu_refresh_passthrough_bits(vcpu, mmu); fault = (mmu->permissions[index] >> pte_access) & 1; WARN_ON_ONCE(pfec & (PFERR_PK_MASK | PFERR_SS_MASK | PFERR_RSVD_MASK)); if (unlikely(mmu->pkru_mask)) { u32 pkru_bits, offset; /* * PKRU defines 32 bits, there are 16 domains and 2 * attribute bits per domain in pkru. pte_pkey is the * index of the protection domain, so pte_pkey * 2 is * is the index of the first bit for the domain. */ pkru_bits = (vcpu->arch.pkru >> (pte_pkey * 2)) & 3; /* clear present bit, replace PFEC.RSVD with ACC_USER_MASK. */ offset = (pfec & ~1) | ((pte_access & PT_USER_MASK) ? PFERR_RSVD_MASK : 0); pkru_bits &= mmu->pkru_mask >> offset; errcode |= -pkru_bits & PFERR_PK_MASK; fault |= (pkru_bits != 0); } return -(u32)fault & errcode; } int kvm_mmu_post_init_vm(struct kvm *kvm); void kvm_mmu_pre_destroy_vm(struct kvm *kvm); static inline bool kvm_shadow_root_allocated(struct kvm *kvm) { /* * Read shadow_root_allocated before related pointers. Hence, threads * reading shadow_root_allocated in any lock context are guaranteed to * see the pointers. Pairs with smp_store_release in * mmu_first_shadow_root_alloc. */ return smp_load_acquire(&kvm->arch.shadow_root_allocated); } #ifdef CONFIG_X86_64 extern bool tdp_mmu_enabled; #else #define tdp_mmu_enabled false #endif int kvm_tdp_mmu_map_private_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn); static inline bool kvm_memslots_have_rmaps(struct kvm *kvm) { return !tdp_mmu_enabled || kvm_shadow_root_allocated(kvm); } static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) { /* KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K) must be 0. */ return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - (base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); } static inline unsigned long __kvm_mmu_slot_lpages(struct kvm_memory_slot *slot, unsigned long npages, int level) { return gfn_to_index(slot->base_gfn + npages - 1, slot->base_gfn, level) + 1; } static inline unsigned long kvm_mmu_slot_lpages(struct kvm_memory_slot *slot, int level) { return __kvm_mmu_slot_lpages(slot, slot->npages, level); } static inline void kvm_update_page_stats(struct kvm *kvm, int level, int count) { atomic64_add(count, &kvm->stat.pages[level - 1]); } gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u64 access, struct x86_exception *exception); static inline gpa_t kvm_translate_gpa(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, gpa_t gpa, u64 access, struct x86_exception *exception) { if (mmu != &vcpu->arch.nested_mmu) return gpa; return translate_nested_gpa(vcpu, gpa, access, exception); } static inline bool kvm_has_mirrored_tdp(const struct kvm *kvm) { return kvm->arch.vm_type == KVM_X86_TDX_VM; } static inline gfn_t kvm_gfn_direct_bits(const struct kvm *kvm) { return kvm->arch.gfn_direct_bits; } static inline bool kvm_is_addr_direct(struct kvm *kvm, gpa_t gpa) { gpa_t gpa_direct_bits = gfn_to_gpa(kvm_gfn_direct_bits(kvm)); return !gpa_direct_bits || (gpa & gpa_direct_bits); } static inline bool kvm_is_gfn_alias(struct kvm *kvm, gfn_t gfn) { return gfn & kvm_gfn_direct_bits(kvm); } #endif
26 27 2 2 2 2 2 1 2 2 2 2 2 3 3 3 3 3 3 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 4 2 1 1 6 2 1 1 1 1 1 1 1 1 1 6 1 2 1 2 1 4 2 1 1 1 4 2 1 1 1 2 1 2 2 14 14 2 2 2 4 3 2 1 14 6 2 6 3 2 4 1 1 1 3 1 4 2 2 2 2 2 3 3 2 2 2 2 2 1 8 21 2 2 2 3 8 14 1 3 2 1 10 2 1 2 1 4 3 1 1 3 2 1 1 1 2 1 1 7 1 3 1 2 5 2 3 1 2 11 3 1 7 6 5 2 10 2 2 2 1 1 3 1 4 2 1 1 1 1 7 1 1 2 1 2 1 14 1 4 1 7 1 5 2 1 1 1 7 3 1 3 1 1 4 2 2 4 2 1 1 4 4 5 5 5 4 4 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 /* * Copyright (c) 2017 Mellanox Technologies. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the names of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * Alternatively, this software may be distributed under the terms of the * GNU General Public License ("GPL") version 2 as published by the Free * Software Foundation. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include <linux/module.h> #include <linux/pid.h> #include <linux/pid_namespace.h> #include <linux/mutex.h> #include <net/netlink.h> #include <rdma/rdma_cm.h> #include <rdma/rdma_netlink.h> #include "core_priv.h" #include "cma_priv.h" #include "restrack.h" #include "uverbs.h" /* * This determines whether a non-privileged user is allowed to specify a * controlled QKEY or not, when true non-privileged user is allowed to specify * a controlled QKEY. */ static bool privileged_qkey; typedef int (*res_fill_func_t)(struct sk_buff*, bool, struct rdma_restrack_entry*, uint32_t); /* * Sort array elements by the netlink attribute name */ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = { [RDMA_NLDEV_ATTR_CHARDEV] = { .type = NLA_U64 }, [RDMA_NLDEV_ATTR_CHARDEV_ABI] = { .type = NLA_U64 }, [RDMA_NLDEV_ATTR_CHARDEV_NAME] = { .type = NLA_NUL_STRING, .len = RDMA_NLDEV_ATTR_EMPTY_STRING }, [RDMA_NLDEV_ATTR_CHARDEV_TYPE] = { .type = NLA_NUL_STRING, .len = RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE }, [RDMA_NLDEV_ATTR_DEV_DIM] = { .type = NLA_U8 }, [RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, .len = IB_DEVICE_NAME_MAX }, [RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 }, [RDMA_NLDEV_ATTR_DEV_PROTOCOL] = { .type = NLA_NUL_STRING, .len = RDMA_NLDEV_ATTR_EMPTY_STRING }, [RDMA_NLDEV_ATTR_DRIVER] = { .type = NLA_NESTED }, [RDMA_NLDEV_ATTR_DRIVER_ENTRY] = { .type = NLA_NESTED }, [RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE] = { .type = NLA_U8 }, [RDMA_NLDEV_ATTR_DRIVER_STRING] = { .type = NLA_NUL_STRING, .len = RDMA_NLDEV_ATTR_EMPTY_STRING }, [RDMA_NLDEV_ATTR_DRIVER_S32] = { .type = NLA_S32 }, [RDMA_NLDEV_ATTR_DRIVER_S64] = { .type = NLA_S64 }, [RDMA_NLDEV_ATTR_DRIVER_U32] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_DRIVER_U64] = { .type = NLA_U64 }, [RDMA_NLDEV_ATTR_FW_VERSION] = { .type = NLA_NUL_STRING, .len = RDMA_NLDEV_ATTR_EMPTY_STRING }, [RDMA_NLDEV_ATTR_LID] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_LINK_TYPE] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ }, [RDMA_NLDEV_ATTR_LMC] = { .type = NLA_U8 }, [RDMA_NLDEV_ATTR_NDEV_INDEX] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_NDEV_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ }, [RDMA_NLDEV_ATTR_NODE_GUID] = { .type = NLA_U64 }, [RDMA_NLDEV_ATTR_PORT_INDEX] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 }, [RDMA_NLDEV_ATTR_PORT_STATE] = { .type = NLA_U8 }, [RDMA_NLDEV_ATTR_RES_CM_ID] = { .type = NLA_NESTED }, [RDMA_NLDEV_ATTR_RES_CM_IDN] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY] = { .type = NLA_NESTED }, [RDMA_NLDEV_ATTR_RES_CQ] = { .type = NLA_NESTED }, [RDMA_NLDEV_ATTR_RES_CQE] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_CQN] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_CQ_ENTRY] = { .type = NLA_NESTED }, [RDMA_NLDEV_ATTR_RES_CTX] = { .type = NLA_NESTED }, [RDMA_NLDEV_ATTR_RES_CTXN] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_CTX_ENTRY] = { .type = NLA_NESTED }, [RDMA_NLDEV_ATTR_RES_DST_ADDR] = { .len = sizeof(struct __kernel_sockaddr_storage) }, [RDMA_NLDEV_ATTR_RES_IOVA] = { .type = NLA_U64 }, [RDMA_NLDEV_ATTR_RES_KERN_NAME] = { .type = NLA_NUL_STRING, .len = RDMA_NLDEV_ATTR_EMPTY_STRING }, [RDMA_NLDEV_ATTR_RES_LKEY] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_LQPN] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_MR] = { .type = NLA_NESTED }, [RDMA_NLDEV_ATTR_RES_MRLEN] = { .type = NLA_U64 }, [RDMA_NLDEV_ATTR_RES_MRN] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_MR_ENTRY] = { .type = NLA_NESTED }, [RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE] = { .type = NLA_U8 }, [RDMA_NLDEV_ATTR_RES_PD] = { .type = NLA_NESTED }, [RDMA_NLDEV_ATTR_RES_PDN] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_PD_ENTRY] = { .type = NLA_NESTED }, [RDMA_NLDEV_ATTR_RES_PID] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_POLL_CTX] = { .type = NLA_U8 }, [RDMA_NLDEV_ATTR_RES_PS] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_QP] = { .type = NLA_NESTED }, [RDMA_NLDEV_ATTR_RES_QP_ENTRY] = { .type = NLA_NESTED }, [RDMA_NLDEV_ATTR_RES_RAW] = { .type = NLA_BINARY }, [RDMA_NLDEV_ATTR_RES_RKEY] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_RQPN] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_RQ_PSN] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_SQ_PSN] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_SRC_ADDR] = { .len = sizeof(struct __kernel_sockaddr_storage) }, [RDMA_NLDEV_ATTR_RES_STATE] = { .type = NLA_U8 }, [RDMA_NLDEV_ATTR_RES_SUMMARY] = { .type = NLA_NESTED }, [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY] = { .type = NLA_NESTED }, [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR]= { .type = NLA_U64 }, [RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME]= { .type = NLA_NUL_STRING, .len = RDMA_NLDEV_ATTR_EMPTY_STRING }, [RDMA_NLDEV_ATTR_RES_TYPE] = { .type = NLA_U8 }, [RDMA_NLDEV_ATTR_RES_SUBTYPE] = { .type = NLA_NUL_STRING, .len = RDMA_NLDEV_ATTR_EMPTY_STRING }, [RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY]= { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_USECNT] = { .type = NLA_U64 }, [RDMA_NLDEV_ATTR_RES_SRQ] = { .type = NLA_NESTED }, [RDMA_NLDEV_ATTR_RES_SRQN] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_RES_SRQ_ENTRY] = { .type = NLA_NESTED }, [RDMA_NLDEV_ATTR_MIN_RANGE] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_MAX_RANGE] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_SM_LID] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_SUBNET_PREFIX] = { .type = NLA_U64 }, [RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_STAT_MODE] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_STAT_RES] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_STAT_COUNTER] = { .type = NLA_NESTED }, [RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY] = { .type = NLA_NESTED }, [RDMA_NLDEV_ATTR_STAT_COUNTER_ID] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_STAT_HWCOUNTERS] = { .type = NLA_NESTED }, [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY] = { .type = NLA_NESTED }, [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME] = { .type = NLA_NUL_STRING }, [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE] = { .type = NLA_U64 }, [RDMA_NLDEV_ATTR_SYS_IMAGE_GUID] = { .type = NLA_U64 }, [RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID] = { .type = NLA_U32 }, [RDMA_NLDEV_NET_NS_FD] = { .type = NLA_U32 }, [RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = { .type = NLA_U8 }, [RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK] = { .type = NLA_U8 }, [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX] = { .type = NLA_U32 }, [RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC] = { .type = NLA_U8 }, [RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE] = { .type = NLA_U8 }, [RDMA_NLDEV_ATTR_DRIVER_DETAILS] = { .type = NLA_U8 }, [RDMA_NLDEV_ATTR_DEV_TYPE] = { .type = NLA_U8 }, [RDMA_NLDEV_ATTR_PARENT_NAME] = { .type = NLA_NUL_STRING }, [RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE] = { .type = NLA_U8 }, [RDMA_NLDEV_ATTR_EVENT_TYPE] = { .type = NLA_U8 }, [RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED] = { .type = NLA_U8 }, }; static int put_driver_name_print_type(struct sk_buff *msg, const char *name, enum rdma_nldev_print_type print_type) { if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, name)) return -EMSGSIZE; if (print_type != RDMA_NLDEV_PRINT_TYPE_UNSPEC && nla_put_u8(msg, RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE, print_type)) return -EMSGSIZE; return 0; } static int _rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, enum rdma_nldev_print_type print_type, u32 value) { if (put_driver_name_print_type(msg, name, print_type)) return -EMSGSIZE; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DRIVER_U32, value)) return -EMSGSIZE; return 0; } static int _rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, enum rdma_nldev_print_type print_type, u64 value) { if (put_driver_name_print_type(msg, name, print_type)) return -EMSGSIZE; if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_DRIVER_U64, value, RDMA_NLDEV_ATTR_PAD)) return -EMSGSIZE; return 0; } int rdma_nl_put_driver_string(struct sk_buff *msg, const char *name, const char *str) { if (put_driver_name_print_type(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC)) return -EMSGSIZE; if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, str)) return -EMSGSIZE; return 0; } EXPORT_SYMBOL(rdma_nl_put_driver_string); int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value) { return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC, value); } EXPORT_SYMBOL(rdma_nl_put_driver_u32); int rdma_nl_put_driver_u32_hex(struct sk_buff *msg, const char *name, u32 value) { return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX, value); } EXPORT_SYMBOL(rdma_nl_put_driver_u32_hex); int rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, u64 value) { return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC, value); } EXPORT_SYMBOL(rdma_nl_put_driver_u64); int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name, u64 value) { return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX, value); } EXPORT_SYMBOL(rdma_nl_put_driver_u64_hex); bool rdma_nl_get_privileged_qkey(void) { return privileged_qkey; } EXPORT_SYMBOL(rdma_nl_get_privileged_qkey); static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device) { if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index)) return -EMSGSIZE; if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, dev_name(&device->dev))) return -EMSGSIZE; return 0; } static int fill_dev_info(struct sk_buff *msg, struct ib_device *device) { char fw[IB_FW_VERSION_NAME_MAX]; int ret = 0; u32 port; if (fill_nldev_handle(msg, device)) return -EMSGSIZE; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device))) return -EMSGSIZE; BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64)); if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS, device->attrs.device_cap_flags, RDMA_NLDEV_ATTR_PAD)) return -EMSGSIZE; ib_get_device_fw_str(device, fw); /* Device without FW has strlen(fw) = 0 */ if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw)) return -EMSGSIZE; if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID, be64_to_cpu(device->node_guid), RDMA_NLDEV_ATTR_PAD)) return -EMSGSIZE; if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID, be64_to_cpu(device->attrs.sys_image_guid), RDMA_NLDEV_ATTR_PAD)) return -EMSGSIZE; if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type)) return -EMSGSIZE; if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, device->use_cq_dim)) return -EMSGSIZE; if (device->type && nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_TYPE, device->type)) return -EMSGSIZE; if (device->parent && nla_put_string(msg, RDMA_NLDEV_ATTR_PARENT_NAME, dev_name(&device->parent->dev))) return -EMSGSIZE; if (nla_put_u8(msg, RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE, device->name_assign_type)) return -EMSGSIZE; /* * Link type is determined on first port and mlx4 device * which can potentially have two different link type for the same * IB device is considered as better to be avoided in the future, */ port = rdma_start_port(device); if (rdma_cap_opa_mad(device, port)) ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "opa"); else if (rdma_protocol_ib(device, port)) ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "ib"); else if (rdma_protocol_iwarp(device, port)) ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "iw"); else if (rdma_protocol_roce(device, port)) ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "roce"); else if (rdma_protocol_usnic(device, port)) ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "usnic"); return ret; } static int fill_port_info(struct sk_buff *msg, struct ib_device *device, u32 port, const struct net *net) { struct net_device *netdev = NULL; struct ib_port_attr attr; int ret; u64 cap_flags = 0; if (fill_nldev_handle(msg, device)) return -EMSGSIZE; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) return -EMSGSIZE; ret = ib_query_port(device, port, &attr); if (ret) return ret; if (rdma_protocol_ib(device, port)) { BUILD_BUG_ON((sizeof(attr.port_cap_flags) + sizeof(attr.port_cap_flags2)) > sizeof(u64)); cap_flags = attr.port_cap_flags | ((u64)attr.port_cap_flags2 << 32); if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS, cap_flags, RDMA_NLDEV_ATTR_PAD)) return -EMSGSIZE; if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX, attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD)) return -EMSGSIZE; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid)) return -EMSGSIZE; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid)) return -EMSGSIZE; if (nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, attr.lmc)) return -EMSGSIZE; } if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_STATE, attr.state)) return -EMSGSIZE; if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state)) return -EMSGSIZE; netdev = ib_device_get_netdev(device, port); if (netdev && net_eq(dev_net(netdev), net)) { ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex); if (ret) goto out; ret = nla_put_string(msg, RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name); } out: dev_put(netdev); return ret; } static int fill_res_info_entry(struct sk_buff *msg, const char *name, u64 curr) { struct nlattr *entry_attr; entry_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY); if (!entry_attr) return -EMSGSIZE; if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name)) goto err; if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr, RDMA_NLDEV_ATTR_PAD)) goto err; nla_nest_end(msg, entry_attr); return 0; err: nla_nest_cancel(msg, entry_attr); return -EMSGSIZE; } static int fill_res_info(struct sk_buff *msg, struct ib_device *device, bool show_details) { static const char * const names[RDMA_RESTRACK_MAX] = { [RDMA_RESTRACK_PD] = "pd", [RDMA_RESTRACK_CQ] = "cq", [RDMA_RESTRACK_QP] = "qp", [RDMA_RESTRACK_CM_ID] = "cm_id", [RDMA_RESTRACK_MR] = "mr", [RDMA_RESTRACK_CTX] = "ctx", [RDMA_RESTRACK_SRQ] = "srq", }; struct nlattr *table_attr; int ret, i, curr; if (fill_nldev_handle(msg, device)) return -EMSGSIZE; table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_RES_SUMMARY); if (!table_attr) return -EMSGSIZE; for (i = 0; i < RDMA_RESTRACK_MAX; i++) { if (!names[i]) continue; curr = rdma_restrack_count(device, i, show_details); ret = fill_res_info_entry(msg, names[i], curr); if (ret) goto err; } nla_nest_end(msg, table_attr); return 0; err: nla_nest_cancel(msg, table_attr); return ret; } static int fill_res_name_pid(struct sk_buff *msg, struct rdma_restrack_entry *res) { int err = 0; /* * For user resources, user is should read /proc/PID/comm to get the * name of the task file. */ if (rdma_is_kernel_res(res)) { err = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME, res->kern_name); } else { pid_t pid; pid = task_pid_vnr(res->task); /* * Task is dead and in zombie state. * There is no need to print PID anymore. */ if (pid) /* * This part is racy, task can be killed and PID will * be zero right here but it is ok, next query won't * return PID. We don't promise real-time reflection * of SW objects. */ err = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, pid); } return err ? -EMSGSIZE : 0; } static int fill_res_qp_entry_query(struct sk_buff *msg, struct rdma_restrack_entry *res, struct ib_device *dev, struct ib_qp *qp) { struct ib_qp_init_attr qp_init_attr; struct ib_qp_attr qp_attr; int ret; ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr); if (ret) return ret; if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) { if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN, qp_attr.dest_qp_num)) goto err; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN, qp_attr.rq_psn)) goto err; } if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn)) goto err; if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC || qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) { if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE, qp_attr.path_mig_state)) goto err; } if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type)) goto err; if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state)) goto err; if (dev->ops.fill_res_qp_entry) return dev->ops.fill_res_qp_entry(msg, qp); return 0; err: return -EMSGSIZE; } static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { struct ib_qp *qp = container_of(res, struct ib_qp, res); struct ib_device *dev = qp->device; int ret; if (port && port != qp->port) return -EAGAIN; /* In create_qp() port is not set yet */ if (qp->port && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp->port)) return -EMSGSIZE; ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num); if (ret) return -EMSGSIZE; if (!rdma_is_kernel_res(res) && nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id)) return -EMSGSIZE; ret = fill_res_name_pid(msg, res); if (ret) return -EMSGSIZE; return fill_res_qp_entry_query(msg, res, dev, qp); } static int fill_res_qp_raw_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { struct ib_qp *qp = container_of(res, struct ib_qp, res); struct ib_device *dev = qp->device; if (port && port != qp->port) return -EAGAIN; if (!dev->ops.fill_res_qp_entry_raw) return -EINVAL; return dev->ops.fill_res_qp_entry_raw(msg, qp); } static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { struct rdma_id_private *id_priv = container_of(res, struct rdma_id_private, res); struct ib_device *dev = id_priv->id.device; struct rdma_cm_id *cm_id = &id_priv->id; if (port && port != cm_id->port_num) return -EAGAIN; if (cm_id->port_num && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num)) goto err; if (id_priv->qp_num) { if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num)) goto err; if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type)) goto err; } if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps)) goto err; if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state)) goto err; if (cm_id->route.addr.src_addr.ss_family && nla_put(msg, RDMA_NLDEV_ATTR_RES_SRC_ADDR, sizeof(cm_id->route.addr.src_addr), &cm_id->route.addr.src_addr)) goto err; if (cm_id->route.addr.dst_addr.ss_family && nla_put(msg, RDMA_NLDEV_ATTR_RES_DST_ADDR, sizeof(cm_id->route.addr.dst_addr), &cm_id->route.addr.dst_addr)) goto err; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CM_IDN, res->id)) goto err; if (fill_res_name_pid(msg, res)) goto err; if (dev->ops.fill_res_cm_id_entry) return dev->ops.fill_res_cm_id_entry(msg, cm_id); return 0; err: return -EMSGSIZE; } static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { struct ib_cq *cq = container_of(res, struct ib_cq, res); struct ib_device *dev = cq->device; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe)) return -EMSGSIZE; if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT, atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD)) return -EMSGSIZE; /* Poll context is only valid for kernel CQs */ if (rdma_is_kernel_res(res) && nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx)) return -EMSGSIZE; if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, (cq->dim != NULL))) return -EMSGSIZE; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id)) return -EMSGSIZE; if (!rdma_is_kernel_res(res) && nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN, cq->uobject->uevent.uobject.context->res.id)) return -EMSGSIZE; if (fill_res_name_pid(msg, res)) return -EMSGSIZE; return (dev->ops.fill_res_cq_entry) ? dev->ops.fill_res_cq_entry(msg, cq) : 0; } static int fill_res_cq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { struct ib_cq *cq = container_of(res, struct ib_cq, res); struct ib_device *dev = cq->device; if (!dev->ops.fill_res_cq_entry_raw) return -EINVAL; return dev->ops.fill_res_cq_entry_raw(msg, cq); } static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { struct ib_mr *mr = container_of(res, struct ib_mr, res); struct ib_device *dev = mr->pd->device; if (has_cap_net_admin) { if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey)) return -EMSGSIZE; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey)) return -EMSGSIZE; } if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length, RDMA_NLDEV_ATTR_PAD)) return -EMSGSIZE; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id)) return -EMSGSIZE; if (!rdma_is_kernel_res(res) && nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id)) return -EMSGSIZE; if (fill_res_name_pid(msg, res)) return -EMSGSIZE; return (dev->ops.fill_res_mr_entry) ? dev->ops.fill_res_mr_entry(msg, mr) : 0; } static int fill_res_mr_raw_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { struct ib_mr *mr = container_of(res, struct ib_mr, res); struct ib_device *dev = mr->pd->device; if (!dev->ops.fill_res_mr_entry_raw) return -EINVAL; return dev->ops.fill_res_mr_entry_raw(msg, mr); } static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { struct ib_pd *pd = container_of(res, struct ib_pd, res); if (has_cap_net_admin) { if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY, pd->local_dma_lkey)) goto err; if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) && nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY, pd->unsafe_global_rkey)) goto err; } if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT, atomic_read(&pd->usecnt), RDMA_NLDEV_ATTR_PAD)) goto err; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, res->id)) goto err; if (!rdma_is_kernel_res(res) && nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN, pd->uobject->context->res.id)) goto err; return fill_res_name_pid(msg, res); err: return -EMSGSIZE; } static int fill_res_ctx_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { struct ib_ucontext *ctx = container_of(res, struct ib_ucontext, res); if (rdma_is_kernel_res(res)) return 0; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN, ctx->res.id)) return -EMSGSIZE; return fill_res_name_pid(msg, res); } static int fill_res_range_qp_entry(struct sk_buff *msg, uint32_t min_range, uint32_t max_range) { struct nlattr *entry_attr; if (!min_range) return 0; entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY); if (!entry_attr) return -EMSGSIZE; if (min_range == max_range) { if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, min_range)) goto err; } else { if (nla_put_u32(msg, RDMA_NLDEV_ATTR_MIN_RANGE, min_range)) goto err; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_MAX_RANGE, max_range)) goto err; } nla_nest_end(msg, entry_attr); return 0; err: nla_nest_cancel(msg, entry_attr); return -EMSGSIZE; } static int fill_res_srq_qps(struct sk_buff *msg, struct ib_srq *srq) { uint32_t min_range = 0, prev = 0; struct rdma_restrack_entry *res; struct rdma_restrack_root *rt; struct nlattr *table_attr; struct ib_qp *qp = NULL; unsigned long id = 0; table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP); if (!table_attr) return -EMSGSIZE; rt = &srq->device->res[RDMA_RESTRACK_QP]; xa_lock(&rt->xa); xa_for_each(&rt->xa, id, res) { if (!rdma_restrack_get(res)) continue; qp = container_of(res, struct ib_qp, res); if (!qp->srq || (qp->srq->res.id != srq->res.id)) { rdma_restrack_put(res); continue; } if (qp->qp_num < prev) /* qp_num should be ascending */ goto err_loop; if (min_range == 0) { min_range = qp->qp_num; } else if (qp->qp_num > (prev + 1)) { if (fill_res_range_qp_entry(msg, min_range, prev)) goto err_loop; min_range = qp->qp_num; } prev = qp->qp_num; rdma_restrack_put(res); } xa_unlock(&rt->xa); if (fill_res_range_qp_entry(msg, min_range, prev)) goto err; nla_nest_end(msg, table_attr); return 0; err_loop: rdma_restrack_put(res); xa_unlock(&rt->xa); err: nla_nest_cancel(msg, table_attr); return -EMSGSIZE; } static int fill_res_srq_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { struct ib_srq *srq = container_of(res, struct ib_srq, res); struct ib_device *dev = srq->device; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SRQN, srq->res.id)) goto err; if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, srq->srq_type)) goto err; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, srq->pd->res.id)) goto err; if (ib_srq_has_cq(srq->srq_type)) { if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, srq->ext.cq->res.id)) goto err; } if (fill_res_srq_qps(msg, srq)) goto err; if (fill_res_name_pid(msg, res)) goto err; if (dev->ops.fill_res_srq_entry) return dev->ops.fill_res_srq_entry(msg, srq); return 0; err: return -EMSGSIZE; } static int fill_res_srq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { struct ib_srq *srq = container_of(res, struct ib_srq, res); struct ib_device *dev = srq->device; if (!dev->ops.fill_res_srq_entry_raw) return -EINVAL; return dev->ops.fill_res_srq_entry_raw(msg, srq); } static int fill_stat_counter_mode(struct sk_buff *msg, struct rdma_counter *counter) { struct rdma_counter_mode *m = &counter->mode; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, m->mode)) return -EMSGSIZE; if (m->mode == RDMA_COUNTER_MODE_AUTO) { if ((m->mask & RDMA_COUNTER_MASK_QP_TYPE) && nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, m->param.qp_type)) return -EMSGSIZE; if ((m->mask & RDMA_COUNTER_MASK_PID) && fill_res_name_pid(msg, &counter->res)) return -EMSGSIZE; } return 0; } static int fill_stat_counter_qp_entry(struct sk_buff *msg, u32 qpn) { struct nlattr *entry_attr; entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY); if (!entry_attr) return -EMSGSIZE; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) goto err; nla_nest_end(msg, entry_attr); return 0; err: nla_nest_cancel(msg, entry_attr); return -EMSGSIZE; } static int fill_stat_counter_qps(struct sk_buff *msg, struct rdma_counter *counter) { struct rdma_restrack_entry *res; struct rdma_restrack_root *rt; struct nlattr *table_attr; struct ib_qp *qp = NULL; unsigned long id = 0; int ret = 0; table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP); if (!table_attr) return -EMSGSIZE; rt = &counter->device->res[RDMA_RESTRACK_QP]; xa_lock(&rt->xa); xa_for_each(&rt->xa, id, res) { qp = container_of(res, struct ib_qp, res); if (!qp->counter || (qp->counter->id != counter->id)) continue; ret = fill_stat_counter_qp_entry(msg, qp->qp_num); if (ret) goto err; } xa_unlock(&rt->xa); nla_nest_end(msg, table_attr); return 0; err: xa_unlock(&rt->xa); nla_nest_cancel(msg, table_attr); return ret; } int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name, u64 value) { struct nlattr *entry_attr; entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY); if (!entry_attr) return -EMSGSIZE; if (nla_put_string(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME, name)) goto err; if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE, value, RDMA_NLDEV_ATTR_PAD)) goto err; nla_nest_end(msg, entry_attr); return 0; err: nla_nest_cancel(msg, entry_attr); return -EMSGSIZE; } EXPORT_SYMBOL(rdma_nl_stat_hwcounter_entry); static int fill_stat_mr_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { struct ib_mr *mr = container_of(res, struct ib_mr, res); struct ib_device *dev = mr->pd->device; if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id)) goto err; if (dev->ops.fill_stat_mr_entry) return dev->ops.fill_stat_mr_entry(msg, mr); return 0; err: return -EMSGSIZE; } static int fill_stat_counter_hwcounters(struct sk_buff *msg, struct rdma_counter *counter) { struct rdma_hw_stats *st = counter->stats; struct nlattr *table_attr; int i; table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS); if (!table_attr) return -EMSGSIZE; mutex_lock(&st->lock); for (i = 0; i < st->num_counters; i++) { if (test_bit(i, st->is_disabled)) continue; if (rdma_nl_stat_hwcounter_entry(msg, st->descs[i].name, st->value[i])) goto err; } mutex_unlock(&st->lock); nla_nest_end(msg, table_attr); return 0; err: mutex_unlock(&st->lock); nla_nest_cancel(msg, table_attr); return -EMSGSIZE; } static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_restrack_entry *res, uint32_t port) { struct rdma_counter *counter = container_of(res, struct rdma_counter, res); if (port && port != counter->port) return -EAGAIN; /* Dump it even query failed */ rdma_counter_query_stats(counter); if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, counter->port) || nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, counter->id) || fill_stat_counter_mode(msg, counter) || fill_stat_counter_qps(msg, counter) || fill_stat_counter_hwcounters(msg, counter)) return -EMSGSIZE; return 0; } static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; struct ib_device *device; struct sk_buff *msg; u32 index; int err; err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, NL_VALIDATE_LIBERAL, extack); if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) return -EINVAL; index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); device = ib_device_get_by_index(sock_net(skb->sk), index); if (!device) return -EINVAL; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { err = -ENOMEM; goto err; } nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET), 0, 0); if (!nlh) { err = -EMSGSIZE; goto err_free; } err = fill_dev_info(msg, device); if (err) goto err_free; nlmsg_end(msg, nlh); ib_device_put(device); return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); err_free: nlmsg_free(msg); err: ib_device_put(device); return err; } static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; struct ib_device *device; u32 index; int err; err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, extack); if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) return -EINVAL; index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); device = ib_device_get_by_index(sock_net(skb->sk), index); if (!device) return -EINVAL; if (tb[RDMA_NLDEV_ATTR_DEV_NAME]) { char name[IB_DEVICE_NAME_MAX] = {}; nla_strscpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME], IB_DEVICE_NAME_MAX); if (strlen(name) == 0) { err = -EINVAL; goto done; } err = ib_device_rename(device, name); goto done; } if (tb[RDMA_NLDEV_NET_NS_FD]) { u32 ns_fd; ns_fd = nla_get_u32(tb[RDMA_NLDEV_NET_NS_FD]); err = ib_device_set_netns_put(skb, device, ns_fd); goto put_done; } if (tb[RDMA_NLDEV_ATTR_DEV_DIM]) { u8 use_dim; use_dim = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_DIM]); err = ib_device_set_dim(device, use_dim); goto done; } done: ib_device_put(device); put_done: return err; } static int _nldev_get_dumpit(struct ib_device *device, struct sk_buff *skb, struct netlink_callback *cb, unsigned int idx) { int start = cb->args[0]; struct nlmsghdr *nlh; if (idx < start) return 0; nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET), 0, NLM_F_MULTI); if (!nlh || fill_dev_info(skb, device)) { nlmsg_cancel(skb, nlh); goto out; } nlmsg_end(skb, nlh); idx++; out: cb->args[0] = idx; return skb->len; } static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { /* * There is no need to take lock, because * we are relying on ib_core's locking. */ return ib_enum_all_devs(_nldev_get_dumpit, skb, cb); } static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; struct ib_device *device; struct sk_buff *msg; u32 index; u32 port; int err; err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, NL_VALIDATE_LIBERAL, extack); if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) return -EINVAL; index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); device = ib_device_get_by_index(sock_net(skb->sk), index); if (!device) return -EINVAL; port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); if (!rdma_is_port_valid(device, port)) { err = -EINVAL; goto err; } msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { err = -ENOMEM; goto err; } nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET), 0, 0); if (!nlh) { err = -EMSGSIZE; goto err_free; } err = fill_port_info(msg, device, port, sock_net(skb->sk)); if (err) goto err_free; nlmsg_end(msg, nlh); ib_device_put(device); return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); err_free: nlmsg_free(msg); err: ib_device_put(device); return err; } static int nldev_port_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; struct ib_device *device; int start = cb->args[0]; struct nlmsghdr *nlh; u32 idx = 0; u32 ifindex; int err; unsigned int p; err = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, NL_VALIDATE_LIBERAL, NULL); if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) return -EINVAL; ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); device = ib_device_get_by_index(sock_net(skb->sk), ifindex); if (!device) return -EINVAL; rdma_for_each_port (device, p) { /* * The dumpit function returns all information from specific * index. This specific index is taken from the netlink * messages request sent by user and it is available * in cb->args[0]. * * Usually, the user doesn't fill this field and it causes * to return everything. * */ if (idx < start) { idx++; continue; } nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_PORT_GET), 0, NLM_F_MULTI); if (!nlh || fill_port_info(skb, device, p, sock_net(skb->sk))) { nlmsg_cancel(skb, nlh); goto out; } idx++; nlmsg_end(skb, nlh); } out: ib_device_put(device); cb->args[0] = idx; return skb->len; } static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; bool show_details = false; struct ib_device *device; struct sk_buff *msg; u32 index; int ret; ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, NL_VALIDATE_LIBERAL, extack); if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) return -EINVAL; index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); device = ib_device_get_by_index(sock_net(skb->sk), index); if (!device) return -EINVAL; if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]) show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; goto err; } nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET), 0, 0); if (!nlh) { ret = -EMSGSIZE; goto err_free; } ret = fill_res_info(msg, device, show_details); if (ret) goto err_free; nlmsg_end(msg, nlh); ib_device_put(device); return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); err_free: nlmsg_free(msg); err: ib_device_put(device); return ret; } static int _nldev_res_get_dumpit(struct ib_device *device, struct sk_buff *skb, struct netlink_callback *cb, unsigned int idx) { int start = cb->args[0]; struct nlmsghdr *nlh; if (idx < start) return 0; nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET), 0, NLM_F_MULTI); if (!nlh || fill_res_info(skb, device, false)) { nlmsg_cancel(skb, nlh); goto out; } nlmsg_end(skb, nlh); idx++; out: cb->args[0] = idx; return skb->len; } static int nldev_res_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb); } struct nldev_fill_res_entry { enum rdma_nldev_attr nldev_attr; u8 flags; u32 entry; u32 id; }; enum nldev_res_flags { NLDEV_PER_DEV = 1 << 0, }; static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = { [RDMA_RESTRACK_QP] = { .nldev_attr = RDMA_NLDEV_ATTR_RES_QP, .entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY, .id = RDMA_NLDEV_ATTR_RES_LQPN, }, [RDMA_RESTRACK_CM_ID] = { .nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID, .entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY, .id = RDMA_NLDEV_ATTR_RES_CM_IDN, }, [RDMA_RESTRACK_CQ] = { .nldev_attr = RDMA_NLDEV_ATTR_RES_CQ, .flags = NLDEV_PER_DEV, .entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY, .id = RDMA_NLDEV_ATTR_RES_CQN, }, [RDMA_RESTRACK_MR] = { .nldev_attr = RDMA_NLDEV_ATTR_RES_MR, .flags = NLDEV_PER_DEV, .entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY, .id = RDMA_NLDEV_ATTR_RES_MRN, }, [RDMA_RESTRACK_PD] = { .nldev_attr = RDMA_NLDEV_ATTR_RES_PD, .flags = NLDEV_PER_DEV, .entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY, .id = RDMA_NLDEV_ATTR_RES_PDN, }, [RDMA_RESTRACK_COUNTER] = { .nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER, .entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY, .id = RDMA_NLDEV_ATTR_STAT_COUNTER_ID, }, [RDMA_RESTRACK_CTX] = { .nldev_attr = RDMA_NLDEV_ATTR_RES_CTX, .flags = NLDEV_PER_DEV, .entry = RDMA_NLDEV_ATTR_RES_CTX_ENTRY, .id = RDMA_NLDEV_ATTR_RES_CTXN, }, [RDMA_RESTRACK_SRQ] = { .nldev_attr = RDMA_NLDEV_ATTR_RES_SRQ, .flags = NLDEV_PER_DEV, .entry = RDMA_NLDEV_ATTR_RES_SRQ_ENTRY, .id = RDMA_NLDEV_ATTR_RES_SRQN, }, }; static noinline_for_stack int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack, enum rdma_restrack_type res_type, res_fill_func_t fill_func) { const struct nldev_fill_res_entry *fe = &fill_entries[res_type]; struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; struct rdma_restrack_entry *res; struct ib_device *device; u32 index, id, port = 0; bool has_cap_net_admin; struct sk_buff *msg; int ret; ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, NL_VALIDATE_LIBERAL, extack); if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !fe->id || !tb[fe->id]) return -EINVAL; index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); device = ib_device_get_by_index(sock_net(skb->sk), index); if (!device) return -EINVAL; if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) { port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); if (!rdma_is_port_valid(device, port)) { ret = -EINVAL; goto err; } } if ((port && fe->flags & NLDEV_PER_DEV) || (!port && ~fe->flags & NLDEV_PER_DEV)) { ret = -EINVAL; goto err; } id = nla_get_u32(tb[fe->id]); res = rdma_restrack_get_byid(device, res_type, id); if (IS_ERR(res)) { ret = PTR_ERR(res); goto err; } msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; goto err_get; } nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NL_GET_OP(nlh->nlmsg_type)), 0, 0); if (!nlh || fill_nldev_handle(msg, device)) { ret = -EMSGSIZE; goto err_free; } has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN); ret = fill_func(msg, has_cap_net_admin, res, port); if (ret) goto err_free; rdma_restrack_put(res); nlmsg_end(msg, nlh); ib_device_put(device); return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); err_free: nlmsg_free(msg); err_get: rdma_restrack_put(res); err: ib_device_put(device); return ret; } static int res_get_common_dumpit(struct sk_buff *skb, struct netlink_callback *cb, enum rdma_restrack_type res_type, res_fill_func_t fill_func) { const struct nldev_fill_res_entry *fe = &fill_entries[res_type]; struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; struct rdma_restrack_entry *res; struct rdma_restrack_root *rt; int err, ret = 0, idx = 0; bool show_details = false; struct nlattr *table_attr; struct nlattr *entry_attr; struct ib_device *device; int start = cb->args[0]; bool has_cap_net_admin; struct nlmsghdr *nlh; unsigned long id; u32 index, port = 0; bool filled = false; err = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, NL_VALIDATE_LIBERAL, NULL); /* * Right now, we are expecting the device index to get res information, * but it is possible to extend this code to return all devices in * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX. * if it doesn't exist, we will iterate over all devices. * * But it is not needed for now. */ if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) return -EINVAL; index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); device = ib_device_get_by_index(sock_net(skb->sk), index); if (!device) return -EINVAL; if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]) show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]); /* * If no PORT_INDEX is supplied, we will return all QPs from that device */ if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) { port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); if (!rdma_is_port_valid(device, port)) { ret = -EINVAL; goto err_index; } } nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NL_GET_OP(cb->nlh->nlmsg_type)), 0, NLM_F_MULTI); if (!nlh || fill_nldev_handle(skb, device)) { ret = -EMSGSIZE; goto err; } table_attr = nla_nest_start_noflag(skb, fe->nldev_attr); if (!table_attr) { ret = -EMSGSIZE; goto err; } has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN); rt = &device->res[res_type]; xa_lock(&rt->xa); /* * FIXME: if the skip ahead is something common this loop should * use xas_for_each & xas_pause to optimize, we can have a lot of * objects. */ xa_for_each(&rt->xa, id, res) { if (xa_get_mark(&rt->xa, res->id, RESTRACK_DD) && !show_details) goto next; if (idx < start || !rdma_restrack_get(res)) goto next; xa_unlock(&rt->xa); filled = true; entry_attr = nla_nest_start_noflag(skb, fe->entry); if (!entry_attr) { ret = -EMSGSIZE; rdma_restrack_put(res); goto msg_full; } ret = fill_func(skb, has_cap_net_admin, res, port); rdma_restrack_put(res); if (ret) { nla_nest_cancel(skb, entry_attr); if (ret == -EMSGSIZE) goto msg_full; if (ret == -EAGAIN) goto again; goto res_err; } nla_nest_end(skb, entry_attr); again: xa_lock(&rt->xa); next: idx++; } xa_unlock(&rt->xa); msg_full: nla_nest_end(skb, table_attr); nlmsg_end(skb, nlh); cb->args[0] = idx; /* * No more entries to fill, cancel the message and * return 0 to mark end of dumpit. */ if (!filled) goto err; ib_device_put(device); return skb->len; res_err: nla_nest_cancel(skb, table_attr); err: nlmsg_cancel(skb, nlh); err_index: ib_device_put(device); return ret; } #define RES_GET_FUNCS(name, type) \ static int nldev_res_get_##name##_dumpit(struct sk_buff *skb, \ struct netlink_callback *cb) \ { \ return res_get_common_dumpit(skb, cb, type, \ fill_res_##name##_entry); \ } \ static int nldev_res_get_##name##_doit(struct sk_buff *skb, \ struct nlmsghdr *nlh, \ struct netlink_ext_ack *extack) \ { \ return res_get_common_doit(skb, nlh, extack, type, \ fill_res_##name##_entry); \ } RES_GET_FUNCS(qp, RDMA_RESTRACK_QP); RES_GET_FUNCS(qp_raw, RDMA_RESTRACK_QP); RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID); RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ); RES_GET_FUNCS(cq_raw, RDMA_RESTRACK_CQ); RES_GET_FUNCS(pd, RDMA_RESTRACK_PD); RES_GET_FUNCS(mr, RDMA_RESTRACK_MR); RES_GET_FUNCS(mr_raw, RDMA_RESTRACK_MR); RES_GET_FUNCS(counter, RDMA_RESTRACK_COUNTER); RES_GET_FUNCS(ctx, RDMA_RESTRACK_CTX); RES_GET_FUNCS(srq, RDMA_RESTRACK_SRQ); RES_GET_FUNCS(srq_raw, RDMA_RESTRACK_SRQ); static LIST_HEAD(link_ops); static DECLARE_RWSEM(link_ops_rwsem); static const struct rdma_link_ops *link_ops_get(const char *type) { const struct rdma_link_ops *ops; list_for_each_entry(ops, &link_ops, list) { if (!strcmp(ops->type, type)) goto out; } ops = NULL; out: return ops; } void rdma_link_register(struct rdma_link_ops *ops) { down_write(&link_ops_rwsem); if (WARN_ON_ONCE(link_ops_get(ops->type))) goto out; list_add(&ops->list, &link_ops); out: up_write(&link_ops_rwsem); } EXPORT_SYMBOL(rdma_link_register); void rdma_link_unregister(struct rdma_link_ops *ops) { down_write(&link_ops_rwsem); list_del(&ops->list); up_write(&link_ops_rwsem); } EXPORT_SYMBOL(rdma_link_unregister); static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; char ibdev_name[IB_DEVICE_NAME_MAX]; const struct rdma_link_ops *ops; char ndev_name[IFNAMSIZ]; struct net_device *ndev; char type[IFNAMSIZ]; int err; err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, extack); if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] || !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME]) return -EINVAL; nla_strscpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME], sizeof(ibdev_name)); if (strchr(ibdev_name, '%') || strlen(ibdev_name) == 0) return -EINVAL; nla_strscpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type)); nla_strscpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME], sizeof(ndev_name)); ndev = dev_get_by_name(sock_net(skb->sk), ndev_name); if (!ndev) return -ENODEV; down_read(&link_ops_rwsem); ops = link_ops_get(type); #ifdef CONFIG_MODULES if (!ops) { up_read(&link_ops_rwsem); request_module("rdma-link-%s", type); down_read(&link_ops_rwsem); ops = link_ops_get(type); } #endif err = ops ? ops->newlink(ibdev_name, ndev) : -EINVAL; up_read(&link_ops_rwsem); dev_put(ndev); return err; } static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; struct ib_device *device; u32 index; int err; err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, extack); if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) return -EINVAL; index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); device = ib_device_get_by_index(sock_net(skb->sk), index); if (!device) return -EINVAL; if (!(device->attrs.kernel_cap_flags & IBK_ALLOW_USER_UNREG)) { ib_device_put(device); return -EINVAL; } ib_unregister_device_and_put(device); return 0; } static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; char client_name[RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE]; struct ib_client_nl_info data = {}; struct ib_device *ibdev = NULL; struct sk_buff *msg; u32 index; int err; err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, NL_VALIDATE_LIBERAL, extack); if (err || !tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE]) return -EINVAL; nla_strscpy(client_name, tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE], sizeof(client_name)); if (tb[RDMA_NLDEV_ATTR_DEV_INDEX]) { index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); ibdev = ib_device_get_by_index(sock_net(skb->sk), index); if (!ibdev) return -EINVAL; if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) { data.port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); if (!rdma_is_port_valid(ibdev, data.port)) { err = -EINVAL; goto out_put; } } else { data.port = -1; } } else if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) { return -EINVAL; } msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { err = -ENOMEM; goto out_put; } nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET_CHARDEV), 0, 0); if (!nlh) { err = -EMSGSIZE; goto out_nlmsg; } data.nl_msg = msg; err = ib_get_client_nl_info(ibdev, client_name, &data); if (err) goto out_nlmsg; err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV, huge_encode_dev(data.cdev->devt), RDMA_NLDEV_ATTR_PAD); if (err) goto out_data; err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV_ABI, data.abi, RDMA_NLDEV_ATTR_PAD); if (err) goto out_data; if (nla_put_string(msg, RDMA_NLDEV_ATTR_CHARDEV_NAME, dev_name(data.cdev))) { err = -EMSGSIZE; goto out_data; } nlmsg_end(msg, nlh); put_device(data.cdev); if (ibdev) ib_device_put(ibdev); return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); out_data: put_device(data.cdev); out_nlmsg: nlmsg_free(msg); out_put: if (ibdev) ib_device_put(ibdev); return err; } static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; struct sk_buff *msg; int err; err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, NL_VALIDATE_LIBERAL, extack); if (err) return err; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_SYS_GET), 0, 0); if (!nlh) { nlmsg_free(msg); return -EMSGSIZE; } err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_NETNS_MODE, (u8)ib_devices_shared_netns); if (err) { nlmsg_free(msg); return err; } err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE, (u8)privileged_qkey); if (err) { nlmsg_free(msg); return err; } err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_MONITOR_MODE, 1); if (err) { nlmsg_free(msg); return err; } /* * Copy-on-fork is supported. * See commits: * 70e806e4e645 ("mm: Do early cow for pinned pages during fork() for ptes") * 4eae4efa2c29 ("hugetlb: do early cow when page pinned on src mm") * for more details. Don't backport this without them. * * Return value ignored on purpose, assume copy-on-fork is not * supported in case of failure. */ nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK, 1); nlmsg_end(msg, nlh); return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); } static int nldev_set_sys_set_netns_doit(struct nlattr *tb[]) { u8 enable; int err; enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]); /* Only 0 and 1 are supported */ if (enable > 1) return -EINVAL; err = rdma_compatdev_set(enable); return err; } static int nldev_set_sys_set_pqkey_doit(struct nlattr *tb[]) { u8 enable; enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE]); /* Only 0 and 1 are supported */ if (enable > 1) return -EINVAL; privileged_qkey = enable; return 0; } static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; int err; err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, extack); if (err) return -EINVAL; if (tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]) return nldev_set_sys_set_netns_doit(tb); if (tb[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE]) return nldev_set_sys_set_pqkey_doit(tb); return -EINVAL; } static int nldev_stat_set_mode_doit(struct sk_buff *msg, struct netlink_ext_ack *extack, struct nlattr *tb[], struct ib_device *device, u32 port) { u32 mode, mask = 0, qpn, cntn = 0; bool opcnt = false; int ret; /* Currently only counter for QP is supported */ if (!tb[RDMA_NLDEV_ATTR_STAT_RES] || nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP) return -EINVAL; if (tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED]) opcnt = !!nla_get_u8( tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED]); mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]); if (mode == RDMA_COUNTER_MODE_AUTO) { if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]) mask = nla_get_u32( tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]); return rdma_counter_set_auto_mode(device, port, mask, opcnt, extack); } if (!tb[RDMA_NLDEV_ATTR_RES_LQPN]) return -EINVAL; qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]); if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) { cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]); ret = rdma_counter_bind_qpn(device, port, qpn, cntn); if (ret) return ret; } else { ret = rdma_counter_bind_qpn_alloc(device, port, qpn, &cntn); if (ret) return ret; } if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) || nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) { ret = -EMSGSIZE; goto err_fill; } return 0; err_fill: rdma_counter_unbind_qpn(device, port, qpn, cntn); return ret; } static int nldev_stat_set_counter_dynamic_doit(struct nlattr *tb[], struct ib_device *device, u32 port) { struct rdma_hw_stats *stats; struct nlattr *entry_attr; unsigned long *target; int rem, i, ret = 0; u32 index; stats = ib_get_hw_stats_port(device, port); if (!stats) return -EINVAL; target = kcalloc(BITS_TO_LONGS(stats->num_counters), sizeof(*stats->is_disabled), GFP_KERNEL); if (!target) return -ENOMEM; nla_for_each_nested(entry_attr, tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS], rem) { index = nla_get_u32(entry_attr); if ((index >= stats->num_counters) || !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) { ret = -EINVAL; goto out; } set_bit(index, target); } for (i = 0; i < stats->num_counters; i++) { if (!(stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL)) continue; ret = rdma_counter_modify(device, port, i, test_bit(i, target)); if (ret) goto out; } out: kfree(target); return ret; } static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; struct ib_device *device; struct sk_buff *msg; u32 index, port; int ret; ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, extack); if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) return -EINVAL; index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); device = ib_device_get_by_index(sock_net(skb->sk), index); if (!device) return -EINVAL; port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); if (!rdma_is_port_valid(device, port)) { ret = -EINVAL; goto err_put_device; } if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] && !tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) { ret = -EINVAL; goto err_put_device; } msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; goto err_put_device; } nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_SET), 0, 0); if (!nlh || fill_nldev_handle(msg, device) || nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) { ret = -EMSGSIZE; goto err_free_msg; } if (tb[RDMA_NLDEV_ATTR_STAT_MODE]) { ret = nldev_stat_set_mode_doit(msg, extack, tb, device, port); if (ret) goto err_free_msg; } if (tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) { ret = nldev_stat_set_counter_dynamic_doit(tb, device, port); if (ret) goto err_free_msg; } nlmsg_end(msg, nlh); ib_device_put(device); return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); err_free_msg: nlmsg_free(msg); err_put_device: ib_device_put(device); return ret; } static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; struct ib_device *device; struct sk_buff *msg; u32 index, port, qpn, cntn; int ret; ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, extack); if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX] || !tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID] || !tb[RDMA_NLDEV_ATTR_RES_LQPN]) return -EINVAL; if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP) return -EINVAL; index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); device = ib_device_get_by_index(sock_net(skb->sk), index); if (!device) return -EINVAL; port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); if (!rdma_is_port_valid(device, port)) { ret = -EINVAL; goto err; } msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; goto err; } nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_SET), 0, 0); if (!nlh) { ret = -EMSGSIZE; goto err_fill; } cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]); qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]); if (fill_nldev_handle(msg, device) || nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) || nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) || nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) { ret = -EMSGSIZE; goto err_fill; } ret = rdma_counter_unbind_qpn(device, port, qpn, cntn); if (ret) goto err_fill; nlmsg_end(msg, nlh); ib_device_put(device); return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); err_fill: nlmsg_free(msg); err: ib_device_put(device); return ret; } static noinline_for_stack int stat_get_doit_default_counter(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack, struct nlattr *tb[]) { struct rdma_hw_stats *stats; struct nlattr *table_attr; struct ib_device *device; int ret, num_cnts, i; struct sk_buff *msg; u32 index, port; u64 v; if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) return -EINVAL; index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); device = ib_device_get_by_index(sock_net(skb->sk), index); if (!device) return -EINVAL; if (!device->ops.alloc_hw_port_stats || !device->ops.get_hw_stats) { ret = -EINVAL; goto err; } port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); stats = ib_get_hw_stats_port(device, port); if (!stats) { ret = -EINVAL; goto err; } msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; goto err; } nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_GET), 0, 0); if (!nlh || fill_nldev_handle(msg, device) || nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) { ret = -EMSGSIZE; goto err_msg; } mutex_lock(&stats->lock); num_cnts = device->ops.get_hw_stats(device, stats, port, 0); if (num_cnts < 0) { ret = -EINVAL; goto err_stats; } table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS); if (!table_attr) { ret = -EMSGSIZE; goto err_stats; } for (i = 0; i < num_cnts; i++) { if (test_bit(i, stats->is_disabled)) continue; v = stats->value[i] + rdma_counter_get_hwstat_value(device, port, i); if (rdma_nl_stat_hwcounter_entry(msg, stats->descs[i].name, v)) { ret = -EMSGSIZE; goto err_table; } } nla_nest_end(msg, table_attr); mutex_unlock(&stats->lock); nlmsg_end(msg, nlh); ib_device_put(device); return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); err_table: nla_nest_cancel(msg, table_attr); err_stats: mutex_unlock(&stats->lock); err_msg: nlmsg_free(msg); err: ib_device_put(device); return ret; } static noinline_for_stack int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack, struct nlattr *tb[]) { static enum rdma_nl_counter_mode mode; static enum rdma_nl_counter_mask mask; struct ib_device *device; struct sk_buff *msg; u32 index, port; bool opcnt; int ret; if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) return nldev_res_get_counter_doit(skb, nlh, extack); if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) return -EINVAL; index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); device = ib_device_get_by_index(sock_net(skb->sk), index); if (!device) return -EINVAL; port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); if (!rdma_is_port_valid(device, port)) { ret = -EINVAL; goto err; } msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; goto err; } nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_GET), 0, 0); if (!nlh) { ret = -EMSGSIZE; goto err_msg; } ret = rdma_counter_get_mode(device, port, &mode, &mask, &opcnt); if (ret) goto err_msg; if (fill_nldev_handle(msg, device) || nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) || nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, mode)) { ret = -EMSGSIZE; goto err_msg; } if ((mode == RDMA_COUNTER_MODE_AUTO) && nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask)) { ret = -EMSGSIZE; goto err_msg; } if ((mode == RDMA_COUNTER_MODE_AUTO) && nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED, opcnt)) { ret = -EMSGSIZE; goto err_msg; } nlmsg_end(msg, nlh); ib_device_put(device); return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); err_msg: nlmsg_free(msg); err: ib_device_put(device); return ret; } static int nldev_stat_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; int ret; ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, NL_VALIDATE_LIBERAL, extack); if (ret) return -EINVAL; if (!tb[RDMA_NLDEV_ATTR_STAT_RES]) return stat_get_doit_default_counter(skb, nlh, extack, tb); switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) { case RDMA_NLDEV_ATTR_RES_QP: ret = stat_get_doit_qp(skb, nlh, extack, tb); break; case RDMA_NLDEV_ATTR_RES_MR: ret = res_get_common_doit(skb, nlh, extack, RDMA_RESTRACK_MR, fill_stat_mr_entry); break; default: ret = -EINVAL; break; } return ret; } static int nldev_stat_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; int ret; ret = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, NL_VALIDATE_LIBERAL, NULL); if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES]) return -EINVAL; switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) { case RDMA_NLDEV_ATTR_RES_QP: ret = nldev_res_get_counter_dumpit(skb, cb); break; case RDMA_NLDEV_ATTR_RES_MR: ret = res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR, fill_stat_mr_entry); break; default: ret = -EINVAL; break; } return ret; } static int nldev_stat_get_counter_status_doit(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct nlattr *tb[RDMA_NLDEV_ATTR_MAX], *table, *entry; struct rdma_hw_stats *stats; struct ib_device *device; struct sk_buff *msg; u32 devid, port; int ret, i; ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, NL_VALIDATE_LIBERAL, extack); if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX]) return -EINVAL; devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); device = ib_device_get_by_index(sock_net(skb->sk), devid); if (!device) return -EINVAL; port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]); if (!rdma_is_port_valid(device, port)) { ret = -EINVAL; goto err; } stats = ib_get_hw_stats_port(device, port); if (!stats) { ret = -EINVAL; goto err; } msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; goto err; } nlh = nlmsg_put( msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_GET_STATUS), 0, 0); ret = -EMSGSIZE; if (!nlh || fill_nldev_handle(msg, device) || nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) goto err_msg; table = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS); if (!table) goto err_msg; mutex_lock(&stats->lock); for (i = 0; i < stats->num_counters; i++) { entry = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY); if (!entry) goto err_msg_table; if (nla_put_string(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME, stats->descs[i].name) || nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX, i)) goto err_msg_entry; if ((stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) && (nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC, !test_bit(i, stats->is_disabled)))) goto err_msg_entry; nla_nest_end(msg, entry); } mutex_unlock(&stats->lock); nla_nest_end(msg, table); nlmsg_end(msg, nlh); ib_device_put(device); return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid); err_msg_entry: nla_nest_cancel(msg, entry); err_msg_table: mutex_unlock(&stats->lock); nla_nest_cancel(msg, table); err_msg: nlmsg_free(msg); err: ib_device_put(device); return ret; } static int nldev_newdev(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; enum rdma_nl_dev_type type; struct ib_device *parent; char name[IFNAMSIZ] = {}; u32 parentid; int ret; ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, extack); if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_DEV_NAME] || !tb[RDMA_NLDEV_ATTR_DEV_TYPE]) return -EINVAL; nla_strscpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME], sizeof(name)); type = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_TYPE]); parentid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); parent = ib_device_get_by_index(sock_net(skb->sk), parentid); if (!parent) return -EINVAL; ret = ib_add_sub_device(parent, type, name); ib_device_put(parent); return ret; } static int nldev_deldev(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct nlattr *tb[RDMA_NLDEV_ATTR_MAX]; struct ib_device *device; u32 devid; int ret; ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy, extack); if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX]) return -EINVAL; devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]); device = ib_device_get_by_index(sock_net(skb->sk), devid); if (!device) return -EINVAL; return ib_del_sub_device_and_put(device); } static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = { [RDMA_NLDEV_CMD_GET] = { .doit = nldev_get_doit, .dump = nldev_get_dumpit, }, [RDMA_NLDEV_CMD_GET_CHARDEV] = { .doit = nldev_get_chardev, }, [RDMA_NLDEV_CMD_SET] = { .doit = nldev_set_doit, .flags = RDMA_NL_ADMIN_PERM, }, [RDMA_NLDEV_CMD_NEWLINK] = { .doit = nldev_newlink, .flags = RDMA_NL_ADMIN_PERM, }, [RDMA_NLDEV_CMD_DELLINK] = { .doit = nldev_dellink, .flags = RDMA_NL_ADMIN_PERM, }, [RDMA_NLDEV_CMD_PORT_GET] = { .doit = nldev_port_get_doit, .dump = nldev_port_get_dumpit, }, [RDMA_NLDEV_CMD_RES_GET] = { .doit = nldev_res_get_doit, .dump = nldev_res_get_dumpit, }, [RDMA_NLDEV_CMD_RES_QP_GET] = { .doit = nldev_res_get_qp_doit, .dump = nldev_res_get_qp_dumpit, }, [RDMA_NLDEV_CMD_RES_CM_ID_GET] = { .doit = nldev_res_get_cm_id_doit, .dump = nldev_res_get_cm_id_dumpit, }, [RDMA_NLDEV_CMD_RES_CQ_GET] = { .doit = nldev_res_get_cq_doit, .dump = nldev_res_get_cq_dumpit, }, [RDMA_NLDEV_CMD_RES_MR_GET] = { .doit = nldev_res_get_mr_doit, .dump = nldev_res_get_mr_dumpit, }, [RDMA_NLDEV_CMD_RES_PD_GET] = { .doit = nldev_res_get_pd_doit, .dump = nldev_res_get_pd_dumpit, }, [RDMA_NLDEV_CMD_RES_CTX_GET] = { .doit = nldev_res_get_ctx_doit, .dump = nldev_res_get_ctx_dumpit, }, [RDMA_NLDEV_CMD_RES_SRQ_GET] = { .doit = nldev_res_get_srq_doit, .dump = nldev_res_get_srq_dumpit, }, [RDMA_NLDEV_CMD_SYS_GET] = { .doit = nldev_sys_get_doit, }, [RDMA_NLDEV_CMD_SYS_SET] = { .doit = nldev_set_sys_set_doit, .flags = RDMA_NL_ADMIN_PERM, }, [RDMA_NLDEV_CMD_STAT_SET] = { .doit = nldev_stat_set_doit, .flags = RDMA_NL_ADMIN_PERM, }, [RDMA_NLDEV_CMD_STAT_GET] = { .doit = nldev_stat_get_doit, .dump = nldev_stat_get_dumpit, }, [RDMA_NLDEV_CMD_STAT_DEL] = { .doit = nldev_stat_del_doit, .flags = RDMA_NL_ADMIN_PERM, }, [RDMA_NLDEV_CMD_RES_QP_GET_RAW] = { .doit = nldev_res_get_qp_raw_doit, .dump = nldev_res_get_qp_raw_dumpit, .flags = RDMA_NL_ADMIN_PERM, }, [RDMA_NLDEV_CMD_RES_CQ_GET_RAW] = { .doit = nldev_res_get_cq_raw_doit, .dump = nldev_res_get_cq_raw_dumpit, .flags = RDMA_NL_ADMIN_PERM, }, [RDMA_NLDEV_CMD_RES_MR_GET_RAW] = { .doit = nldev_res_get_mr_raw_doit, .dump = nldev_res_get_mr_raw_dumpit, .flags = RDMA_NL_ADMIN_PERM, }, [RDMA_NLDEV_CMD_RES_SRQ_GET_RAW] = { .doit = nldev_res_get_srq_raw_doit, .dump = nldev_res_get_srq_raw_dumpit, .flags = RDMA_NL_ADMIN_PERM, }, [RDMA_NLDEV_CMD_STAT_GET_STATUS] = { .doit = nldev_stat_get_counter_status_doit, }, [RDMA_NLDEV_CMD_NEWDEV] = { .doit = nldev_newdev, .flags = RDMA_NL_ADMIN_PERM, }, [RDMA_NLDEV_CMD_DELDEV] = { .doit = nldev_deldev, .flags = RDMA_NL_ADMIN_PERM, }, }; static int fill_mon_netdev_rename(struct sk_buff *msg, struct ib_device *device, u32 port, const struct net *net) { struct net_device *netdev = ib_device_get_netdev(device, port); int ret = 0; if (!netdev || !net_eq(dev_net(netdev), net)) goto out; ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex); if (ret) goto out; ret = nla_put_string(msg, RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name); out: dev_put(netdev); return ret; } static int fill_mon_netdev_association(struct sk_buff *msg, struct ib_device *device, u32 port, const struct net *net) { struct net_device *netdev = ib_device_get_netdev(device, port); int ret = 0; if (netdev && !net_eq(dev_net(netdev), net)) goto out; ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index); if (ret) goto out; ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME, dev_name(&device->dev)); if (ret) goto out; ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port); if (ret) goto out; if (netdev) { ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex); if (ret) goto out; ret = nla_put_string(msg, RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name); } out: dev_put(netdev); return ret; } static void rdma_nl_notify_err_msg(struct ib_device *device, u32 port_num, enum rdma_nl_notify_event_type type) { struct net_device *netdev; switch (type) { case RDMA_REGISTER_EVENT: dev_warn_ratelimited(&device->dev, "Failed to send RDMA monitor register device event\n"); break; case RDMA_UNREGISTER_EVENT: dev_warn_ratelimited(&device->dev, "Failed to send RDMA monitor unregister device event\n"); break; case RDMA_NETDEV_ATTACH_EVENT: netdev = ib_device_get_netdev(device, port_num); dev_warn_ratelimited(&device->dev, "Failed to send RDMA monitor netdev attach event: port %d netdev %d\n", port_num, netdev->ifindex); dev_put(netdev); break; case RDMA_NETDEV_DETACH_EVENT: dev_warn_ratelimited(&device->dev, "Failed to send RDMA monitor netdev detach event: port %d\n", port_num); break; case RDMA_RENAME_EVENT: dev_warn_ratelimited(&device->dev, "Failed to send RDMA monitor rename device event\n"); break; case RDMA_NETDEV_RENAME_EVENT: netdev = ib_device_get_netdev(device, port_num); dev_warn_ratelimited(&device->dev, "Failed to send RDMA monitor netdev rename event: port %d netdev %d\n", port_num, netdev->ifindex); dev_put(netdev); break; default: break; } } int rdma_nl_notify_event(struct ib_device *device, u32 port_num, enum rdma_nl_notify_event_type type) { struct sk_buff *skb; int ret = -EMSGSIZE; struct net *net; void *nlh; net = read_pnet(&device->coredev.rdma_net); if (!net) return -EINVAL; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb) return -ENOMEM; nlh = nlmsg_put(skb, 0, 0, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_MONITOR), 0, 0); if (!nlh) goto err_free; switch (type) { case RDMA_REGISTER_EVENT: case RDMA_UNREGISTER_EVENT: case RDMA_RENAME_EVENT: ret = fill_nldev_handle(skb, device); if (ret) goto err_free; break; case RDMA_NETDEV_ATTACH_EVENT: case RDMA_NETDEV_DETACH_EVENT: ret = fill_mon_netdev_association(skb, device, port_num, net); if (ret) goto err_free; break; case RDMA_NETDEV_RENAME_EVENT: ret = fill_mon_netdev_rename(skb, device, port_num, net); if (ret) goto err_free; break; default: break; } ret = nla_put_u8(skb, RDMA_NLDEV_ATTR_EVENT_TYPE, type); if (ret) goto err_free; nlmsg_end(skb, nlh); ret = rdma_nl_multicast(net, skb, RDMA_NL_GROUP_NOTIFY, GFP_KERNEL); if (ret && ret != -ESRCH) { skb = NULL; /* skb is freed in the netlink send-op handling */ goto err_free; } return 0; err_free: rdma_nl_notify_err_msg(device, port_num, type); nlmsg_free(skb); return ret; } void __init nldev_init(void) { rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table); } void nldev_exit(void) { rdma_nl_unregister(RDMA_NL_NLDEV); } MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_NLDEV, 5);
3 1 1 1 2 2 2 1 1 1 1 2 2 4 2 2 1 1 2 10 6 6 9 2 5 10 8 10 10 10 10 10 10 10 10 4 2 8 8 3 1 3 7 4 7 7 8 13 11 2 13 6 4 1 5 4 5 5 2 4 2 5 7 1 6 5 1 5 6 6 1 5 4 7 6 6 6 6 6 5 5 1 2 4 6 6 6 6 6 6 6 15 1 11 15 14 15 15 15 4 1 10 10 11 11 15 13 1 13 1 13 13 12 1 2 5 6 11 6 5 11 11 11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 // SPDX-License-Identifier: GPL-2.0-or-later /* * Xbox gamepad driver * * Copyright (c) 2002 Marko Friedemann <mfr@bmx-chemnitz.de> * 2004 Oliver Schwartz <Oliver.Schwartz@gmx.de>, * Steven Toth <steve@toth.demon.co.uk>, * Franz Lehner <franz@caos.at>, * Ivan Hawkes <blackhawk@ivanhawkes.com> * 2005 Dominic Cerquetti <binary1230@yahoo.com> * 2006 Adam Buchbinder <adam.buchbinder@gmail.com> * 2007 Jan Kratochvil <honza@jikos.cz> * 2010 Christoph Fritz <chf.fritz@googlemail.com> * * This driver is based on: * - information from http://euc.jp/periphs/xbox-controller.ja.html * - the iForce driver drivers/char/joystick/iforce.c * - the skeleton-driver drivers/usb/usb-skeleton.c * - Xbox 360 information http://www.free60.org/wiki/Gamepad * - Xbox One information https://github.com/quantus/xbox-one-controller-protocol * * Thanks to: * - ITO Takayuki for providing essential xpad information on his website * - Vojtech Pavlik - iforce driver / input subsystem * - Greg Kroah-Hartman - usb-skeleton driver * - Xbox Linux project - extra USB IDs * - Pekka Pöyry (quantus) - Xbox One controller reverse-engineering * * TODO: * - fine tune axes (especially trigger axes) * - fix "analog" buttons (reported as digital now) * - get rumble working * - need USB IDs for other dance pads * * History: * * 2002-06-27 - 0.0.1 : first version, just said "XBOX HID controller" * * 2002-07-02 - 0.0.2 : basic working version * - all axes and 9 of the 10 buttons work (german InterAct device) * - the black button does not work * * 2002-07-14 - 0.0.3 : rework by Vojtech Pavlik * - indentation fixes * - usb + input init sequence fixes * * 2002-07-16 - 0.0.4 : minor changes, merge with Vojtech's v0.0.3 * - verified the lack of HID and report descriptors * - verified that ALL buttons WORK * - fixed d-pad to axes mapping * * 2002-07-17 - 0.0.5 : simplified d-pad handling * * 2004-10-02 - 0.0.6 : DDR pad support * - borrowed from the Xbox Linux kernel * - USB id's for commonly used dance pads are present * - dance pads will map D-PAD to buttons, not axes * - pass the module paramater 'dpad_to_buttons' to force * the D-PAD to map to buttons if your pad is not detected * * Later changes can be tracked in SCM. */ #include <linux/bits.h> #include <linux/kernel.h> #include <linux/input.h> #include <linux/rcupdate.h> #include <linux/slab.h> #include <linux/stat.h> #include <linux/module.h> #include <linux/usb/input.h> #include <linux/usb/quirks.h> #define XPAD_PKT_LEN 64 /* * xbox d-pads should map to buttons, as is required for DDR pads * but we map them to axes when possible to simplify things */ #define MAP_DPAD_TO_BUTTONS BIT(0) #define MAP_TRIGGERS_TO_BUTTONS BIT(1) #define MAP_STICKS_TO_NULL BIT(2) #define MAP_SHARE_BUTTON BIT(3) #define MAP_PADDLES BIT(4) #define MAP_PROFILE_BUTTON BIT(5) #define MAP_SHARE_OFFSET BIT(6) #define DANCEPAD_MAP_CONFIG (MAP_DPAD_TO_BUTTONS | \ MAP_TRIGGERS_TO_BUTTONS | MAP_STICKS_TO_NULL) #define XTYPE_XBOX 0 #define XTYPE_XBOX360 1 #define XTYPE_XBOX360W 2 #define XTYPE_XBOXONE 3 #define XTYPE_UNKNOWN 4 /* Send power-off packet to xpad360w after holding the mode button for this many * seconds */ #define XPAD360W_POWEROFF_TIMEOUT 5 #define PKT_XB 0 #define PKT_XBE1 1 #define PKT_XBE2_FW_OLD 2 #define PKT_XBE2_FW_5_EARLY 3 #define PKT_XBE2_FW_5_11 4 #define FLAG_DELAY_INIT BIT(0) static bool dpad_to_buttons; module_param(dpad_to_buttons, bool, S_IRUGO); MODULE_PARM_DESC(dpad_to_buttons, "Map D-PAD to buttons rather than axes for unknown pads"); static bool triggers_to_buttons; module_param(triggers_to_buttons, bool, S_IRUGO); MODULE_PARM_DESC(triggers_to_buttons, "Map triggers to buttons rather than axes for unknown pads"); static bool sticks_to_null; module_param(sticks_to_null, bool, S_IRUGO); MODULE_PARM_DESC(sticks_to_null, "Do not map sticks at all for unknown pads"); static bool auto_poweroff = true; module_param(auto_poweroff, bool, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(auto_poweroff, "Power off wireless controllers on suspend"); static const struct xpad_device { u16 idVendor; u16 idProduct; char *name; u8 mapping; u8 xtype; u8 flags; } xpad_device[] = { /* Please keep this list sorted by vendor and product ID. */ { 0x0079, 0x18d4, "GPD Win 2 X-Box Controller", 0, XTYPE_XBOX360 }, { 0x0351, 0x1000, "CRKD LP Blueberry Burst Pro Edition (Xbox)", 0, XTYPE_XBOX360 }, { 0x0351, 0x2000, "CRKD LP Black Tribal Edition (Xbox) ", 0, XTYPE_XBOX360 }, { 0x03eb, 0xff01, "Wooting One (Legacy)", 0, XTYPE_XBOX360 }, { 0x03eb, 0xff02, "Wooting Two (Legacy)", 0, XTYPE_XBOX360 }, { 0x03f0, 0x038D, "HyperX Clutch", 0, XTYPE_XBOX360 }, /* wired */ { 0x03f0, 0x048D, "HyperX Clutch", 0, XTYPE_XBOX360 }, /* wireless */ { 0x03f0, 0x0495, "HyperX Clutch Gladiate", 0, XTYPE_XBOXONE }, { 0x03f0, 0x07A0, "HyperX Clutch Gladiate RGB", 0, XTYPE_XBOXONE }, { 0x03f0, 0x08B6, "HyperX Clutch Gladiate", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, /* v2 */ { 0x03f0, 0x09B4, "HyperX Clutch Tanto", 0, XTYPE_XBOXONE }, { 0x044f, 0x0f00, "Thrustmaster Wheel", 0, XTYPE_XBOX }, { 0x044f, 0x0f03, "Thrustmaster Wheel", 0, XTYPE_XBOX }, { 0x044f, 0x0f07, "Thrustmaster, Inc. Controller", 0, XTYPE_XBOX }, { 0x044f, 0x0f10, "Thrustmaster Modena GT Wheel", 0, XTYPE_XBOX }, { 0x044f, 0xb326, "Thrustmaster Gamepad GP XID", 0, XTYPE_XBOX360 }, { 0x044f, 0xd01e, "ThrustMaster, Inc. ESWAP X 2 ELDEN RING EDITION", 0, XTYPE_XBOXONE }, { 0x045e, 0x0202, "Microsoft X-Box pad v1 (US)", 0, XTYPE_XBOX }, { 0x045e, 0x0285, "Microsoft X-Box pad (Japan)", 0, XTYPE_XBOX }, { 0x045e, 0x0287, "Microsoft Xbox Controller S", 0, XTYPE_XBOX }, { 0x045e, 0x0288, "Microsoft Xbox Controller S v2", 0, XTYPE_XBOX }, { 0x045e, 0x0289, "Microsoft X-Box pad v2 (US)", 0, XTYPE_XBOX }, { 0x045e, 0x028e, "Microsoft X-Box 360 pad", 0, XTYPE_XBOX360 }, { 0x045e, 0x028f, "Microsoft X-Box 360 pad v2", 0, XTYPE_XBOX360 }, { 0x045e, 0x0291, "Xbox 360 Wireless Receiver (XBOX)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W }, { 0x045e, 0x02a9, "Xbox 360 Wireless Receiver (Unofficial)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W }, { 0x045e, 0x02d1, "Microsoft X-Box One pad", 0, XTYPE_XBOXONE }, { 0x045e, 0x02dd, "Microsoft X-Box One pad (Firmware 2015)", 0, XTYPE_XBOXONE }, { 0x045e, 0x02e3, "Microsoft X-Box One Elite pad", MAP_PADDLES, XTYPE_XBOXONE }, { 0x045e, 0x02ea, "Microsoft X-Box One S pad", 0, XTYPE_XBOXONE }, { 0x045e, 0x0719, "Xbox 360 Wireless Receiver", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W }, { 0x045e, 0x0b00, "Microsoft X-Box One Elite 2 pad", MAP_PADDLES, XTYPE_XBOXONE }, { 0x045e, 0x0b0a, "Microsoft X-Box Adaptive Controller", MAP_PROFILE_BUTTON, XTYPE_XBOXONE }, { 0x045e, 0x0b12, "Microsoft Xbox Series S|X Controller", MAP_SHARE_BUTTON | MAP_SHARE_OFFSET, XTYPE_XBOXONE }, { 0x046d, 0xc21d, "Logitech Gamepad F310", 0, XTYPE_XBOX360 }, { 0x046d, 0xc21e, "Logitech Gamepad F510", 0, XTYPE_XBOX360 }, { 0x046d, 0xc21f, "Logitech Gamepad F710", 0, XTYPE_XBOX360 }, { 0x046d, 0xc242, "Logitech Chillstream Controller", 0, XTYPE_XBOX360 }, { 0x046d, 0xca84, "Logitech Xbox Cordless Controller", 0, XTYPE_XBOX }, { 0x046d, 0xca88, "Logitech Compact Controller for Xbox", 0, XTYPE_XBOX }, { 0x046d, 0xca8a, "Logitech Precision Vibration Feedback Wheel", 0, XTYPE_XBOX }, { 0x046d, 0xcaa3, "Logitech DriveFx Racing Wheel", 0, XTYPE_XBOX360 }, { 0x0502, 0x1305, "Acer NGR200", 0, XTYPE_XBOX360 }, { 0x056e, 0x2004, "Elecom JC-U3613M", 0, XTYPE_XBOX360 }, { 0x05fd, 0x1007, "Mad Catz Controller (unverified)", 0, XTYPE_XBOX }, { 0x05fd, 0x107a, "InterAct 'PowerPad Pro' X-Box pad (Germany)", 0, XTYPE_XBOX }, { 0x05fe, 0x3030, "Chic Controller", 0, XTYPE_XBOX }, { 0x05fe, 0x3031, "Chic Controller", 0, XTYPE_XBOX }, { 0x062a, 0x0020, "Logic3 Xbox GamePad", 0, XTYPE_XBOX }, { 0x062a, 0x0033, "Competition Pro Steering Wheel", 0, XTYPE_XBOX }, { 0x06a3, 0x0200, "Saitek Racing Wheel", 0, XTYPE_XBOX }, { 0x06a3, 0x0201, "Saitek Adrenalin", 0, XTYPE_XBOX }, { 0x06a3, 0xf51a, "Saitek P3600", 0, XTYPE_XBOX360 }, { 0x0738, 0x4503, "Mad Catz Racing Wheel", 0, XTYPE_XBOXONE }, { 0x0738, 0x4506, "Mad Catz 4506 Wireless Controller", 0, XTYPE_XBOX }, { 0x0738, 0x4516, "Mad Catz Control Pad", 0, XTYPE_XBOX }, { 0x0738, 0x4520, "Mad Catz Control Pad Pro", 0, XTYPE_XBOX }, { 0x0738, 0x4522, "Mad Catz LumiCON", 0, XTYPE_XBOX }, { 0x0738, 0x4526, "Mad Catz Control Pad Pro", 0, XTYPE_XBOX }, { 0x0738, 0x4530, "Mad Catz Universal MC2 Racing Wheel and Pedals", 0, XTYPE_XBOX }, { 0x0738, 0x4536, "Mad Catz MicroCON", 0, XTYPE_XBOX }, { 0x0738, 0x4540, "Mad Catz Beat Pad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x0738, 0x4556, "Mad Catz Lynx Wireless Controller", 0, XTYPE_XBOX }, { 0x0738, 0x4586, "Mad Catz MicroCon Wireless Controller", 0, XTYPE_XBOX }, { 0x0738, 0x4588, "Mad Catz Blaster", 0, XTYPE_XBOX }, { 0x0738, 0x45ff, "Mad Catz Beat Pad (w/ Handle)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x0738, 0x4716, "Mad Catz Wired Xbox 360 Controller", 0, XTYPE_XBOX360 }, { 0x0738, 0x4718, "Mad Catz Street Fighter IV FightStick SE", 0, XTYPE_XBOX360 }, { 0x0738, 0x4726, "Mad Catz Xbox 360 Controller", 0, XTYPE_XBOX360 }, { 0x0738, 0x4728, "Mad Catz Street Fighter IV FightPad", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0738, 0x4736, "Mad Catz MicroCon Gamepad", 0, XTYPE_XBOX360 }, { 0x0738, 0x4738, "Mad Catz Wired Xbox 360 Controller (SFIV)", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0738, 0x4740, "Mad Catz Beat Pad", 0, XTYPE_XBOX360 }, { 0x0738, 0x4743, "Mad Catz Beat Pad Pro", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x0738, 0x4758, "Mad Catz Arcade Game Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0738, 0x4a01, "Mad Catz FightStick TE 2", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE }, { 0x0738, 0x6040, "Mad Catz Beat Pad Pro", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x0738, 0x9871, "Mad Catz Portable Drum", 0, XTYPE_XBOX360 }, { 0x0738, 0xb726, "Mad Catz Xbox controller - MW2", 0, XTYPE_XBOX360 }, { 0x0738, 0xb738, "Mad Catz MVC2TE Stick 2", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0738, 0xbeef, "Mad Catz JOYTECH NEO SE Advanced GamePad", 0, XTYPE_XBOX360 }, { 0x0738, 0xcb02, "Saitek Cyborg Rumble Pad - PC/Xbox 360", 0, XTYPE_XBOX360 }, { 0x0738, 0xcb03, "Saitek P3200 Rumble Pad - PC/Xbox 360", 0, XTYPE_XBOX360 }, { 0x0738, 0xcb29, "Saitek Aviator Stick AV8R02", 0, XTYPE_XBOX360 }, { 0x0738, 0xf738, "Super SFIV FightStick TE S", 0, XTYPE_XBOX360 }, { 0x07ff, 0xffff, "Mad Catz GamePad", 0, XTYPE_XBOX360 }, { 0x0b05, 0x1a38, "ASUS ROG RAIKIRI", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, { 0x0b05, 0x1abb, "ASUS ROG RAIKIRI PRO", 0, XTYPE_XBOXONE }, { 0x0c12, 0x0005, "Intec wireless", 0, XTYPE_XBOX }, { 0x0c12, 0x8801, "Nyko Xbox Controller", 0, XTYPE_XBOX }, { 0x0c12, 0x8802, "Zeroplus Xbox Controller", 0, XTYPE_XBOX }, { 0x0c12, 0x8809, "RedOctane Xbox Dance Pad", DANCEPAD_MAP_CONFIG, XTYPE_XBOX }, { 0x0c12, 0x880a, "Pelican Eclipse PL-2023", 0, XTYPE_XBOX }, { 0x0c12, 0x8810, "Zeroplus Xbox Controller", 0, XTYPE_XBOX }, { 0x0c12, 0x9902, "HAMA VibraX - *FAULTY HARDWARE*", 0, XTYPE_XBOX }, { 0x0d2f, 0x0002, "Andamiro Pump It Up pad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x0db0, 0x1901, "Micro Star International Xbox360 Controller for Windows", 0, XTYPE_XBOX360 }, { 0x0e4c, 0x1097, "Radica Gamester Controller", 0, XTYPE_XBOX }, { 0x0e4c, 0x1103, "Radica Gamester Reflex", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX }, { 0x0e4c, 0x2390, "Radica Games Jtech Controller", 0, XTYPE_XBOX }, { 0x0e4c, 0x3510, "Radica Gamester", 0, XTYPE_XBOX }, { 0x0e6f, 0x0003, "Logic3 Freebird wireless Controller", 0, XTYPE_XBOX }, { 0x0e6f, 0x0005, "Eclipse wireless Controller", 0, XTYPE_XBOX }, { 0x0e6f, 0x0006, "Edge wireless Controller", 0, XTYPE_XBOX }, { 0x0e6f, 0x0008, "After Glow Pro Controller", 0, XTYPE_XBOX }, { 0x0e6f, 0x0105, "HSM3 Xbox360 dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0e6f, 0x0113, "Afterglow AX.1 Gamepad for Xbox 360", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x011f, "Rock Candy Gamepad Wired Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0131, "PDP EA Sports Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0133, "Xbox 360 Wired Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0139, "Afterglow Prismatic Wired Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x013a, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0146, "Rock Candy Wired Controller for Xbox One", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0147, "PDP Marvel Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x015c, "PDP Xbox One Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE }, { 0x0e6f, 0x015d, "PDP Mirror's Edge Official Wired Controller for Xbox One", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0161, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0162, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0163, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0164, "PDP Battlefield One", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0165, "PDP Titanfall 2", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0201, "Pelican PL-3601 'TSZ' Wired Xbox 360 Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0213, "Afterglow Gamepad for Xbox 360", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x021f, "Rock Candy Gamepad for Xbox 360", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0246, "Rock Candy Gamepad for Xbox One 2015", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02a0, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02a1, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02a2, "PDP Wired Controller for Xbox One - Crimson Red", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02a4, "PDP Wired Controller for Xbox One - Stealth Series", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02a6, "PDP Wired Controller for Xbox One - Camo Series", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02a7, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02a8, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02ab, "PDP Controller for Xbox One", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02ad, "PDP Wired Controller for Xbox One - Stealth Series", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02b3, "Afterglow Prismatic Wired Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02b8, "Afterglow Prismatic Wired Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0301, "Logic3 Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0346, "Rock Candy Gamepad for Xbox One 2016", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0401, "Logic3 Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0413, "Afterglow AX.1 Gamepad for Xbox 360", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0501, "PDP Xbox 360 Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0xf900, "PDP Afterglow AX.1", 0, XTYPE_XBOX360 }, { 0x0e8f, 0x0201, "SmartJoy Frag Xpad/PS2 adaptor", 0, XTYPE_XBOX }, { 0x0e8f, 0x3008, "Generic xbox control (dealextreme)", 0, XTYPE_XBOX }, { 0x0f0d, 0x000a, "Hori Co. DOA4 FightStick", 0, XTYPE_XBOX360 }, { 0x0f0d, 0x000c, "Hori PadEX Turbo", 0, XTYPE_XBOX360 }, { 0x0f0d, 0x000d, "Hori Fighting Stick EX2", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0f0d, 0x0016, "Hori Real Arcade Pro.EX", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0f0d, 0x001b, "Hori Real Arcade Pro VX", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0f0d, 0x0063, "Hori Real Arcade Pro Hayabusa (USA) Xbox One", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE }, { 0x0f0d, 0x0067, "HORIPAD ONE", 0, XTYPE_XBOXONE }, { 0x0f0d, 0x0078, "Hori Real Arcade Pro V Kai Xbox One", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE }, { 0x0f0d, 0x00c5, "Hori Fighting Commander ONE", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE }, { 0x0f0d, 0x00dc, "HORIPAD FPS for Nintendo Switch", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0f0d, 0x0151, "Hori Racing Wheel Overdrive for Xbox Series X", 0, XTYPE_XBOXONE }, { 0x0f0d, 0x0152, "Hori Racing Wheel Overdrive for Xbox Series X", 0, XTYPE_XBOXONE }, { 0x0f0d, 0x01b2, "HORI Taiko No Tatsujin Drum Controller", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, { 0x0f30, 0x010b, "Philips Recoil", 0, XTYPE_XBOX }, { 0x0f30, 0x0202, "Joytech Advanced Controller", 0, XTYPE_XBOX }, { 0x0f30, 0x8888, "BigBen XBMiniPad Controller", 0, XTYPE_XBOX }, { 0x102c, 0xff0c, "Joytech Wireless Advanced Controller", 0, XTYPE_XBOX }, { 0x1038, 0x1430, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 }, { 0x1038, 0x1431, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 }, { 0x10f5, 0x7005, "Turtle Beach Recon Controller", 0, XTYPE_XBOXONE }, { 0x10f5, 0x7008, "Turtle Beach Recon Controller", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, { 0x10f5, 0x7073, "Turtle Beach Stealth Ultra Controller", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, { 0x11c9, 0x55f0, "Nacon GC-100XF", 0, XTYPE_XBOX360 }, { 0x11ff, 0x0511, "PXN V900", 0, XTYPE_XBOX360 }, { 0x1209, 0x2882, "Ardwiino Controller", 0, XTYPE_XBOX360 }, { 0x12ab, 0x0004, "Honey Bee Xbox360 dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, { 0x12ab, 0x0301, "PDP AFTERGLOW AX.1", 0, XTYPE_XBOX360 }, { 0x12ab, 0x0303, "Mortal Kombat Klassic FightStick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x12ab, 0x8809, "Xbox DDR dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x1430, 0x4748, "RedOctane Guitar Hero X-plorer", 0, XTYPE_XBOX360 }, { 0x1430, 0x8888, "TX6500+ Dance Pad (first generation)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x1430, 0xf801, "RedOctane Controller", 0, XTYPE_XBOX360 }, { 0x146b, 0x0601, "BigBen Interactive XBOX 360 Controller", 0, XTYPE_XBOX360 }, { 0x146b, 0x0604, "Bigben Interactive DAIJA Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1532, 0x0a00, "Razer Atrox Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE }, { 0x1532, 0x0a03, "Razer Wildcat", 0, XTYPE_XBOXONE }, { 0x1532, 0x0a29, "Razer Wolverine V2", 0, XTYPE_XBOXONE }, { 0x15e4, 0x3f00, "Power A Mini Pro Elite", 0, XTYPE_XBOX360 }, { 0x15e4, 0x3f0a, "Xbox Airflo wired controller", 0, XTYPE_XBOX360 }, { 0x15e4, 0x3f10, "Batarang Xbox 360 controller", 0, XTYPE_XBOX360 }, { 0x162e, 0xbeef, "Joytech Neo-Se Take2", 0, XTYPE_XBOX360 }, { 0x1689, 0xfd00, "Razer Onza Tournament Edition", 0, XTYPE_XBOX360 }, { 0x1689, 0xfd01, "Razer Onza Classic Edition", 0, XTYPE_XBOX360 }, { 0x1689, 0xfe00, "Razer Sabertooth", 0, XTYPE_XBOX360 }, { 0x17ef, 0x6182, "Lenovo Legion Controller for Windows", 0, XTYPE_XBOX360 }, { 0x1949, 0x041a, "Amazon Game Controller", 0, XTYPE_XBOX360 }, { 0x1a86, 0xe310, "Legion Go S", 0, XTYPE_XBOX360 }, { 0x1bad, 0x0002, "Harmonix Rock Band Guitar", 0, XTYPE_XBOX360 }, { 0x1bad, 0x0003, "Harmonix Rock Band Drumkit", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0x0130, "Ion Drum Rocker", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf016, "Mad Catz Xbox 360 Controller", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf018, "Mad Catz Street Fighter IV SE Fighting Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf019, "Mad Catz Brawlstick for Xbox 360", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf021, "Mad Cats Ghost Recon FS GamePad", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf023, "MLG Pro Circuit Controller (Xbox)", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf025, "Mad Catz Call Of Duty", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf027, "Mad Catz FPS Pro", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf028, "Street Fighter IV FightPad", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf02e, "Mad Catz Fightpad", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf030, "Mad Catz Xbox 360 MC2 MicroCon Racing Wheel", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf036, "Mad Catz MicroCon GamePad Pro", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf038, "Street Fighter IV FightStick TE", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf039, "Mad Catz MvC2 TE", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf03a, "Mad Catz SFxT Fightstick Pro", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf03d, "Street Fighter IV Arcade Stick TE - Chun Li", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf03e, "Mad Catz MLG FightStick TE", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf03f, "Mad Catz FightStick SoulCaliber", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf042, "Mad Catz FightStick TES+", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf080, "Mad Catz FightStick TE2", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf501, "HoriPad EX2 Turbo", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf502, "Hori Real Arcade Pro.VX SA", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf503, "Hori Fighting Stick VX", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf504, "Hori Real Arcade Pro. EX", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf505, "Hori Fighting Stick EX2B", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf506, "Hori Real Arcade Pro.EX Premium VLX", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf900, "Harmonix Xbox 360 Controller", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf901, "Gamestop Xbox 360 Controller", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf903, "Tron Xbox 360 controller", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf904, "PDP Versus Fighting Pad", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf906, "MortalKombat FightStick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xfa01, "MadCatz GamePad", 0, XTYPE_XBOX360 }, { 0x1bad, 0xfd00, "Razer Onza TE", 0, XTYPE_XBOX360 }, { 0x1bad, 0xfd01, "Razer Onza", 0, XTYPE_XBOX360 }, { 0x1ee9, 0x1590, "ZOTAC Gaming Zone", 0, XTYPE_XBOX360 }, { 0x20d6, 0x2001, "BDA Xbox Series X Wired Controller", 0, XTYPE_XBOXONE }, { 0x20d6, 0x2009, "PowerA Enhanced Wired Controller for Xbox Series X|S", 0, XTYPE_XBOXONE }, { 0x20d6, 0x2064, "PowerA Wired Controller for Xbox", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, { 0x20d6, 0x281f, "PowerA Wired Controller For Xbox 360", 0, XTYPE_XBOX360 }, { 0x20d6, 0x400b, "PowerA FUSION Pro 4 Wired Controller", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, { 0x20d6, 0x890b, "PowerA MOGA XP-Ultra Controller", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, { 0x2345, 0xe00b, "Machenike G5 Pro Controller", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5000, "Razer Atrox Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x24c6, 0x5300, "PowerA MINI PROEX Controller", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5303, "Xbox Airflo wired controller", 0, XTYPE_XBOX360 }, { 0x24c6, 0x530a, "Xbox 360 Pro EX Controller", 0, XTYPE_XBOX360 }, { 0x24c6, 0x531a, "PowerA Pro Ex", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5397, "FUS1ON Tournament Controller", 0, XTYPE_XBOX360 }, { 0x24c6, 0x541a, "PowerA Xbox One Mini Wired Controller", 0, XTYPE_XBOXONE }, { 0x24c6, 0x542a, "Xbox ONE spectra", 0, XTYPE_XBOXONE }, { 0x24c6, 0x543a, "PowerA Xbox One wired controller", 0, XTYPE_XBOXONE }, { 0x24c6, 0x5500, "Hori XBOX 360 EX 2 with Turbo", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5501, "Hori Real Arcade Pro VX-SA", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5502, "Hori Fighting Stick VX Alt", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x24c6, 0x5503, "Hori Fighting Edge", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x24c6, 0x5506, "Hori SOULCALIBUR V Stick", 0, XTYPE_XBOX360 }, { 0x24c6, 0x550d, "Hori GEM Xbox controller", 0, XTYPE_XBOX360 }, { 0x24c6, 0x550e, "Hori Real Arcade Pro V Kai 360", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x24c6, 0x5510, "Hori Fighting Commander ONE (Xbox 360/PC Mode)", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x24c6, 0x551a, "PowerA FUSION Pro Controller", 0, XTYPE_XBOXONE }, { 0x24c6, 0x561a, "PowerA FUSION Controller", 0, XTYPE_XBOXONE }, { 0x24c6, 0x581a, "ThrustMaster XB1 Classic Controller", 0, XTYPE_XBOXONE }, { 0x24c6, 0x5b00, "ThrustMaster Ferrari 458 Racing Wheel", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5b02, "Thrustmaster, Inc. GPX Controller", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5b03, "Thrustmaster Ferrari 458 Racing Wheel", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5d04, "Razer Sabertooth", 0, XTYPE_XBOX360 }, { 0x24c6, 0xfafe, "Rock Candy Gamepad for Xbox 360", 0, XTYPE_XBOX360 }, { 0x2563, 0x058d, "OneXPlayer Gamepad", 0, XTYPE_XBOX360 }, { 0x294b, 0x3303, "Snakebyte GAMEPAD BASE X", 0, XTYPE_XBOXONE }, { 0x294b, 0x3404, "Snakebyte GAMEPAD RGB X", 0, XTYPE_XBOXONE }, { 0x2993, 0x2001, "TECNO Pocket Go", 0, XTYPE_XBOX360 }, { 0x2dc8, 0x2000, "8BitDo Pro 2 Wired Controller fox Xbox", 0, XTYPE_XBOXONE }, { 0x2dc8, 0x200f, "8BitDo Ultimate 3-mode Controller for Xbox", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, { 0x2dc8, 0x3106, "8BitDo Ultimate Wireless / Pro 2 Wired Controller", 0, XTYPE_XBOX360 }, { 0x2dc8, 0x3109, "8BitDo Ultimate Wireless Bluetooth", 0, XTYPE_XBOX360 }, { 0x2dc8, 0x310a, "8BitDo Ultimate 2C Wireless Controller", 0, XTYPE_XBOX360 }, { 0x2dc8, 0x310b, "8BitDo Ultimate 2 Wireless Controller", 0, XTYPE_XBOX360 }, { 0x2dc8, 0x6001, "8BitDo SN30 Pro", 0, XTYPE_XBOX360 }, { 0x2e24, 0x0423, "Hyperkin DuchesS Xbox One pad", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, { 0x2e24, 0x0652, "Hyperkin Duke X-Box One pad", 0, XTYPE_XBOXONE }, { 0x2e24, 0x1688, "Hyperkin X91 X-Box One pad", 0, XTYPE_XBOXONE }, { 0x2e95, 0x0504, "SCUF Gaming Controller", MAP_SHARE_BUTTON, XTYPE_XBOXONE }, { 0x31e3, 0x1100, "Wooting One", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1200, "Wooting Two", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1210, "Wooting Lekker", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1220, "Wooting Two HE", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1230, "Wooting Two HE (ARM)", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1300, "Wooting 60HE (AVR)", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1310, "Wooting 60HE (ARM)", 0, XTYPE_XBOX360 }, { 0x3285, 0x0603, "Nacon Pro Compact controller for Xbox", 0, XTYPE_XBOXONE }, { 0x3285, 0x0607, "Nacon GC-100", 0, XTYPE_XBOX360 }, { 0x3285, 0x0614, "Nacon Pro Compact", 0, XTYPE_XBOXONE }, { 0x3285, 0x0646, "Nacon Pro Compact", 0, XTYPE_XBOXONE }, { 0x3285, 0x0662, "Nacon Revolution5 Pro", 0, XTYPE_XBOX360 }, { 0x3285, 0x0663, "Nacon Evol-X", 0, XTYPE_XBOXONE }, { 0x3537, 0x1004, "GameSir T4 Kaleid", 0, XTYPE_XBOX360 }, { 0x3537, 0x1010, "GameSir G7 SE", 0, XTYPE_XBOXONE }, { 0x3651, 0x1000, "CRKD SG", 0, XTYPE_XBOX360 }, { 0x366c, 0x0005, "ByoWave Proteus Controller", MAP_SHARE_BUTTON, XTYPE_XBOXONE, FLAG_DELAY_INIT }, { 0x3767, 0x0101, "Fanatec Speedster 3 Forceshock Wheel", 0, XTYPE_XBOX }, { 0x37d7, 0x2501, "Flydigi Apex 5", 0, XTYPE_XBOX360 }, { 0x413d, 0x2104, "Black Shark Green Ghost Gamepad", 0, XTYPE_XBOX360 }, { 0xffff, 0xffff, "Chinese-made Xbox Controller", 0, XTYPE_XBOX }, { 0x0000, 0x0000, "Generic X-Box pad", 0, XTYPE_UNKNOWN } }; /* buttons shared with xbox and xbox360 */ static const signed short xpad_common_btn[] = { BTN_A, BTN_B, BTN_X, BTN_Y, /* "analog" buttons */ BTN_START, BTN_SELECT, BTN_THUMBL, BTN_THUMBR, /* start/back/sticks */ -1 /* terminating entry */ }; /* original xbox controllers only */ static const signed short xpad_btn[] = { BTN_C, BTN_Z, /* "analog" buttons */ -1 /* terminating entry */ }; /* used when dpad is mapped to buttons */ static const signed short xpad_btn_pad[] = { BTN_DPAD_LEFT, BTN_DPAD_RIGHT, /* d-pad left, right */ BTN_DPAD_UP, BTN_DPAD_DOWN, /* d-pad up, down */ -1 /* terminating entry */ }; /* used when triggers are mapped to buttons */ static const signed short xpad_btn_triggers[] = { BTN_TL2, BTN_TR2, /* triggers left/right */ -1 }; static const signed short xpad360_btn[] = { /* buttons for x360 controller */ BTN_TL, BTN_TR, /* Button LB/RB */ BTN_MODE, /* The big X button */ -1 }; static const signed short xpad_abs[] = { ABS_X, ABS_Y, /* left stick */ ABS_RX, ABS_RY, /* right stick */ -1 /* terminating entry */ }; /* used when dpad is mapped to axes */ static const signed short xpad_abs_pad[] = { ABS_HAT0X, ABS_HAT0Y, /* d-pad axes */ -1 /* terminating entry */ }; /* used when triggers are mapped to axes */ static const signed short xpad_abs_triggers[] = { ABS_Z, ABS_RZ, /* triggers left/right */ -1 }; /* used when the controller has extra paddle buttons */ static const signed short xpad_btn_paddles[] = { BTN_GRIPR, BTN_GRIPR2, /* paddle upper right, lower right */ BTN_GRIPL, BTN_GRIPL2, /* paddle upper left, lower left */ -1 /* terminating entry */ }; /* * Xbox 360 has a vendor-specific class, so we cannot match it with only * USB_INTERFACE_INFO (also specifically refused by USB subsystem), so we * match against vendor id as well. Wired Xbox 360 devices have protocol 1, * wireless controllers have protocol 129. */ #define XPAD_XBOX360_VENDOR_PROTOCOL(vend, pr) \ .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_INT_INFO, \ .idVendor = (vend), \ .bInterfaceClass = USB_CLASS_VENDOR_SPEC, \ .bInterfaceSubClass = 93, \ .bInterfaceProtocol = (pr) #define XPAD_XBOX360_VENDOR(vend) \ { XPAD_XBOX360_VENDOR_PROTOCOL((vend), 1) }, \ { XPAD_XBOX360_VENDOR_PROTOCOL((vend), 129) } /* The Xbox One controller uses subclass 71 and protocol 208. */ #define XPAD_XBOXONE_VENDOR_PROTOCOL(vend, pr) \ .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_INT_INFO, \ .idVendor = (vend), \ .bInterfaceClass = USB_CLASS_VENDOR_SPEC, \ .bInterfaceSubClass = 71, \ .bInterfaceProtocol = (pr) #define XPAD_XBOXONE_VENDOR(vend) \ { XPAD_XBOXONE_VENDOR_PROTOCOL((vend), 208) } static const struct usb_device_id xpad_table[] = { /* * Please keep this list sorted by vendor ID. Note that there are 2 * macros - XPAD_XBOX360_VENDOR and XPAD_XBOXONE_VENDOR. */ { USB_INTERFACE_INFO('X', 'B', 0) }, /* Xbox USB-IF not-approved class */ XPAD_XBOX360_VENDOR(0x0079), /* GPD Win 2 controller */ XPAD_XBOX360_VENDOR(0x0351), /* CRKD Controllers */ XPAD_XBOX360_VENDOR(0x03eb), /* Wooting Keyboards (Legacy) */ XPAD_XBOX360_VENDOR(0x03f0), /* HP HyperX Xbox 360 controllers */ XPAD_XBOXONE_VENDOR(0x03f0), /* HP HyperX Xbox One controllers */ XPAD_XBOX360_VENDOR(0x044f), /* Thrustmaster Xbox 360 controllers */ XPAD_XBOXONE_VENDOR(0x044f), /* Thrustmaster Xbox One controllers */ XPAD_XBOX360_VENDOR(0x045e), /* Microsoft Xbox 360 controllers */ XPAD_XBOXONE_VENDOR(0x045e), /* Microsoft Xbox One controllers */ XPAD_XBOX360_VENDOR(0x046d), /* Logitech Xbox 360-style controllers */ XPAD_XBOX360_VENDOR(0x0502), /* Acer Inc. Xbox 360 style controllers */ XPAD_XBOX360_VENDOR(0x056e), /* Elecom JC-U3613M */ XPAD_XBOX360_VENDOR(0x06a3), /* Saitek P3600 */ XPAD_XBOX360_VENDOR(0x0738), /* Mad Catz Xbox 360 controllers */ { USB_DEVICE(0x0738, 0x4540) }, /* Mad Catz Beat Pad */ XPAD_XBOXONE_VENDOR(0x0738), /* Mad Catz FightStick TE 2 */ XPAD_XBOX360_VENDOR(0x07ff), /* Mad Catz Gamepad */ XPAD_XBOXONE_VENDOR(0x0b05), /* ASUS controllers */ XPAD_XBOX360_VENDOR(0x0c12), /* Zeroplus X-Box 360 controllers */ XPAD_XBOX360_VENDOR(0x0db0), /* Micro Star International X-Box 360 controllers */ XPAD_XBOX360_VENDOR(0x0e6f), /* 0x0e6f Xbox 360 controllers */ XPAD_XBOXONE_VENDOR(0x0e6f), /* 0x0e6f Xbox One controllers */ XPAD_XBOX360_VENDOR(0x0f0d), /* Hori controllers */ XPAD_XBOXONE_VENDOR(0x0f0d), /* Hori controllers */ XPAD_XBOX360_VENDOR(0x1038), /* SteelSeries controllers */ XPAD_XBOXONE_VENDOR(0x10f5), /* Turtle Beach Controllers */ XPAD_XBOX360_VENDOR(0x11c9), /* Nacon GC100XF */ XPAD_XBOX360_VENDOR(0x11ff), /* PXN V900 */ XPAD_XBOX360_VENDOR(0x1209), /* Ardwiino Controllers */ XPAD_XBOX360_VENDOR(0x12ab), /* Xbox 360 dance pads */ XPAD_XBOX360_VENDOR(0x1430), /* RedOctane Xbox 360 controllers */ XPAD_XBOX360_VENDOR(0x146b), /* Bigben Interactive controllers */ XPAD_XBOX360_VENDOR(0x1532), /* Razer Sabertooth */ XPAD_XBOXONE_VENDOR(0x1532), /* Razer Wildcat */ XPAD_XBOX360_VENDOR(0x15e4), /* Numark Xbox 360 controllers */ XPAD_XBOX360_VENDOR(0x162e), /* Joytech Xbox 360 controllers */ XPAD_XBOX360_VENDOR(0x1689), /* Razer Onza */ XPAD_XBOX360_VENDOR(0x17ef), /* Lenovo */ XPAD_XBOX360_VENDOR(0x1949), /* Amazon controllers */ XPAD_XBOX360_VENDOR(0x1a86), /* Nanjing Qinheng Microelectronics (WCH) */ XPAD_XBOX360_VENDOR(0x1bad), /* Harmonix Rock Band guitar and drums */ XPAD_XBOX360_VENDOR(0x1ee9), /* ZOTAC Technology Limited */ XPAD_XBOX360_VENDOR(0x20d6), /* PowerA controllers */ XPAD_XBOXONE_VENDOR(0x20d6), /* PowerA controllers */ XPAD_XBOX360_VENDOR(0x2345), /* Machenike Controllers */ XPAD_XBOX360_VENDOR(0x24c6), /* PowerA controllers */ XPAD_XBOXONE_VENDOR(0x24c6), /* PowerA controllers */ XPAD_XBOX360_VENDOR(0x2563), /* OneXPlayer Gamepad */ XPAD_XBOX360_VENDOR(0x260d), /* Dareu H101 */ XPAD_XBOXONE_VENDOR(0x294b), /* Snakebyte */ XPAD_XBOX360_VENDOR(0x2993), /* TECNO Mobile */ XPAD_XBOX360_VENDOR(0x2c22), /* Qanba Controllers */ XPAD_XBOX360_VENDOR(0x2dc8), /* 8BitDo Controllers */ XPAD_XBOXONE_VENDOR(0x2dc8), /* 8BitDo Controllers */ XPAD_XBOXONE_VENDOR(0x2e24), /* Hyperkin Controllers */ XPAD_XBOX360_VENDOR(0x2f24), /* GameSir Controllers */ XPAD_XBOXONE_VENDOR(0x2e95), /* SCUF Gaming Controller */ XPAD_XBOX360_VENDOR(0x31e3), /* Wooting Keyboards */ XPAD_XBOX360_VENDOR(0x3285), /* Nacon GC-100 */ XPAD_XBOXONE_VENDOR(0x3285), /* Nacon Evol-X */ XPAD_XBOX360_VENDOR(0x3537), /* GameSir Controllers */ XPAD_XBOXONE_VENDOR(0x3537), /* GameSir Controllers */ XPAD_XBOX360_VENDOR(0x3651), /* CRKD Controllers */ XPAD_XBOXONE_VENDOR(0x366c), /* ByoWave controllers */ XPAD_XBOX360_VENDOR(0x37d7), /* Flydigi Controllers */ XPAD_XBOX360_VENDOR(0x413d), /* Black Shark Green Ghost Controller */ { } }; MODULE_DEVICE_TABLE(usb, xpad_table); struct xboxone_init_packet { u16 idVendor; u16 idProduct; const u8 *data; u8 len; }; #define XBOXONE_INIT_PKT(_vid, _pid, _data) \ { \ .idVendor = (_vid), \ .idProduct = (_pid), \ .data = (_data), \ .len = ARRAY_SIZE(_data), \ } /* * starting with xbox one, the game input protocol is used * magic numbers are taken from * - https://github.com/xpadneo/gip-dissector/blob/main/src/gip-dissector.lua * - https://github.com/medusalix/xone/blob/master/bus/protocol.c */ #define GIP_CMD_ACK 0x01 #define GIP_CMD_ANNOUNCE 0x02 #define GIP_CMD_IDENTIFY 0x04 #define GIP_CMD_POWER 0x05 #define GIP_CMD_AUTHENTICATE 0x06 #define GIP_CMD_VIRTUAL_KEY 0x07 #define GIP_CMD_RUMBLE 0x09 #define GIP_CMD_LED 0x0a #define GIP_CMD_FIRMWARE 0x0c #define GIP_CMD_INPUT 0x20 #define GIP_SEQ0 0x00 #define GIP_OPT_ACK 0x10 #define GIP_OPT_INTERNAL 0x20 /* * length of the command payload encoded with * https://en.wikipedia.org/wiki/LEB128 * which is a no-op for N < 128 */ #define GIP_PL_LEN(N) (N) /* * payload specific defines */ #define GIP_PWR_ON 0x00 #define GIP_LED_ON 0x01 #define GIP_MOTOR_R BIT(0) #define GIP_MOTOR_L BIT(1) #define GIP_MOTOR_RT BIT(2) #define GIP_MOTOR_LT BIT(3) #define GIP_MOTOR_ALL (GIP_MOTOR_R | GIP_MOTOR_L | GIP_MOTOR_RT | GIP_MOTOR_LT) #define GIP_WIRED_INTF_DATA 0 #define GIP_WIRED_INTF_AUDIO 1 /* * This packet is required for all Xbox One pads with 2015 * or later firmware installed (or present from the factory). */ static const u8 xboxone_power_on[] = { GIP_CMD_POWER, GIP_OPT_INTERNAL, GIP_SEQ0, GIP_PL_LEN(1), GIP_PWR_ON }; /* * This packet is required for Xbox One S (0x045e:0x02ea) * and Xbox One Elite Series 2 (0x045e:0x0b00) pads to * initialize the controller that was previously used in * Bluetooth mode. */ static const u8 xboxone_s_init[] = { GIP_CMD_POWER, GIP_OPT_INTERNAL, GIP_SEQ0, 0x0f, 0x06 }; /* * This packet is required to get additional input data * from Xbox One Elite Series 2 (0x045e:0x0b00) pads. * We mostly do this right now to get paddle data */ static const u8 extra_input_packet_init[] = { 0x4d, 0x10, 0x01, 0x02, 0x07, 0x00 }; /* * This packet is required for the Titanfall 2 Xbox One pads * (0x0e6f:0x0165) to finish initialization and for Hori pads * (0x0f0d:0x0067) to make the analog sticks work. */ static const u8 xboxone_hori_ack_id[] = { GIP_CMD_ACK, GIP_OPT_INTERNAL, GIP_SEQ0, GIP_PL_LEN(9), 0x00, GIP_CMD_IDENTIFY, GIP_OPT_INTERNAL, 0x3a, 0x00, 0x00, 0x00, 0x80, 0x00 }; /* * This packet is sent by default on Windows, and is required for some pads to * start sending input reports, including most (all?) of the PDP. These pads * include: (0x0e6f:0x02ab), (0x0e6f:0x02a4), (0x0e6f:0x02a6). */ static const u8 xboxone_led_on[] = { GIP_CMD_LED, GIP_OPT_INTERNAL, GIP_SEQ0, GIP_PL_LEN(3), 0x00, GIP_LED_ON, 0x14 }; /* * This packet is required for most (all?) of the PDP pads to start * sending input reports. These pads include: (0x0e6f:0x02ab), * (0x0e6f:0x02a4), (0x0e6f:0x02a6). */ static const u8 xboxone_auth_done[] = { GIP_CMD_AUTHENTICATE, GIP_OPT_INTERNAL, GIP_SEQ0, GIP_PL_LEN(2), 0x01, 0x00 }; /* * A specific rumble packet is required for some PowerA pads to start * sending input reports. One of those pads is (0x24c6:0x543a). */ static const u8 xboxone_rumblebegin_init[] = { GIP_CMD_RUMBLE, 0x00, GIP_SEQ0, GIP_PL_LEN(9), 0x00, GIP_MOTOR_ALL, 0x00, 0x00, 0x1D, 0x1D, 0xFF, 0x00, 0x00 }; /* * A rumble packet with zero FF intensity will immediately * terminate the rumbling required to init PowerA pads. * This should happen fast enough that the motors don't * spin up to enough speed to actually vibrate the gamepad. */ static const u8 xboxone_rumbleend_init[] = { GIP_CMD_RUMBLE, 0x00, GIP_SEQ0, GIP_PL_LEN(9), 0x00, GIP_MOTOR_ALL, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; /* * This specifies the selection of init packets that a gamepad * will be sent on init *and* the order in which they will be * sent. The correct sequence number will be added when the * packet is going to be sent. */ static const struct xboxone_init_packet xboxone_init_packets[] = { XBOXONE_INIT_PKT(0x0e6f, 0x0165, xboxone_hori_ack_id), XBOXONE_INIT_PKT(0x0f0d, 0x0067, xboxone_hori_ack_id), XBOXONE_INIT_PKT(0x0000, 0x0000, xboxone_power_on), XBOXONE_INIT_PKT(0x045e, 0x02ea, xboxone_s_init), XBOXONE_INIT_PKT(0x045e, 0x0b00, xboxone_s_init), XBOXONE_INIT_PKT(0x045e, 0x0b00, extra_input_packet_init), XBOXONE_INIT_PKT(0x0000, 0x0000, xboxone_led_on), XBOXONE_INIT_PKT(0x0000, 0x0000, xboxone_auth_done), XBOXONE_INIT_PKT(0x24c6, 0x541a, xboxone_rumblebegin_init), XBOXONE_INIT_PKT(0x24c6, 0x542a, xboxone_rumblebegin_init), XBOXONE_INIT_PKT(0x24c6, 0x543a, xboxone_rumblebegin_init), XBOXONE_INIT_PKT(0x24c6, 0x541a, xboxone_rumbleend_init), XBOXONE_INIT_PKT(0x24c6, 0x542a, xboxone_rumbleend_init), XBOXONE_INIT_PKT(0x24c6, 0x543a, xboxone_rumbleend_init), }; struct xpad_output_packet { u8 data[XPAD_PKT_LEN]; u8 len; bool pending; }; #define XPAD_OUT_CMD_IDX 0 #define XPAD_OUT_FF_IDX 1 #define XPAD_OUT_LED_IDX (1 + IS_ENABLED(CONFIG_JOYSTICK_XPAD_FF)) #define XPAD_NUM_OUT_PACKETS (1 + \ IS_ENABLED(CONFIG_JOYSTICK_XPAD_FF) + \ IS_ENABLED(CONFIG_JOYSTICK_XPAD_LEDS)) struct usb_xpad { struct input_dev *dev; /* input device interface */ struct input_dev __rcu *x360w_dev; struct usb_device *udev; /* usb device */ struct usb_interface *intf; /* usb interface */ bool pad_present; bool input_created; struct urb *irq_in; /* urb for interrupt in report */ unsigned char *idata; /* input data */ dma_addr_t idata_dma; struct urb *irq_out; /* urb for interrupt out report */ struct usb_anchor irq_out_anchor; bool irq_out_active; /* we must not use an active URB */ u8 odata_serial; /* serial number for xbox one protocol */ unsigned char *odata; /* output data */ dma_addr_t odata_dma; spinlock_t odata_lock; struct xpad_output_packet out_packets[XPAD_NUM_OUT_PACKETS]; int last_out_packet; int init_seq; #if defined(CONFIG_JOYSTICK_XPAD_LEDS) struct xpad_led *led; #endif char phys[64]; /* physical device path */ int mapping; /* map d-pad to buttons or to axes */ int xtype; /* type of xbox device */ int packet_type; /* type of the extended packet */ int pad_nr; /* the order x360 pads were attached */ const char *name; /* name of the device */ struct work_struct work; /* init/remove device from callback */ time64_t mode_btn_down_ts; bool delay_init; /* init packets should be delayed */ bool delayed_init_done; }; static int xpad_init_input(struct usb_xpad *xpad); static void xpad_deinit_input(struct usb_xpad *xpad); static int xpad_start_input(struct usb_xpad *xpad); static void xpadone_ack_mode_report(struct usb_xpad *xpad, u8 seq_num); static void xpad360w_poweroff_controller(struct usb_xpad *xpad); /* * xpad_process_packet * * Completes a request by converting the data into events for the * input subsystem. * * The used report descriptor was taken from ITO Takayuki's website: * http://euc.jp/periphs/xbox-controller.ja.html */ static void xpad_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *data) { struct input_dev *dev = xpad->dev; if (!(xpad->mapping & MAP_STICKS_TO_NULL)) { /* left stick */ input_report_abs(dev, ABS_X, (__s16) le16_to_cpup((__le16 *)(data + 12))); input_report_abs(dev, ABS_Y, ~(__s16) le16_to_cpup((__le16 *)(data + 14))); /* right stick */ input_report_abs(dev, ABS_RX, (__s16) le16_to_cpup((__le16 *)(data + 16))); input_report_abs(dev, ABS_RY, ~(__s16) le16_to_cpup((__le16 *)(data + 18))); } /* triggers left/right */ if (xpad->mapping & MAP_TRIGGERS_TO_BUTTONS) { input_report_key(dev, BTN_TL2, data[10]); input_report_key(dev, BTN_TR2, data[11]); } else { input_report_abs(dev, ABS_Z, data[10]); input_report_abs(dev, ABS_RZ, data[11]); } /* digital pad */ if (xpad->mapping & MAP_DPAD_TO_BUTTONS) { /* dpad as buttons (left, right, up, down) */ input_report_key(dev, BTN_DPAD_LEFT, data[2] & BIT(2)); input_report_key(dev, BTN_DPAD_RIGHT, data[2] & BIT(3)); input_report_key(dev, BTN_DPAD_UP, data[2] & BIT(0)); input_report_key(dev, BTN_DPAD_DOWN, data[2] & BIT(1)); } else { input_report_abs(dev, ABS_HAT0X, !!(data[2] & 0x08) - !!(data[2] & 0x04)); input_report_abs(dev, ABS_HAT0Y, !!(data[2] & 0x02) - !!(data[2] & 0x01)); } /* start/back buttons and stick press left/right */ input_report_key(dev, BTN_START, data[2] & BIT(4)); input_report_key(dev, BTN_SELECT, data[2] & BIT(5)); input_report_key(dev, BTN_THUMBL, data[2] & BIT(6)); input_report_key(dev, BTN_THUMBR, data[2] & BIT(7)); /* "analog" buttons A, B, X, Y */ input_report_key(dev, BTN_A, data[4]); input_report_key(dev, BTN_B, data[5]); input_report_key(dev, BTN_X, data[6]); input_report_key(dev, BTN_Y, data[7]); /* "analog" buttons black, white */ input_report_key(dev, BTN_C, data[8]); input_report_key(dev, BTN_Z, data[9]); input_sync(dev); } /* * xpad360_process_packet * * Completes a request by converting the data into events for the * input subsystem. It is version for xbox 360 controller * * The used report descriptor was taken from: * http://www.free60.org/wiki/Gamepad */ static void xpad360_process_packet(struct usb_xpad *xpad, struct input_dev *dev, u16 cmd, unsigned char *data) { /* valid pad data */ if (data[0] != 0x00) return; /* digital pad */ if (xpad->mapping & MAP_DPAD_TO_BUTTONS) { /* dpad as buttons (left, right, up, down) */ input_report_key(dev, BTN_DPAD_LEFT, data[2] & BIT(2)); input_report_key(dev, BTN_DPAD_RIGHT, data[2] & BIT(3)); input_report_key(dev, BTN_DPAD_UP, data[2] & BIT(0)); input_report_key(dev, BTN_DPAD_DOWN, data[2] & BIT(1)); } /* * This should be a simple else block. However historically * xbox360w has mapped DPAD to buttons while xbox360 did not. This * made no sense, but now we can not just switch back and have to * support both behaviors. */ if (!(xpad->mapping & MAP_DPAD_TO_BUTTONS) || xpad->xtype == XTYPE_XBOX360W) { input_report_abs(dev, ABS_HAT0X, !!(data[2] & 0x08) - !!(data[2] & 0x04)); input_report_abs(dev, ABS_HAT0Y, !!(data[2] & 0x02) - !!(data[2] & 0x01)); } /* start/back buttons */ input_report_key(dev, BTN_START, data[2] & BIT(4)); input_report_key(dev, BTN_SELECT, data[2] & BIT(5)); /* stick press left/right */ input_report_key(dev, BTN_THUMBL, data[2] & BIT(6)); input_report_key(dev, BTN_THUMBR, data[2] & BIT(7)); /* buttons A,B,X,Y,TL,TR and MODE */ input_report_key(dev, BTN_A, data[3] & BIT(4)); input_report_key(dev, BTN_B, data[3] & BIT(5)); input_report_key(dev, BTN_X, data[3] & BIT(6)); input_report_key(dev, BTN_Y, data[3] & BIT(7)); input_report_key(dev, BTN_TL, data[3] & BIT(0)); input_report_key(dev, BTN_TR, data[3] & BIT(1)); input_report_key(dev, BTN_MODE, data[3] & BIT(2)); if (!(xpad->mapping & MAP_STICKS_TO_NULL)) { /* left stick */ input_report_abs(dev, ABS_X, (__s16) le16_to_cpup((__le16 *)(data + 6))); input_report_abs(dev, ABS_Y, ~(__s16) le16_to_cpup((__le16 *)(data + 8))); /* right stick */ input_report_abs(dev, ABS_RX, (__s16) le16_to_cpup((__le16 *)(data + 10))); input_report_abs(dev, ABS_RY, ~(__s16) le16_to_cpup((__le16 *)(data + 12))); } /* triggers left/right */ if (xpad->mapping & MAP_TRIGGERS_TO_BUTTONS) { input_report_key(dev, BTN_TL2, data[4]); input_report_key(dev, BTN_TR2, data[5]); } else { input_report_abs(dev, ABS_Z, data[4]); input_report_abs(dev, ABS_RZ, data[5]); } input_sync(dev); /* XBOX360W controllers can't be turned off without driver assistance */ if (xpad->xtype == XTYPE_XBOX360W) { if (xpad->mode_btn_down_ts > 0 && xpad->pad_present && ((ktime_get_seconds() - xpad->mode_btn_down_ts) >= XPAD360W_POWEROFF_TIMEOUT)) { xpad360w_poweroff_controller(xpad); xpad->mode_btn_down_ts = 0; return; } /* mode button down/up */ if (data[3] & BIT(2)) xpad->mode_btn_down_ts = ktime_get_seconds(); else xpad->mode_btn_down_ts = 0; } } static void xpad_presence_work(struct work_struct *work) { struct usb_xpad *xpad = container_of(work, struct usb_xpad, work); int error; if (xpad->pad_present) { error = xpad_init_input(xpad); if (error) { /* complain only, not much else we can do here */ dev_err(&xpad->dev->dev, "unable to init device: %d\n", error); } else { rcu_assign_pointer(xpad->x360w_dev, xpad->dev); } } else { RCU_INIT_POINTER(xpad->x360w_dev, NULL); synchronize_rcu(); /* * Now that we are sure xpad360w_process_packet is not * using input device we can get rid of it. */ xpad_deinit_input(xpad); } } /* * xpad360w_process_packet * * Completes a request by converting the data into events for the * input subsystem. It is version for xbox 360 wireless controller. * * Byte.Bit * 00.1 - Status change: The controller or headset has connected/disconnected * Bits 01.7 and 01.6 are valid * 01.7 - Controller present * 01.6 - Headset present * 01.1 - Pad state (Bytes 4+) valid * */ static void xpad360w_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *data) { struct input_dev *dev; bool present; /* Presence change */ if (data[0] & 0x08) { present = (data[1] & 0x80) != 0; if (xpad->pad_present != present) { xpad->pad_present = present; schedule_work(&xpad->work); } } /* Valid pad data */ if (data[1] != 0x1) return; rcu_read_lock(); dev = rcu_dereference(xpad->x360w_dev); if (dev) xpad360_process_packet(xpad, dev, cmd, &data[4]); rcu_read_unlock(); } /* * xpadone_process_packet * * Completes a request by converting the data into events for the * input subsystem. This version is for the Xbox One controller. * * The report format was gleaned from * https://github.com/kylelemons/xbox/blob/master/xbox.go */ static void xpadone_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *data, u32 len) { struct input_dev *dev = xpad->dev; bool do_sync = false; /* the xbox button has its own special report */ if (data[0] == GIP_CMD_VIRTUAL_KEY) { /* * The Xbox One S controller requires these reports to be * acked otherwise it continues sending them forever and * won't report further mode button events. */ if (data[1] == (GIP_OPT_ACK | GIP_OPT_INTERNAL)) xpadone_ack_mode_report(xpad, data[2]); input_report_key(dev, BTN_MODE, data[4] & GENMASK(1, 0)); input_sync(dev); do_sync = true; } else if (data[0] == GIP_CMD_FIRMWARE) { /* Some packet formats force us to use this separate to poll paddle inputs */ if (xpad->packet_type == PKT_XBE2_FW_5_11) { /* Mute paddles if controller is in a custom profile slot * Checked by looking at the active profile slot to * verify it's the default slot */ if (data[19] != 0) data[18] = 0; /* Elite Series 2 split packet paddle bits */ input_report_key(dev, BTN_GRIPR, data[18] & BIT(0)); input_report_key(dev, BTN_GRIPR2, data[18] & BIT(1)); input_report_key(dev, BTN_GRIPL, data[18] & BIT(2)); input_report_key(dev, BTN_GRIPL2, data[18] & BIT(3)); do_sync = true; } } else if (data[0] == GIP_CMD_ANNOUNCE) { int error; if (xpad->delay_init && !xpad->delayed_init_done) { xpad->delayed_init_done = true; error = xpad_start_input(xpad); if (error) dev_warn(&xpad->dev->dev, "unable to start delayed input: %d\n", error); } } else if (data[0] == GIP_CMD_INPUT) { /* The main valid packet type for inputs */ /* menu/view buttons */ input_report_key(dev, BTN_START, data[4] & BIT(2)); input_report_key(dev, BTN_SELECT, data[4] & BIT(3)); if (xpad->mapping & MAP_SHARE_BUTTON) { if (xpad->mapping & MAP_SHARE_OFFSET) input_report_key(dev, KEY_RECORD, data[len - 26] & BIT(0)); else input_report_key(dev, KEY_RECORD, data[len - 18] & BIT(0)); } /* buttons A,B,X,Y */ input_report_key(dev, BTN_A, data[4] & BIT(4)); input_report_key(dev, BTN_B, data[4] & BIT(5)); input_report_key(dev, BTN_X, data[4] & BIT(6)); input_report_key(dev, BTN_Y, data[4] & BIT(7)); /* digital pad */ if (xpad->mapping & MAP_DPAD_TO_BUTTONS) { /* dpad as buttons (left, right, up, down) */ input_report_key(dev, BTN_DPAD_LEFT, data[5] & BIT(2)); input_report_key(dev, BTN_DPAD_RIGHT, data[5] & BIT(3)); input_report_key(dev, BTN_DPAD_UP, data[5] & BIT(0)); input_report_key(dev, BTN_DPAD_DOWN, data[5] & BIT(1)); } else { input_report_abs(dev, ABS_HAT0X, !!(data[5] & 0x08) - !!(data[5] & 0x04)); input_report_abs(dev, ABS_HAT0Y, !!(data[5] & 0x02) - !!(data[5] & 0x01)); } /* TL/TR */ input_report_key(dev, BTN_TL, data[5] & BIT(4)); input_report_key(dev, BTN_TR, data[5] & BIT(5)); /* stick press left/right */ input_report_key(dev, BTN_THUMBL, data[5] & BIT(6)); input_report_key(dev, BTN_THUMBR, data[5] & BIT(7)); if (!(xpad->mapping & MAP_STICKS_TO_NULL)) { /* left stick */ input_report_abs(dev, ABS_X, (__s16) le16_to_cpup((__le16 *)(data + 10))); input_report_abs(dev, ABS_Y, ~(__s16) le16_to_cpup((__le16 *)(data + 12))); /* right stick */ input_report_abs(dev, ABS_RX, (__s16) le16_to_cpup((__le16 *)(data + 14))); input_report_abs(dev, ABS_RY, ~(__s16) le16_to_cpup((__le16 *)(data + 16))); } /* triggers left/right */ if (xpad->mapping & MAP_TRIGGERS_TO_BUTTONS) { input_report_key(dev, BTN_TL2, (__u16) le16_to_cpup((__le16 *)(data + 6))); input_report_key(dev, BTN_TR2, (__u16) le16_to_cpup((__le16 *)(data + 8))); } else { input_report_abs(dev, ABS_Z, (__u16) le16_to_cpup((__le16 *)(data + 6))); input_report_abs(dev, ABS_RZ, (__u16) le16_to_cpup((__le16 *)(data + 8))); } /* Profile button has a value of 0-3, so it is reported as an axis */ if (xpad->mapping & MAP_PROFILE_BUTTON) input_report_abs(dev, ABS_PROFILE, data[34]); /* paddle handling */ /* based on SDL's SDL_hidapi_xboxone.c */ if (xpad->mapping & MAP_PADDLES) { if (xpad->packet_type == PKT_XBE1) { /* Mute paddles if controller has a custom mapping applied. * Checked by comparing the current mapping * config against the factory mapping config */ if (memcmp(&data[4], &data[18], 2) != 0) data[32] = 0; /* OG Elite Series Controller paddle bits */ input_report_key(dev, BTN_GRIPR, data[32] & BIT(1)); input_report_key(dev, BTN_GRIPR2, data[32] & BIT(3)); input_report_key(dev, BTN_GRIPL, data[32] & BIT(0)); input_report_key(dev, BTN_GRIPL2, data[32] & BIT(2)); } else if (xpad->packet_type == PKT_XBE2_FW_OLD) { /* Mute paddles if controller has a custom mapping applied. * Checked by comparing the current mapping * config against the factory mapping config */ if (data[19] != 0) data[18] = 0; /* Elite Series 2 4.x firmware paddle bits */ input_report_key(dev, BTN_GRIPR, data[18] & BIT(0)); input_report_key(dev, BTN_GRIPR2, data[18] & BIT(1)); input_report_key(dev, BTN_GRIPL, data[18] & BIT(2)); input_report_key(dev, BTN_GRIPL2, data[18] & BIT(3)); } else if (xpad->packet_type == PKT_XBE2_FW_5_EARLY) { /* Mute paddles if controller has a custom mapping applied. * Checked by comparing the current mapping * config against the factory mapping config */ if (data[23] != 0) data[22] = 0; /* Elite Series 2 5.x firmware paddle bits * (before the packet was split) */ input_report_key(dev, BTN_GRIPR, data[22] & BIT(0)); input_report_key(dev, BTN_GRIPR2, data[22] & BIT(1)); input_report_key(dev, BTN_GRIPL, data[22] & BIT(2)); input_report_key(dev, BTN_GRIPL2, data[22] & BIT(3)); } } do_sync = true; } if (do_sync) input_sync(dev); } static void xpad_irq_in(struct urb *urb) { struct usb_xpad *xpad = urb->context; struct device *dev = &xpad->intf->dev; int retval, status; status = urb->status; switch (status) { case 0: /* success */ break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: /* this urb is terminated, clean up */ dev_dbg(dev, "%s - urb shutting down with status: %d\n", __func__, status); return; default: dev_dbg(dev, "%s - nonzero urb status received: %d\n", __func__, status); goto exit; } switch (xpad->xtype) { case XTYPE_XBOX360: xpad360_process_packet(xpad, xpad->dev, 0, xpad->idata); break; case XTYPE_XBOX360W: xpad360w_process_packet(xpad, 0, xpad->idata); break; case XTYPE_XBOXONE: xpadone_process_packet(xpad, 0, xpad->idata, urb->actual_length); break; default: xpad_process_packet(xpad, 0, xpad->idata); } exit: retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval) dev_err(dev, "%s - usb_submit_urb failed with result %d\n", __func__, retval); } /* Callers must hold xpad->odata_lock spinlock */ static bool xpad_prepare_next_init_packet(struct usb_xpad *xpad) { const struct xboxone_init_packet *init_packet; if (xpad->xtype != XTYPE_XBOXONE) return false; /* * Some dongles will discard init packets if they're sent before the * controller connects. In these cases, we need to wait until we get * an announce packet from them to send the init packet sequence. */ if (xpad->delay_init && !xpad->delayed_init_done) return false; /* Perform initialization sequence for Xbox One pads that require it */ while (xpad->init_seq < ARRAY_SIZE(xboxone_init_packets)) { init_packet = &xboxone_init_packets[xpad->init_seq++]; if (init_packet->idVendor != 0 && init_packet->idVendor != xpad->dev->id.vendor) continue; if (init_packet->idProduct != 0 && init_packet->idProduct != xpad->dev->id.product) continue; /* This packet applies to our device, so prepare to send it */ memcpy(xpad->odata, init_packet->data, init_packet->len); xpad->irq_out->transfer_buffer_length = init_packet->len; /* Update packet with current sequence number */ xpad->odata[2] = xpad->odata_serial++; return true; } return false; } /* Callers must hold xpad->odata_lock spinlock */ static bool xpad_prepare_next_out_packet(struct usb_xpad *xpad) { struct xpad_output_packet *pkt, *packet = NULL; int i; /* We may have init packets to send before we can send user commands */ if (xpad_prepare_next_init_packet(xpad)) return true; for (i = 0; i < XPAD_NUM_OUT_PACKETS; i++) { if (++xpad->last_out_packet >= XPAD_NUM_OUT_PACKETS) xpad->last_out_packet = 0; pkt = &xpad->out_packets[xpad->last_out_packet]; if (pkt->pending) { dev_dbg(&xpad->intf->dev, "%s - found pending output packet %d\n", __func__, xpad->last_out_packet); packet = pkt; break; } } if (packet) { memcpy(xpad->odata, packet->data, packet->len); xpad->irq_out->transfer_buffer_length = packet->len; packet->pending = false; return true; } return false; } /* Callers must hold xpad->odata_lock spinlock */ static int xpad_try_sending_next_out_packet(struct usb_xpad *xpad) { int error; if (!xpad->irq_out_active && xpad_prepare_next_out_packet(xpad)) { usb_anchor_urb(xpad->irq_out, &xpad->irq_out_anchor); error = usb_submit_urb(xpad->irq_out, GFP_ATOMIC); if (error) { if (error != -ENODEV) dev_err(&xpad->intf->dev, "%s - usb_submit_urb failed with result %d\n", __func__, error); usb_unanchor_urb(xpad->irq_out); return error; } xpad->irq_out_active = true; } return 0; } static void xpad_irq_out(struct urb *urb) { struct usb_xpad *xpad = urb->context; struct device *dev = &xpad->intf->dev; int status = urb->status; int error; guard(spinlock_irqsave)(&xpad->odata_lock); switch (status) { case 0: /* success */ xpad->irq_out_active = xpad_prepare_next_out_packet(xpad); break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: /* this urb is terminated, clean up */ dev_dbg(dev, "%s - urb shutting down with status: %d\n", __func__, status); xpad->irq_out_active = false; break; default: dev_dbg(dev, "%s - nonzero urb status received: %d\n", __func__, status); break; } if (xpad->irq_out_active) { usb_anchor_urb(urb, &xpad->irq_out_anchor); error = usb_submit_urb(urb, GFP_ATOMIC); if (error) { dev_err(dev, "%s - usb_submit_urb failed with result %d\n", __func__, error); usb_unanchor_urb(urb); xpad->irq_out_active = false; } } } static int xpad_init_output(struct usb_interface *intf, struct usb_xpad *xpad, struct usb_endpoint_descriptor *ep_irq_out) { int error; if (xpad->xtype == XTYPE_UNKNOWN) return 0; init_usb_anchor(&xpad->irq_out_anchor); xpad->odata = usb_alloc_coherent(xpad->udev, XPAD_PKT_LEN, GFP_KERNEL, &xpad->odata_dma); if (!xpad->odata) return -ENOMEM; spin_lock_init(&xpad->odata_lock); xpad->irq_out = usb_alloc_urb(0, GFP_KERNEL); if (!xpad->irq_out) { error = -ENOMEM; goto err_free_coherent; } usb_fill_int_urb(xpad->irq_out, xpad->udev, usb_sndintpipe(xpad->udev, ep_irq_out->bEndpointAddress), xpad->odata, XPAD_PKT_LEN, xpad_irq_out, xpad, ep_irq_out->bInterval); xpad->irq_out->transfer_dma = xpad->odata_dma; xpad->irq_out->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; return 0; err_free_coherent: usb_free_coherent(xpad->udev, XPAD_PKT_LEN, xpad->odata, xpad->odata_dma); return error; } static void xpad_stop_output(struct usb_xpad *xpad) { if (xpad->xtype != XTYPE_UNKNOWN) { if (!usb_wait_anchor_empty_timeout(&xpad->irq_out_anchor, 5000)) { dev_warn(&xpad->intf->dev, "timed out waiting for output URB to complete, killing\n"); usb_kill_anchored_urbs(&xpad->irq_out_anchor); } } } static void xpad_deinit_output(struct usb_xpad *xpad) { if (xpad->xtype != XTYPE_UNKNOWN) { usb_free_urb(xpad->irq_out); usb_free_coherent(xpad->udev, XPAD_PKT_LEN, xpad->odata, xpad->odata_dma); } } static int xpad_inquiry_pad_presence(struct usb_xpad *xpad) { struct xpad_output_packet *packet = &xpad->out_packets[XPAD_OUT_CMD_IDX]; guard(spinlock_irqsave)(&xpad->odata_lock); packet->data[0] = 0x08; packet->data[1] = 0x00; packet->data[2] = 0x0F; packet->data[3] = 0xC0; packet->data[4] = 0x00; packet->data[5] = 0x00; packet->data[6] = 0x00; packet->data[7] = 0x00; packet->data[8] = 0x00; packet->data[9] = 0x00; packet->data[10] = 0x00; packet->data[11] = 0x00; packet->len = 12; packet->pending = true; /* Reset the sequence so we send out presence first */ xpad->last_out_packet = -1; return xpad_try_sending_next_out_packet(xpad); } static int xpad_start_xbox_one(struct usb_xpad *xpad) { int error; if (usb_ifnum_to_if(xpad->udev, GIP_WIRED_INTF_AUDIO)) { /* * Explicitly disable the audio interface. This is needed * for some controllers, such as the PowerA Enhanced Wired * Controller for Series X|S (0x20d6:0x200e) to report the * guide button. */ error = usb_set_interface(xpad->udev, GIP_WIRED_INTF_AUDIO, 0); if (error) dev_warn(&xpad->dev->dev, "unable to disable audio interface: %d\n", error); } guard(spinlock_irqsave)(&xpad->odata_lock); /* * Begin the init sequence by attempting to send a packet. * We will cycle through the init packet sequence before * sending any packets from the output ring. */ xpad->init_seq = 0; return xpad_try_sending_next_out_packet(xpad); } static void xpadone_ack_mode_report(struct usb_xpad *xpad, u8 seq_num) { struct xpad_output_packet *packet = &xpad->out_packets[XPAD_OUT_CMD_IDX]; static const u8 mode_report_ack[] = { GIP_CMD_ACK, GIP_OPT_INTERNAL, GIP_SEQ0, GIP_PL_LEN(9), 0x00, GIP_CMD_VIRTUAL_KEY, GIP_OPT_INTERNAL, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00 }; guard(spinlock_irqsave)(&xpad->odata_lock); packet->len = sizeof(mode_report_ack); memcpy(packet->data, mode_report_ack, packet->len); packet->data[2] = seq_num; packet->pending = true; /* Reset the sequence so we send out the ack now */ xpad->last_out_packet = -1; xpad_try_sending_next_out_packet(xpad); } #ifdef CONFIG_JOYSTICK_XPAD_FF static int xpad_play_effect(struct input_dev *dev, void *data, struct ff_effect *effect) { struct usb_xpad *xpad = input_get_drvdata(dev); struct xpad_output_packet *packet = &xpad->out_packets[XPAD_OUT_FF_IDX]; __u16 strong; __u16 weak; if (effect->type != FF_RUMBLE) return 0; strong = effect->u.rumble.strong_magnitude; weak = effect->u.rumble.weak_magnitude; guard(spinlock_irqsave)(&xpad->odata_lock); switch (xpad->xtype) { case XTYPE_XBOX: packet->data[0] = 0x00; packet->data[1] = 0x06; packet->data[2] = 0x00; packet->data[3] = strong / 256; /* left actuator */ packet->data[4] = 0x00; packet->data[5] = weak / 256; /* right actuator */ packet->len = 6; packet->pending = true; break; case XTYPE_XBOX360: packet->data[0] = 0x00; packet->data[1] = 0x08; packet->data[2] = 0x00; packet->data[3] = strong / 256; /* left actuator? */ packet->data[4] = weak / 256; /* right actuator? */ packet->data[5] = 0x00; packet->data[6] = 0x00; packet->data[7] = 0x00; packet->len = 8; packet->pending = true; break; case XTYPE_XBOX360W: packet->data[0] = 0x00; packet->data[1] = 0x01; packet->data[2] = 0x0F; packet->data[3] = 0xC0; packet->data[4] = 0x00; packet->data[5] = strong / 256; packet->data[6] = weak / 256; packet->data[7] = 0x00; packet->data[8] = 0x00; packet->data[9] = 0x00; packet->data[10] = 0x00; packet->data[11] = 0x00; packet->len = 12; packet->pending = true; break; case XTYPE_XBOXONE: packet->data[0] = GIP_CMD_RUMBLE; /* activate rumble */ packet->data[1] = 0x00; packet->data[2] = xpad->odata_serial++; packet->data[3] = GIP_PL_LEN(9); packet->data[4] = 0x00; packet->data[5] = GIP_MOTOR_ALL; packet->data[6] = 0x00; /* left trigger */ packet->data[7] = 0x00; /* right trigger */ packet->data[8] = strong / 512; /* left actuator */ packet->data[9] = weak / 512; /* right actuator */ packet->data[10] = 0xFF; /* on period */ packet->data[11] = 0x00; /* off period */ packet->data[12] = 0xFF; /* repeat count */ packet->len = 13; packet->pending = true; break; default: dev_dbg(&xpad->dev->dev, "%s - rumble command sent to unsupported xpad type: %d\n", __func__, xpad->xtype); return -EINVAL; } return xpad_try_sending_next_out_packet(xpad); } static int xpad_init_ff(struct usb_xpad *xpad) { if (xpad->xtype == XTYPE_UNKNOWN) return 0; input_set_capability(xpad->dev, EV_FF, FF_RUMBLE); return input_ff_create_memless(xpad->dev, NULL, xpad_play_effect); } #else static int xpad_init_ff(struct usb_xpad *xpad) { return 0; } #endif #if defined(CONFIG_JOYSTICK_XPAD_LEDS) #include <linux/leds.h> #include <linux/idr.h> static DEFINE_IDA(xpad_pad_seq); struct xpad_led { char name[16]; struct led_classdev led_cdev; struct usb_xpad *xpad; }; /* * set the LEDs on Xbox 360 / Wireless Controllers * @param command * 0: off * 1: all blink, then previous setting * 2: 1/top-left blink, then on * 3: 2/top-right blink, then on * 4: 3/bottom-left blink, then on * 5: 4/bottom-right blink, then on * 6: 1/top-left on * 7: 2/top-right on * 8: 3/bottom-left on * 9: 4/bottom-right on * 10: rotate * 11: blink, based on previous setting * 12: slow blink, based on previous setting * 13: rotate with two lights * 14: persistent slow all blink * 15: blink once, then previous setting */ static void xpad_send_led_command(struct usb_xpad *xpad, int command) { struct xpad_output_packet *packet = &xpad->out_packets[XPAD_OUT_LED_IDX]; command %= 16; guard(spinlock_irqsave)(&xpad->odata_lock); switch (xpad->xtype) { case XTYPE_XBOX360: packet->data[0] = 0x01; packet->data[1] = 0x03; packet->data[2] = command; packet->len = 3; packet->pending = true; break; case XTYPE_XBOX360W: packet->data[0] = 0x00; packet->data[1] = 0x00; packet->data[2] = 0x08; packet->data[3] = 0x40 + command; packet->data[4] = 0x00; packet->data[5] = 0x00; packet->data[6] = 0x00; packet->data[7] = 0x00; packet->data[8] = 0x00; packet->data[9] = 0x00; packet->data[10] = 0x00; packet->data[11] = 0x00; packet->len = 12; packet->pending = true; break; } xpad_try_sending_next_out_packet(xpad); } /* * Light up the segment corresponding to the pad number on * Xbox 360 Controllers. */ static void xpad_identify_controller(struct usb_xpad *xpad) { led_set_brightness(&xpad->led->led_cdev, (xpad->pad_nr % 4) + 2); } static void xpad_led_set(struct led_classdev *led_cdev, enum led_brightness value) { struct xpad_led *xpad_led = container_of(led_cdev, struct xpad_led, led_cdev); xpad_send_led_command(xpad_led->xpad, value); } static int xpad_led_probe(struct usb_xpad *xpad) { struct xpad_led *led; struct led_classdev *led_cdev; int error; if (xpad->xtype != XTYPE_XBOX360 && xpad->xtype != XTYPE_XBOX360W) return 0; xpad->led = led = kzalloc(sizeof(*led), GFP_KERNEL); if (!led) return -ENOMEM; xpad->pad_nr = ida_alloc(&xpad_pad_seq, GFP_KERNEL); if (xpad->pad_nr < 0) { error = xpad->pad_nr; goto err_free_mem; } snprintf(led->name, sizeof(led->name), "xpad%d", xpad->pad_nr); led->xpad = xpad; led_cdev = &led->led_cdev; led_cdev->name = led->name; led_cdev->brightness_set = xpad_led_set; led_cdev->flags = LED_CORE_SUSPENDRESUME; error = led_classdev_register(&xpad->udev->dev, led_cdev); if (error) goto err_free_id; xpad_identify_controller(xpad); return 0; err_free_id: ida_free(&xpad_pad_seq, xpad->pad_nr); err_free_mem: kfree(led); xpad->led = NULL; return error; } static void xpad_led_disconnect(struct usb_xpad *xpad) { struct xpad_led *xpad_led = xpad->led; if (xpad_led) { led_classdev_unregister(&xpad_led->led_cdev); ida_free(&xpad_pad_seq, xpad->pad_nr); kfree(xpad_led); } } #else static int xpad_led_probe(struct usb_xpad *xpad) { return 0; } static void xpad_led_disconnect(struct usb_xpad *xpad) { } #endif static int xpad_start_input(struct usb_xpad *xpad) { int error; if (usb_submit_urb(xpad->irq_in, GFP_KERNEL)) return -EIO; if (xpad->xtype == XTYPE_XBOXONE) { error = xpad_start_xbox_one(xpad); if (error) { usb_kill_urb(xpad->irq_in); return error; } } if (xpad->xtype == XTYPE_XBOX360) { /* * Some third-party controllers Xbox 360-style controllers * require this message to finish initialization. */ u8 dummy[20]; error = usb_control_msg_recv(xpad->udev, 0, /* bRequest */ 0x01, /* bmRequestType */ USB_TYPE_VENDOR | USB_DIR_IN | USB_RECIP_INTERFACE, /* wValue */ 0x100, /* wIndex */ 0x00, dummy, sizeof(dummy), 25, GFP_KERNEL); if (error) dev_warn(&xpad->dev->dev, "unable to receive magic message: %d\n", error); } return 0; } static void xpad_stop_input(struct usb_xpad *xpad) { usb_kill_urb(xpad->irq_in); } static void xpad360w_poweroff_controller(struct usb_xpad *xpad) { struct xpad_output_packet *packet = &xpad->out_packets[XPAD_OUT_CMD_IDX]; guard(spinlock_irqsave)(&xpad->odata_lock); packet->data[0] = 0x00; packet->data[1] = 0x00; packet->data[2] = 0x08; packet->data[3] = 0xC0; packet->data[4] = 0x00; packet->data[5] = 0x00; packet->data[6] = 0x00; packet->data[7] = 0x00; packet->data[8] = 0x00; packet->data[9] = 0x00; packet->data[10] = 0x00; packet->data[11] = 0x00; packet->len = 12; packet->pending = true; /* Reset the sequence so we send out poweroff now */ xpad->last_out_packet = -1; xpad_try_sending_next_out_packet(xpad); } static int xpad360w_start_input(struct usb_xpad *xpad) { int error; error = usb_submit_urb(xpad->irq_in, GFP_KERNEL); if (error) return -EIO; /* * Send presence packet. * This will force the controller to resend connection packets. * This is useful in the case we activate the module after the * adapter has been plugged in, as it won't automatically * send us info about the controllers. */ error = xpad_inquiry_pad_presence(xpad); if (error) { usb_kill_urb(xpad->irq_in); return error; } return 0; } static void xpad360w_stop_input(struct usb_xpad *xpad) { usb_kill_urb(xpad->irq_in); /* Make sure we are done with presence work if it was scheduled */ flush_work(&xpad->work); } static int xpad_open(struct input_dev *dev) { struct usb_xpad *xpad = input_get_drvdata(dev); return xpad_start_input(xpad); } static void xpad_close(struct input_dev *dev) { struct usb_xpad *xpad = input_get_drvdata(dev); xpad_stop_input(xpad); } static void xpad_set_up_abs(struct input_dev *input_dev, signed short abs) { struct usb_xpad *xpad = input_get_drvdata(input_dev); switch (abs) { case ABS_X: case ABS_Y: case ABS_RX: case ABS_RY: /* the two sticks */ input_set_abs_params(input_dev, abs, -32768, 32767, 16, 128); break; case ABS_Z: case ABS_RZ: /* the triggers (if mapped to axes) */ if (xpad->xtype == XTYPE_XBOXONE) input_set_abs_params(input_dev, abs, 0, 1023, 0, 0); else input_set_abs_params(input_dev, abs, 0, 255, 0, 0); break; case ABS_HAT0X: case ABS_HAT0Y: /* the d-pad (only if dpad is mapped to axes */ input_set_abs_params(input_dev, abs, -1, 1, 0, 0); break; case ABS_PROFILE: /* 4 value profile button (such as on XAC) */ input_set_abs_params(input_dev, abs, 0, 4, 0, 0); break; default: input_set_abs_params(input_dev, abs, 0, 0, 0, 0); break; } } static void xpad_deinit_input(struct usb_xpad *xpad) { if (xpad->input_created) { xpad->input_created = false; xpad_led_disconnect(xpad); input_unregister_device(xpad->dev); } } static int xpad_init_input(struct usb_xpad *xpad) { struct input_dev *input_dev; int i, error; input_dev = input_allocate_device(); if (!input_dev) return -ENOMEM; xpad->dev = input_dev; input_dev->name = xpad->name; input_dev->phys = xpad->phys; usb_to_input_id(xpad->udev, &input_dev->id); if (xpad->xtype == XTYPE_XBOX360W) { /* x360w controllers and the receiver have different ids */ input_dev->id.product = 0x02a1; } input_dev->dev.parent = &xpad->intf->dev; input_set_drvdata(input_dev, xpad); if (xpad->xtype != XTYPE_XBOX360W) { input_dev->open = xpad_open; input_dev->close = xpad_close; } if (!(xpad->mapping & MAP_STICKS_TO_NULL)) { /* set up axes */ for (i = 0; xpad_abs[i] >= 0; i++) xpad_set_up_abs(input_dev, xpad_abs[i]); } /* set up standard buttons */ for (i = 0; xpad_common_btn[i] >= 0; i++) input_set_capability(input_dev, EV_KEY, xpad_common_btn[i]); /* set up model-specific ones */ if (xpad->xtype == XTYPE_XBOX360 || xpad->xtype == XTYPE_XBOX360W || xpad->xtype == XTYPE_XBOXONE) { for (i = 0; xpad360_btn[i] >= 0; i++) input_set_capability(input_dev, EV_KEY, xpad360_btn[i]); if (xpad->mapping & MAP_SHARE_BUTTON) input_set_capability(input_dev, EV_KEY, KEY_RECORD); } else { for (i = 0; xpad_btn[i] >= 0; i++) input_set_capability(input_dev, EV_KEY, xpad_btn[i]); } if (xpad->mapping & MAP_DPAD_TO_BUTTONS) { for (i = 0; xpad_btn_pad[i] >= 0; i++) input_set_capability(input_dev, EV_KEY, xpad_btn_pad[i]); } /* set up paddles if the controller has them */ if (xpad->mapping & MAP_PADDLES) { for (i = 0; xpad_btn_paddles[i] >= 0; i++) input_set_capability(input_dev, EV_KEY, xpad_btn_paddles[i]); } /* * This should be a simple else block. However historically * xbox360w has mapped DPAD to buttons while xbox360 did not. This * made no sense, but now we can not just switch back and have to * support both behaviors. */ if (!(xpad->mapping & MAP_DPAD_TO_BUTTONS) || xpad->xtype == XTYPE_XBOX360W) { for (i = 0; xpad_abs_pad[i] >= 0; i++) xpad_set_up_abs(input_dev, xpad_abs_pad[i]); } if (xpad->mapping & MAP_TRIGGERS_TO_BUTTONS) { for (i = 0; xpad_btn_triggers[i] >= 0; i++) input_set_capability(input_dev, EV_KEY, xpad_btn_triggers[i]); } else { for (i = 0; xpad_abs_triggers[i] >= 0; i++) xpad_set_up_abs(input_dev, xpad_abs_triggers[i]); } /* setup profile button as an axis with 4 possible values */ if (xpad->mapping & MAP_PROFILE_BUTTON) xpad_set_up_abs(input_dev, ABS_PROFILE); error = xpad_init_ff(xpad); if (error) goto err_free_input; error = xpad_led_probe(xpad); if (error) goto err_destroy_ff; error = input_register_device(xpad->dev); if (error) goto err_disconnect_led; xpad->input_created = true; return 0; err_disconnect_led: xpad_led_disconnect(xpad); err_destroy_ff: input_ff_destroy(input_dev); err_free_input: input_free_device(input_dev); return error; } static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(intf); struct usb_xpad *xpad; struct usb_endpoint_descriptor *ep_irq_in, *ep_irq_out; int i, error; if (intf->cur_altsetting->desc.bNumEndpoints != 2) return -ENODEV; for (i = 0; xpad_device[i].idVendor; i++) { if ((le16_to_cpu(udev->descriptor.idVendor) == xpad_device[i].idVendor) && (le16_to_cpu(udev->descriptor.idProduct) == xpad_device[i].idProduct)) break; } xpad = kzalloc(sizeof(*xpad), GFP_KERNEL); if (!xpad) return -ENOMEM; usb_make_path(udev, xpad->phys, sizeof(xpad->phys)); strlcat(xpad->phys, "/input0", sizeof(xpad->phys)); xpad->idata = usb_alloc_coherent(udev, XPAD_PKT_LEN, GFP_KERNEL, &xpad->idata_dma); if (!xpad->idata) { error = -ENOMEM; goto err_free_mem; } xpad->irq_in = usb_alloc_urb(0, GFP_KERNEL); if (!xpad->irq_in) { error = -ENOMEM; goto err_free_idata; } xpad->udev = udev; xpad->intf = intf; xpad->mapping = xpad_device[i].mapping; xpad->xtype = xpad_device[i].xtype; xpad->name = xpad_device[i].name; if (xpad_device[i].flags & FLAG_DELAY_INIT) xpad->delay_init = true; xpad->packet_type = PKT_XB; INIT_WORK(&xpad->work, xpad_presence_work); if (xpad->xtype == XTYPE_UNKNOWN) { if (intf->cur_altsetting->desc.bInterfaceClass == USB_CLASS_VENDOR_SPEC) { if (intf->cur_altsetting->desc.bInterfaceProtocol == 129) xpad->xtype = XTYPE_XBOX360W; else if (intf->cur_altsetting->desc.bInterfaceProtocol == 208) xpad->xtype = XTYPE_XBOXONE; else xpad->xtype = XTYPE_XBOX360; } else { xpad->xtype = XTYPE_XBOX; } if (dpad_to_buttons) xpad->mapping |= MAP_DPAD_TO_BUTTONS; if (triggers_to_buttons) xpad->mapping |= MAP_TRIGGERS_TO_BUTTONS; if (sticks_to_null) xpad->mapping |= MAP_STICKS_TO_NULL; } if (xpad->xtype == XTYPE_XBOXONE && intf->cur_altsetting->desc.bInterfaceNumber != GIP_WIRED_INTF_DATA) { /* * The Xbox One controller lists three interfaces all with the * same interface class, subclass and protocol. Differentiate by * interface number. */ error = -ENODEV; goto err_free_in_urb; } ep_irq_in = ep_irq_out = NULL; for (i = 0; i < 2; i++) { struct usb_endpoint_descriptor *ep = &intf->cur_altsetting->endpoint[i].desc; if (usb_endpoint_xfer_int(ep)) { if (usb_endpoint_dir_in(ep)) ep_irq_in = ep; else ep_irq_out = ep; } } if (!ep_irq_in || !ep_irq_out) { error = -ENODEV; goto err_free_in_urb; } error = xpad_init_output(intf, xpad, ep_irq_out); if (error) goto err_free_in_urb; usb_fill_int_urb(xpad->irq_in, udev, usb_rcvintpipe(udev, ep_irq_in->bEndpointAddress), xpad->idata, XPAD_PKT_LEN, xpad_irq_in, xpad, ep_irq_in->bInterval); xpad->irq_in->transfer_dma = xpad->idata_dma; xpad->irq_in->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; usb_set_intfdata(intf, xpad); /* Packet type detection */ if (le16_to_cpu(udev->descriptor.idVendor) == 0x045e) { /* Microsoft controllers */ if (le16_to_cpu(udev->descriptor.idProduct) == 0x02e3) { /* The original elite controller always uses the oldest * type of extended packet */ xpad->packet_type = PKT_XBE1; } else if (le16_to_cpu(udev->descriptor.idProduct) == 0x0b00) { /* The elite 2 controller has seen multiple packet * revisions. These are tied to specific firmware * versions */ if (le16_to_cpu(udev->descriptor.bcdDevice) < 0x0500) { /* This is the format that the Elite 2 used * prior to the BLE update */ xpad->packet_type = PKT_XBE2_FW_OLD; } else if (le16_to_cpu(udev->descriptor.bcdDevice) < 0x050b) { /* This is the format that the Elite 2 used * prior to the update that split the packet */ xpad->packet_type = PKT_XBE2_FW_5_EARLY; } else { /* The split packet format that was introduced * in firmware v5.11 */ xpad->packet_type = PKT_XBE2_FW_5_11; } } } if (xpad->xtype == XTYPE_XBOX360W) { /* * Submit the int URB immediately rather than waiting for open * because we get status messages from the device whether * or not any controllers are attached. In fact, it's * exactly the message that a controller has arrived that * we're waiting for. */ error = xpad360w_start_input(xpad); if (error) goto err_deinit_output; /* * Wireless controllers require RESET_RESUME to work properly * after suspend. Ideally this quirk should be in usb core * quirk list, but we have too many vendors producing these * controllers and we'd need to maintain 2 identical lists * here in this driver and in usb core. */ udev->quirks |= USB_QUIRK_RESET_RESUME; } else { error = xpad_init_input(xpad); if (error) goto err_deinit_output; } return 0; err_deinit_output: xpad_deinit_output(xpad); err_free_in_urb: usb_free_urb(xpad->irq_in); err_free_idata: usb_free_coherent(udev, XPAD_PKT_LEN, xpad->idata, xpad->idata_dma); err_free_mem: kfree(xpad); return error; } static void xpad_disconnect(struct usb_interface *intf) { struct usb_xpad *xpad = usb_get_intfdata(intf); if (xpad->xtype == XTYPE_XBOX360W) xpad360w_stop_input(xpad); xpad_deinit_input(xpad); /* * Now that both input device and LED device are gone we can * stop output URB. */ xpad_stop_output(xpad); xpad_deinit_output(xpad); usb_free_urb(xpad->irq_in); usb_free_coherent(xpad->udev, XPAD_PKT_LEN, xpad->idata, xpad->idata_dma); kfree(xpad); usb_set_intfdata(intf, NULL); } static int xpad_suspend(struct usb_interface *intf, pm_message_t message) { struct usb_xpad *xpad = usb_get_intfdata(intf); struct input_dev *input = xpad->dev; if (xpad->xtype == XTYPE_XBOX360W) { /* * Wireless controllers always listen to input so * they are notified when controller shows up * or goes away. */ xpad360w_stop_input(xpad); /* * The wireless adapter is going off now, so the * gamepads are going to become disconnected. * Unless explicitly disabled, power them down * so they don't just sit there flashing. */ if (auto_poweroff && xpad->pad_present) xpad360w_poweroff_controller(xpad); } else { guard(mutex)(&input->mutex); if (input_device_enabled(input)) xpad_stop_input(xpad); } xpad_stop_output(xpad); return 0; } static int xpad_resume(struct usb_interface *intf) { struct usb_xpad *xpad = usb_get_intfdata(intf); struct input_dev *input = xpad->dev; xpad->delayed_init_done = false; if (xpad->xtype == XTYPE_XBOX360W) return xpad360w_start_input(xpad); guard(mutex)(&input->mutex); if (input_device_enabled(input)) return xpad_start_input(xpad); if (xpad->xtype == XTYPE_XBOXONE) { /* * Even if there are no users, we'll send Xbox One pads * the startup sequence so they don't sit there and * blink until somebody opens the input device again. */ return xpad_start_xbox_one(xpad); } return 0; } static struct usb_driver xpad_driver = { .name = "xpad", .probe = xpad_probe, .disconnect = xpad_disconnect, .suspend = xpad_suspend, .resume = xpad_resume, .id_table = xpad_table, }; module_usb_driver(xpad_driver); MODULE_AUTHOR("Marko Friedemann <mfr@bmx-chemnitz.de>"); MODULE_DESCRIPTION("Xbox pad driver"); MODULE_LICENSE("GPL");
2 2 2 2 6 6 5 7 5 1 6 1 4 2 4 2 9 6 6 4 1 2 1 1 8 2 1 1 1 1 1 2 2 2 1 10 9 1 6 9 4 9 6 6 5 6 6 4 4 1 1 1 1 4 2 2 2 12 5 1 1 6 4 2 3 1 2 2 180 1 1 1 4 1 1 1 11 1 8 1 4 1 2 1 6 3 140 7 3 7 5 5 5 2 6 7 7 7 7 7 7 3 2 1 3 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 9 8 1 45 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 // SPDX-License-Identifier: GPL-2.0-only /* By Ross Biro 1/23/92 */ /* * Pentium III FXSR, SSE support * Gareth Hughes <gareth@valinux.com>, May 2000 */ #include <linux/kernel.h> #include <linux/sched.h> #include <linux/sched/task_stack.h> #include <linux/mm.h> #include <linux/smp.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/ptrace.h> #include <linux/user.h> #include <linux/elf.h> #include <linux/security.h> #include <linux/audit.h> #include <linux/seccomp.h> #include <linux/signal.h> #include <linux/perf_event.h> #include <linux/hw_breakpoint.h> #include <linux/rcupdate.h> #include <linux/export.h> #include <linux/context_tracking.h> #include <linux/nospec.h> #include <linux/uaccess.h> #include <asm/processor.h> #include <asm/fpu/signal.h> #include <asm/fpu/regset.h> #include <asm/fpu/xstate.h> #include <asm/debugreg.h> #include <asm/ldt.h> #include <asm/desc.h> #include <asm/prctl.h> #include <asm/proto.h> #include <asm/hw_breakpoint.h> #include <asm/traps.h> #include <asm/syscall.h> #include <asm/fsgsbase.h> #include <asm/io_bitmap.h> #include "tls.h" enum x86_regset_32 { REGSET32_GENERAL, REGSET32_FP, REGSET32_XFP, REGSET32_XSTATE, REGSET32_TLS, REGSET32_IOPERM, }; enum x86_regset_64 { REGSET64_GENERAL, REGSET64_FP, REGSET64_IOPERM, REGSET64_XSTATE, REGSET64_SSP, }; #define REGSET_GENERAL \ ({ \ BUILD_BUG_ON((int)REGSET32_GENERAL != (int)REGSET64_GENERAL); \ REGSET32_GENERAL; \ }) #define REGSET_FP \ ({ \ BUILD_BUG_ON((int)REGSET32_FP != (int)REGSET64_FP); \ REGSET32_FP; \ }) struct pt_regs_offset { const char *name; int offset; }; #define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} #define REG_OFFSET_END {.name = NULL, .offset = 0} static const struct pt_regs_offset regoffset_table[] = { #ifdef CONFIG_X86_64 REG_OFFSET_NAME(r15), REG_OFFSET_NAME(r14), REG_OFFSET_NAME(r13), REG_OFFSET_NAME(r12), REG_OFFSET_NAME(r11), REG_OFFSET_NAME(r10), REG_OFFSET_NAME(r9), REG_OFFSET_NAME(r8), #endif REG_OFFSET_NAME(bx), REG_OFFSET_NAME(cx), REG_OFFSET_NAME(dx), REG_OFFSET_NAME(si), REG_OFFSET_NAME(di), REG_OFFSET_NAME(bp), REG_OFFSET_NAME(ax), #ifdef CONFIG_X86_32 REG_OFFSET_NAME(ds), REG_OFFSET_NAME(es), REG_OFFSET_NAME(fs), REG_OFFSET_NAME(gs), #endif REG_OFFSET_NAME(orig_ax), REG_OFFSET_NAME(ip), REG_OFFSET_NAME(cs), REG_OFFSET_NAME(flags), REG_OFFSET_NAME(sp), REG_OFFSET_NAME(ss), REG_OFFSET_END, }; /** * regs_query_register_offset() - query register offset from its name * @name: the name of a register * * regs_query_register_offset() returns the offset of a register in struct * pt_regs from its name. If the name is invalid, this returns -EINVAL; */ int regs_query_register_offset(const char *name) { const struct pt_regs_offset *roff; for (roff = regoffset_table; roff->name != NULL; roff++) if (!strcmp(roff->name, name)) return roff->offset; return -EINVAL; } /** * regs_query_register_name() - query register name from its offset * @offset: the offset of a register in struct pt_regs. * * regs_query_register_name() returns the name of a register from its * offset in struct pt_regs. If the @offset is invalid, this returns NULL; */ const char *regs_query_register_name(unsigned int offset) { const struct pt_regs_offset *roff; for (roff = regoffset_table; roff->name != NULL; roff++) if (roff->offset == offset) return roff->name; return NULL; } /* * does not yet catch signals sent when the child dies. * in exit.c or in signal.c. */ /* * Determines which flags the user has access to [1 = access, 0 = no access]. */ #define FLAG_MASK_32 ((unsigned long) \ (X86_EFLAGS_CF | X86_EFLAGS_PF | \ X86_EFLAGS_AF | X86_EFLAGS_ZF | \ X86_EFLAGS_SF | X86_EFLAGS_TF | \ X86_EFLAGS_DF | X86_EFLAGS_OF | \ X86_EFLAGS_RF | X86_EFLAGS_AC)) /* * Determines whether a value may be installed in a segment register. */ static inline bool invalid_selector(u16 value) { return unlikely(value != 0 && (value & SEGMENT_RPL_MASK) != USER_RPL); } #ifdef CONFIG_X86_32 #define FLAG_MASK FLAG_MASK_32 static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) { BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); return &regs->bx + (regno >> 2); } static u16 get_segment_reg(struct task_struct *task, unsigned long offset) { /* * Returning the value truncates it to 16 bits. */ unsigned int retval; if (offset != offsetof(struct user_regs_struct, gs)) retval = *pt_regs_access(task_pt_regs(task), offset); else { if (task == current) savesegment(gs, retval); else retval = task->thread.gs; } return retval; } static int set_segment_reg(struct task_struct *task, unsigned long offset, u16 value) { if (WARN_ON_ONCE(task == current)) return -EIO; /* * The value argument was already truncated to 16 bits. */ if (invalid_selector(value)) return -EIO; /* * For %cs and %ss we cannot permit a null selector. * We can permit a bogus selector as long as it has USER_RPL. * Null selectors are fine for other segment registers, but * we will never get back to user mode with invalid %cs or %ss * and will take the trap in iret instead. Much code relies * on user_mode() to distinguish a user trap frame (which can * safely use invalid selectors) from a kernel trap frame. */ switch (offset) { case offsetof(struct user_regs_struct, cs): case offsetof(struct user_regs_struct, ss): if (unlikely(value == 0)) return -EIO; fallthrough; default: *pt_regs_access(task_pt_regs(task), offset) = value; break; case offsetof(struct user_regs_struct, gs): task->thread.gs = value; } return 0; } #else /* CONFIG_X86_64 */ #define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long offset) { BUILD_BUG_ON(offsetof(struct pt_regs, r15) != 0); return &regs->r15 + (offset / sizeof(regs->r15)); } static u16 get_segment_reg(struct task_struct *task, unsigned long offset) { /* * Returning the value truncates it to 16 bits. */ unsigned int seg; switch (offset) { case offsetof(struct user_regs_struct, fs): if (task == current) { /* Older gas can't assemble movq %?s,%r?? */ asm("movl %%fs,%0" : "=r" (seg)); return seg; } return task->thread.fsindex; case offsetof(struct user_regs_struct, gs): if (task == current) { asm("movl %%gs,%0" : "=r" (seg)); return seg; } return task->thread.gsindex; case offsetof(struct user_regs_struct, ds): if (task == current) { asm("movl %%ds,%0" : "=r" (seg)); return seg; } return task->thread.ds; case offsetof(struct user_regs_struct, es): if (task == current) { asm("movl %%es,%0" : "=r" (seg)); return seg; } return task->thread.es; case offsetof(struct user_regs_struct, cs): case offsetof(struct user_regs_struct, ss): break; } return *pt_regs_access(task_pt_regs(task), offset); } static int set_segment_reg(struct task_struct *task, unsigned long offset, u16 value) { if (WARN_ON_ONCE(task == current)) return -EIO; /* * The value argument was already truncated to 16 bits. */ if (invalid_selector(value)) return -EIO; /* * Writes to FS and GS will change the stored selector. Whether * this changes the segment base as well depends on whether * FSGSBASE is enabled. */ switch (offset) { case offsetof(struct user_regs_struct,fs): task->thread.fsindex = value; break; case offsetof(struct user_regs_struct,gs): task->thread.gsindex = value; break; case offsetof(struct user_regs_struct,ds): task->thread.ds = value; break; case offsetof(struct user_regs_struct,es): task->thread.es = value; break; /* * Can't actually change these in 64-bit mode. */ case offsetof(struct user_regs_struct,cs): if (unlikely(value == 0)) return -EIO; task_pt_regs(task)->cs = value; break; case offsetof(struct user_regs_struct,ss): if (unlikely(value == 0)) return -EIO; task_pt_regs(task)->ss = value; break; } return 0; } #endif /* CONFIG_X86_32 */ static unsigned long get_flags(struct task_struct *task) { unsigned long retval = task_pt_regs(task)->flags; /* * If the debugger set TF, hide it from the readout. */ if (test_tsk_thread_flag(task, TIF_FORCED_TF)) retval &= ~X86_EFLAGS_TF; return retval; } static int set_flags(struct task_struct *task, unsigned long value) { struct pt_regs *regs = task_pt_regs(task); /* * If the user value contains TF, mark that * it was not "us" (the debugger) that set it. * If not, make sure it stays set if we had. */ if (value & X86_EFLAGS_TF) clear_tsk_thread_flag(task, TIF_FORCED_TF); else if (test_tsk_thread_flag(task, TIF_FORCED_TF)) value |= X86_EFLAGS_TF; regs->flags = (regs->flags & ~FLAG_MASK) | (value & FLAG_MASK); return 0; } static int putreg(struct task_struct *child, unsigned long offset, unsigned long value) { switch (offset) { case offsetof(struct user_regs_struct, cs): case offsetof(struct user_regs_struct, ds): case offsetof(struct user_regs_struct, es): case offsetof(struct user_regs_struct, fs): case offsetof(struct user_regs_struct, gs): case offsetof(struct user_regs_struct, ss): return set_segment_reg(child, offset, value); case offsetof(struct user_regs_struct, flags): return set_flags(child, value); #ifdef CONFIG_X86_64 case offsetof(struct user_regs_struct,fs_base): if (value >= TASK_SIZE_MAX) return -EIO; x86_fsbase_write_task(child, value); return 0; case offsetof(struct user_regs_struct,gs_base): if (value >= TASK_SIZE_MAX) return -EIO; x86_gsbase_write_task(child, value); return 0; #endif } *pt_regs_access(task_pt_regs(child), offset) = value; return 0; } static unsigned long getreg(struct task_struct *task, unsigned long offset) { switch (offset) { case offsetof(struct user_regs_struct, cs): case offsetof(struct user_regs_struct, ds): case offsetof(struct user_regs_struct, es): case offsetof(struct user_regs_struct, fs): case offsetof(struct user_regs_struct, gs): case offsetof(struct user_regs_struct, ss): return get_segment_reg(task, offset); case offsetof(struct user_regs_struct, flags): return get_flags(task); #ifdef CONFIG_X86_64 case offsetof(struct user_regs_struct, fs_base): return x86_fsbase_read_task(task); case offsetof(struct user_regs_struct, gs_base): return x86_gsbase_read_task(task); #endif } return *pt_regs_access(task_pt_regs(task), offset); } static int genregs_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { int reg; for (reg = 0; to.left; reg++) membuf_store(&to, getreg(target, reg * sizeof(unsigned long))); return 0; } static int genregs_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { int ret = 0; if (kbuf) { const unsigned long *k = kbuf; while (count >= sizeof(*k) && !ret) { ret = putreg(target, pos, *k++); count -= sizeof(*k); pos += sizeof(*k); } } else { const unsigned long __user *u = ubuf; while (count >= sizeof(*u) && !ret) { unsigned long word; ret = __get_user(word, u++); if (ret) break; ret = putreg(target, pos, word); count -= sizeof(*u); pos += sizeof(*u); } } return ret; } static void ptrace_triggered(struct perf_event *bp, struct perf_sample_data *data, struct pt_regs *regs) { int i; struct thread_struct *thread = &(current->thread); /* * Store in the virtual DR6 register the fact that the breakpoint * was hit so the thread's debugger will see it. */ for (i = 0; i < HBP_NUM; i++) { if (thread->ptrace_bps[i] == bp) break; } thread->virtual_dr6 |= (DR_TRAP0 << i); } /* * Walk through every ptrace breakpoints for this thread and * build the dr7 value on top of their attributes. * */ static unsigned long ptrace_get_dr7(struct perf_event *bp[]) { int i; int dr7 = 0; struct arch_hw_breakpoint *info; for (i = 0; i < HBP_NUM; i++) { if (bp[i] && !bp[i]->attr.disabled) { info = counter_arch_bp(bp[i]); dr7 |= encode_dr7(i, info->len, info->type); } } return dr7; } static int ptrace_fill_bp_fields(struct perf_event_attr *attr, int len, int type, bool disabled) { int err, bp_len, bp_type; err = arch_bp_generic_fields(len, type, &bp_len, &bp_type); if (!err) { attr->bp_len = bp_len; attr->bp_type = bp_type; attr->disabled = disabled; } return err; } static struct perf_event * ptrace_register_breakpoint(struct task_struct *tsk, int len, int type, unsigned long addr, bool disabled) { struct perf_event_attr attr; int err; ptrace_breakpoint_init(&attr); attr.bp_addr = addr; err = ptrace_fill_bp_fields(&attr, len, type, disabled); if (err) return ERR_PTR(err); return register_user_hw_breakpoint(&attr, ptrace_triggered, NULL, tsk); } static int ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, int disabled) { struct perf_event_attr attr = bp->attr; int err; err = ptrace_fill_bp_fields(&attr, len, type, disabled); if (err) return err; return modify_user_hw_breakpoint(bp, &attr); } /* * Handle ptrace writes to debug register 7. */ static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) { struct thread_struct *thread = &tsk->thread; unsigned long old_dr7; bool second_pass = false; int i, rc, ret = 0; data &= ~DR_CONTROL_RESERVED; old_dr7 = ptrace_get_dr7(thread->ptrace_bps); restore: rc = 0; for (i = 0; i < HBP_NUM; i++) { unsigned len, type; bool disabled = !decode_dr7(data, i, &len, &type); struct perf_event *bp = thread->ptrace_bps[i]; if (!bp) { if (disabled) continue; bp = ptrace_register_breakpoint(tsk, len, type, 0, disabled); if (IS_ERR(bp)) { rc = PTR_ERR(bp); break; } thread->ptrace_bps[i] = bp; continue; } rc = ptrace_modify_breakpoint(bp, len, type, disabled); if (rc) break; } /* Restore if the first pass failed, second_pass shouldn't fail. */ if (rc && !WARN_ON(second_pass)) { ret = rc; data = old_dr7; second_pass = true; goto restore; } return ret; } /* * Handle PTRACE_PEEKUSR calls for the debug register area. */ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) { struct thread_struct *thread = &tsk->thread; unsigned long val = 0; if (n < HBP_NUM) { int index = array_index_nospec(n, HBP_NUM); struct perf_event *bp = thread->ptrace_bps[index]; if (bp) val = bp->hw.info.address; } else if (n == 6) { val = thread->virtual_dr6 ^ DR6_RESERVED; /* Flip back to arch polarity */ } else if (n == 7) { val = thread->ptrace_dr7; } return val; } static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, unsigned long addr) { struct thread_struct *t = &tsk->thread; struct perf_event *bp = t->ptrace_bps[nr]; int err = 0; if (!bp) { /* * Put stub len and type to create an inactive but correct bp. * * CHECKME: the previous code returned -EIO if the addr wasn't * a valid task virtual addr. The new one will return -EINVAL in * this case. * -EINVAL may be what we want for in-kernel breakpoints users, * but -EIO looks better for ptrace, since we refuse a register * writing for the user. And anyway this is the previous * behaviour. */ bp = ptrace_register_breakpoint(tsk, X86_BREAKPOINT_LEN_1, X86_BREAKPOINT_WRITE, addr, true); if (IS_ERR(bp)) err = PTR_ERR(bp); else t->ptrace_bps[nr] = bp; } else { struct perf_event_attr attr = bp->attr; attr.bp_addr = addr; err = modify_user_hw_breakpoint(bp, &attr); } return err; } /* * Handle PTRACE_POKEUSR calls for the debug register area. */ static int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val) { struct thread_struct *thread = &tsk->thread; /* There are no DR4 or DR5 registers */ int rc = -EIO; if (n < HBP_NUM) { rc = ptrace_set_breakpoint_addr(tsk, n, val); } else if (n == 6) { thread->virtual_dr6 = val ^ DR6_RESERVED; /* Flip to positive polarity */ rc = 0; } else if (n == 7) { rc = ptrace_write_dr7(tsk, val); if (!rc) thread->ptrace_dr7 = val; } return rc; } /* * These access the current or another (stopped) task's io permission * bitmap for debugging or core dump. */ static int ioperm_active(struct task_struct *target, const struct user_regset *regset) { struct io_bitmap *iobm = target->thread.io_bitmap; return iobm ? DIV_ROUND_UP(iobm->max, regset->size) : 0; } static int ioperm_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { struct io_bitmap *iobm = target->thread.io_bitmap; if (!iobm) return -ENXIO; return membuf_write(&to, iobm->bitmap, IO_BITMAP_BYTES); } /* * Called by kernel/ptrace.c when detaching.. * * Make sure the single step bit is not set. */ void ptrace_disable(struct task_struct *child) { user_disable_single_step(child); } #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION static const struct user_regset_view user_x86_32_view; /* Initialized below. */ #endif #ifdef CONFIG_X86_64 static const struct user_regset_view user_x86_64_view; /* Initialized below. */ #endif long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { int ret; unsigned long __user *datap = (unsigned long __user *)data; #ifdef CONFIG_X86_64 /* This is native 64-bit ptrace() */ const struct user_regset_view *regset_view = &user_x86_64_view; #else /* This is native 32-bit ptrace() */ const struct user_regset_view *regset_view = &user_x86_32_view; #endif switch (request) { /* read the word at location addr in the USER area. */ case PTRACE_PEEKUSR: { unsigned long tmp; ret = -EIO; if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user)) break; tmp = 0; /* Default return condition */ if (addr < sizeof(struct user_regs_struct)) tmp = getreg(child, addr); else if (addr >= offsetof(struct user, u_debugreg[0]) && addr <= offsetof(struct user, u_debugreg[7])) { addr -= offsetof(struct user, u_debugreg[0]); tmp = ptrace_get_debugreg(child, addr / sizeof(data)); } ret = put_user(tmp, datap); break; } case PTRACE_POKEUSR: /* write the word at location addr in the USER area */ ret = -EIO; if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user)) break; if (addr < sizeof(struct user_regs_struct)) ret = putreg(child, addr, data); else if (addr >= offsetof(struct user, u_debugreg[0]) && addr <= offsetof(struct user, u_debugreg[7])) { addr -= offsetof(struct user, u_debugreg[0]); ret = ptrace_set_debugreg(child, addr / sizeof(data), data); } break; case PTRACE_GETREGS: /* Get all gp regs from the child. */ return copy_regset_to_user(child, regset_view, REGSET_GENERAL, 0, sizeof(struct user_regs_struct), datap); case PTRACE_SETREGS: /* Set all gp regs in the child. */ return copy_regset_from_user(child, regset_view, REGSET_GENERAL, 0, sizeof(struct user_regs_struct), datap); case PTRACE_GETFPREGS: /* Get the child FPU state. */ return copy_regset_to_user(child, regset_view, REGSET_FP, 0, sizeof(struct user_i387_struct), datap); case PTRACE_SETFPREGS: /* Set the child FPU state. */ return copy_regset_from_user(child, regset_view, REGSET_FP, 0, sizeof(struct user_i387_struct), datap); #ifdef CONFIG_X86_32 case PTRACE_GETFPXREGS: /* Get the child extended FPU state. */ return copy_regset_to_user(child, &user_x86_32_view, REGSET32_XFP, 0, sizeof(struct user_fxsr_struct), datap) ? -EIO : 0; case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */ return copy_regset_from_user(child, &user_x86_32_view, REGSET32_XFP, 0, sizeof(struct user_fxsr_struct), datap) ? -EIO : 0; #endif #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION case PTRACE_GET_THREAD_AREA: if ((int) addr < 0) return -EIO; ret = do_get_thread_area(child, addr, (struct user_desc __user *)data); break; case PTRACE_SET_THREAD_AREA: if ((int) addr < 0) return -EIO; ret = do_set_thread_area(child, addr, (struct user_desc __user *)data, 0); break; #endif #ifdef CONFIG_X86_64 /* normal 64bit interface to access TLS data. Works just like arch_prctl, except that the arguments are reversed. */ case PTRACE_ARCH_PRCTL: ret = do_arch_prctl_64(child, data, addr); break; #endif default: ret = ptrace_request(child, request, addr, data); break; } return ret; } #ifdef CONFIG_IA32_EMULATION #include <linux/compat.h> #include <linux/syscalls.h> #include <asm/ia32.h> #include <asm/user32.h> #define R32(l,q) \ case offsetof(struct user32, regs.l): \ regs->q = value; break #define SEG32(rs) \ case offsetof(struct user32, regs.rs): \ return set_segment_reg(child, \ offsetof(struct user_regs_struct, rs), \ value); \ break static int putreg32(struct task_struct *child, unsigned regno, u32 value) { struct pt_regs *regs = task_pt_regs(child); int ret; switch (regno) { SEG32(cs); SEG32(ds); SEG32(es); /* * A 32-bit ptracer on a 64-bit kernel expects that writing * FS or GS will also update the base. This is needed for * operations like PTRACE_SETREGS to fully restore a saved * CPU state. */ case offsetof(struct user32, regs.fs): ret = set_segment_reg(child, offsetof(struct user_regs_struct, fs), value); if (ret == 0) child->thread.fsbase = x86_fsgsbase_read_task(child, value); return ret; case offsetof(struct user32, regs.gs): ret = set_segment_reg(child, offsetof(struct user_regs_struct, gs), value); if (ret == 0) child->thread.gsbase = x86_fsgsbase_read_task(child, value); return ret; SEG32(ss); R32(ebx, bx); R32(ecx, cx); R32(edx, dx); R32(edi, di); R32(esi, si); R32(ebp, bp); R32(eax, ax); R32(eip, ip); R32(esp, sp); case offsetof(struct user32, regs.orig_eax): /* * Warning: bizarre corner case fixup here. A 32-bit * debugger setting orig_eax to -1 wants to disable * syscall restart. Make sure that the syscall * restart code sign-extends orig_ax. Also make sure * we interpret the -ERESTART* codes correctly if * loaded into regs->ax in case the task is not * actually still sitting at the exit from a 32-bit * syscall with TS_COMPAT still set. */ regs->orig_ax = value; if (syscall_get_nr(child, regs) != -1) child->thread_info.status |= TS_I386_REGS_POKED; break; case offsetof(struct user32, regs.eflags): return set_flags(child, value); case offsetof(struct user32, u_debugreg[0]) ... offsetof(struct user32, u_debugreg[7]): regno -= offsetof(struct user32, u_debugreg[0]); return ptrace_set_debugreg(child, regno / 4, value); default: if (regno > sizeof(struct user32) || (regno & 3)) return -EIO; /* * Other dummy fields in the virtual user structure * are ignored */ break; } return 0; } #undef R32 #undef SEG32 #define R32(l,q) \ case offsetof(struct user32, regs.l): \ *val = regs->q; break #define SEG32(rs) \ case offsetof(struct user32, regs.rs): \ *val = get_segment_reg(child, \ offsetof(struct user_regs_struct, rs)); \ break static int getreg32(struct task_struct *child, unsigned regno, u32 *val) { struct pt_regs *regs = task_pt_regs(child); switch (regno) { SEG32(ds); SEG32(es); SEG32(fs); SEG32(gs); R32(cs, cs); R32(ss, ss); R32(ebx, bx); R32(ecx, cx); R32(edx, dx); R32(edi, di); R32(esi, si); R32(ebp, bp); R32(eax, ax); R32(orig_eax, orig_ax); R32(eip, ip); R32(esp, sp); case offsetof(struct user32, regs.eflags): *val = get_flags(child); break; case offsetof(struct user32, u_debugreg[0]) ... offsetof(struct user32, u_debugreg[7]): regno -= offsetof(struct user32, u_debugreg[0]); *val = ptrace_get_debugreg(child, regno / 4); break; default: if (regno > sizeof(struct user32) || (regno & 3)) return -EIO; /* * Other dummy fields in the virtual user structure * are ignored */ *val = 0; break; } return 0; } #undef R32 #undef SEG32 static int genregs32_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { int reg; for (reg = 0; to.left; reg++) { u32 val; getreg32(target, reg * 4, &val); membuf_store(&to, val); } return 0; } static int genregs32_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { int ret = 0; if (kbuf) { const compat_ulong_t *k = kbuf; while (count >= sizeof(*k) && !ret) { ret = putreg32(target, pos, *k++); count -= sizeof(*k); pos += sizeof(*k); } } else { const compat_ulong_t __user *u = ubuf; while (count >= sizeof(*u) && !ret) { compat_ulong_t word; ret = __get_user(word, u++); if (ret) break; ret = putreg32(target, pos, word); count -= sizeof(*u); pos += sizeof(*u); } } return ret; } static long ia32_arch_ptrace(struct task_struct *child, compat_long_t request, compat_ulong_t caddr, compat_ulong_t cdata) { unsigned long addr = caddr; unsigned long data = cdata; void __user *datap = compat_ptr(data); int ret; __u32 val; switch (request) { case PTRACE_PEEKUSR: ret = getreg32(child, addr, &val); if (ret == 0) ret = put_user(val, (__u32 __user *)datap); break; case PTRACE_POKEUSR: ret = putreg32(child, addr, data); break; case PTRACE_GETREGS: /* Get all gp regs from the child. */ return copy_regset_to_user(child, &user_x86_32_view, REGSET_GENERAL, 0, sizeof(struct user_regs_struct32), datap); case PTRACE_SETREGS: /* Set all gp regs in the child. */ return copy_regset_from_user(child, &user_x86_32_view, REGSET_GENERAL, 0, sizeof(struct user_regs_struct32), datap); case PTRACE_GETFPREGS: /* Get the child FPU state. */ return copy_regset_to_user(child, &user_x86_32_view, REGSET_FP, 0, sizeof(struct user_i387_ia32_struct), datap); case PTRACE_SETFPREGS: /* Set the child FPU state. */ return copy_regset_from_user( child, &user_x86_32_view, REGSET_FP, 0, sizeof(struct user_i387_ia32_struct), datap); case PTRACE_GETFPXREGS: /* Get the child extended FPU state. */ return copy_regset_to_user(child, &user_x86_32_view, REGSET32_XFP, 0, sizeof(struct user32_fxsr_struct), datap); case PTRACE_SETFPXREGS: /* Set the child extended FPU state. */ return copy_regset_from_user(child, &user_x86_32_view, REGSET32_XFP, 0, sizeof(struct user32_fxsr_struct), datap); case PTRACE_GET_THREAD_AREA: case PTRACE_SET_THREAD_AREA: return arch_ptrace(child, request, addr, data); default: return compat_ptrace_request(child, request, addr, data); } return ret; } #endif /* CONFIG_IA32_EMULATION */ #ifdef CONFIG_X86_X32_ABI static long x32_arch_ptrace(struct task_struct *child, compat_long_t request, compat_ulong_t caddr, compat_ulong_t cdata) { unsigned long addr = caddr; unsigned long data = cdata; void __user *datap = compat_ptr(data); int ret; switch (request) { /* Read 32bits at location addr in the USER area. Only allow to return the lower 32bits of segment and debug registers. */ case PTRACE_PEEKUSR: { u32 tmp; ret = -EIO; if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user) || addr < offsetof(struct user_regs_struct, cs)) break; tmp = 0; /* Default return condition */ if (addr < sizeof(struct user_regs_struct)) tmp = getreg(child, addr); else if (addr >= offsetof(struct user, u_debugreg[0]) && addr <= offsetof(struct user, u_debugreg[7])) { addr -= offsetof(struct user, u_debugreg[0]); tmp = ptrace_get_debugreg(child, addr / sizeof(data)); } ret = put_user(tmp, (__u32 __user *)datap); break; } /* Write the word at location addr in the USER area. Only allow to update segment and debug registers with the upper 32bits zero-extended. */ case PTRACE_POKEUSR: ret = -EIO; if ((addr & (sizeof(data) - 1)) || addr >= sizeof(struct user) || addr < offsetof(struct user_regs_struct, cs)) break; if (addr < sizeof(struct user_regs_struct)) ret = putreg(child, addr, data); else if (addr >= offsetof(struct user, u_debugreg[0]) && addr <= offsetof(struct user, u_debugreg[7])) { addr -= offsetof(struct user, u_debugreg[0]); ret = ptrace_set_debugreg(child, addr / sizeof(data), data); } break; case PTRACE_GETREGS: /* Get all gp regs from the child. */ return copy_regset_to_user(child, &user_x86_64_view, REGSET_GENERAL, 0, sizeof(struct user_regs_struct), datap); case PTRACE_SETREGS: /* Set all gp regs in the child. */ return copy_regset_from_user(child, &user_x86_64_view, REGSET_GENERAL, 0, sizeof(struct user_regs_struct), datap); case PTRACE_GETFPREGS: /* Get the child FPU state. */ return copy_regset_to_user(child, &user_x86_64_view, REGSET_FP, 0, sizeof(struct user_i387_struct), datap); case PTRACE_SETFPREGS: /* Set the child FPU state. */ return copy_regset_from_user(child, &user_x86_64_view, REGSET_FP, 0, sizeof(struct user_i387_struct), datap); default: return compat_ptrace_request(child, request, addr, data); } return ret; } #endif #ifdef CONFIG_COMPAT long compat_arch_ptrace(struct task_struct *child, compat_long_t request, compat_ulong_t caddr, compat_ulong_t cdata) { #ifdef CONFIG_X86_X32_ABI if (!in_ia32_syscall()) return x32_arch_ptrace(child, request, caddr, cdata); #endif #ifdef CONFIG_IA32_EMULATION return ia32_arch_ptrace(child, request, caddr, cdata); #else return 0; #endif } #endif /* CONFIG_COMPAT */ #ifdef CONFIG_X86_64 static struct user_regset x86_64_regsets[] __ro_after_init = { [REGSET64_GENERAL] = { USER_REGSET_NOTE_TYPE(PRSTATUS), .n = sizeof(struct user_regs_struct) / sizeof(long), .size = sizeof(long), .align = sizeof(long), .regset_get = genregs_get, .set = genregs_set }, [REGSET64_FP] = { USER_REGSET_NOTE_TYPE(PRFPREG), .n = sizeof(struct fxregs_state) / sizeof(long), .size = sizeof(long), .align = sizeof(long), .active = regset_xregset_fpregs_active, .regset_get = xfpregs_get, .set = xfpregs_set }, [REGSET64_XSTATE] = { USER_REGSET_NOTE_TYPE(X86_XSTATE), .size = sizeof(u64), .align = sizeof(u64), .active = xstateregs_active, .regset_get = xstateregs_get, .set = xstateregs_set }, [REGSET64_IOPERM] = { USER_REGSET_NOTE_TYPE(386_IOPERM), .n = IO_BITMAP_LONGS, .size = sizeof(long), .align = sizeof(long), .active = ioperm_active, .regset_get = ioperm_get }, #ifdef CONFIG_X86_USER_SHADOW_STACK [REGSET64_SSP] = { USER_REGSET_NOTE_TYPE(X86_SHSTK), .n = 1, .size = sizeof(u64), .align = sizeof(u64), .active = ssp_active, .regset_get = ssp_get, .set = ssp_set }, #endif }; static const struct user_regset_view user_x86_64_view = { .name = "x86_64", .e_machine = EM_X86_64, .regsets = x86_64_regsets, .n = ARRAY_SIZE(x86_64_regsets) }; #else /* CONFIG_X86_32 */ #define user_regs_struct32 user_regs_struct #define genregs32_get genregs_get #define genregs32_set genregs_set #endif /* CONFIG_X86_64 */ #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION static struct user_regset x86_32_regsets[] __ro_after_init = { [REGSET32_GENERAL] = { USER_REGSET_NOTE_TYPE(PRSTATUS), .n = sizeof(struct user_regs_struct32) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), .regset_get = genregs32_get, .set = genregs32_set }, [REGSET32_FP] = { USER_REGSET_NOTE_TYPE(PRFPREG), .n = sizeof(struct user_i387_ia32_struct) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), .active = regset_fpregs_active, .regset_get = fpregs_get, .set = fpregs_set }, [REGSET32_XFP] = { USER_REGSET_NOTE_TYPE(PRXFPREG), .n = sizeof(struct fxregs_state) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), .active = regset_xregset_fpregs_active, .regset_get = xfpregs_get, .set = xfpregs_set }, [REGSET32_XSTATE] = { USER_REGSET_NOTE_TYPE(X86_XSTATE), .size = sizeof(u64), .align = sizeof(u64), .active = xstateregs_active, .regset_get = xstateregs_get, .set = xstateregs_set }, [REGSET32_TLS] = { USER_REGSET_NOTE_TYPE(386_TLS), .n = GDT_ENTRY_TLS_ENTRIES, .bias = GDT_ENTRY_TLS_MIN, .size = sizeof(struct user_desc), .align = sizeof(struct user_desc), .active = regset_tls_active, .regset_get = regset_tls_get, .set = regset_tls_set }, [REGSET32_IOPERM] = { USER_REGSET_NOTE_TYPE(386_IOPERM), .n = IO_BITMAP_BYTES / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), .active = ioperm_active, .regset_get = ioperm_get }, }; static const struct user_regset_view user_x86_32_view = { .name = "i386", .e_machine = EM_386, .regsets = x86_32_regsets, .n = ARRAY_SIZE(x86_32_regsets) }; #endif /* * This represents bytes 464..511 in the memory layout exported through * the REGSET_XSTATE interface. */ u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; void __init update_regset_xstate_info(unsigned int size, u64 xstate_mask) { #ifdef CONFIG_X86_64 x86_64_regsets[REGSET64_XSTATE].n = size / sizeof(u64); #endif #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION x86_32_regsets[REGSET32_XSTATE].n = size / sizeof(u64); #endif xstate_fx_sw_bytes[USER_XSTATE_XCR0_WORD] = xstate_mask; } /* * This is used by the core dump code to decide which regset to dump. The * core dump code writes out the resulting .e_machine and the corresponding * regsets. This is suboptimal if the task is messing around with its CS.L * field, but at worst the core dump will end up missing some information. * * Unfortunately, it is also used by the broken PTRACE_GETREGSET and * PTRACE_SETREGSET APIs. These APIs look at the .regsets field but have * no way to make sure that the e_machine they use matches the caller's * expectations. The result is that the data format returned by * PTRACE_GETREGSET depends on the returned CS field (and even the offset * of the returned CS field depends on its value!) and the data format * accepted by PTRACE_SETREGSET is determined by the old CS value. The * upshot is that it is basically impossible to use these APIs correctly. * * The best way to fix it in the long run would probably be to add new * improved ptrace() APIs to read and write registers reliably, possibly by * allowing userspace to select the ELF e_machine variant that they expect. */ const struct user_regset_view *task_user_regset_view(struct task_struct *task) { #ifdef CONFIG_IA32_EMULATION if (!user_64bit_mode(task_pt_regs(task))) #endif #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION return &user_x86_32_view; #endif #ifdef CONFIG_X86_64 return &user_x86_64_view; #endif } void send_sigtrap(struct pt_regs *regs, int error_code, int si_code) { struct task_struct *tsk = current; tsk->thread.trap_nr = X86_TRAP_DB; tsk->thread.error_code = error_code; /* Send us the fake SIGTRAP */ force_sig_fault(SIGTRAP, si_code, user_mode(regs) ? (void __user *)regs->ip : NULL); } void user_single_step_report(struct pt_regs *regs) { send_sigtrap(regs, 0, TRAP_BRKPT); }
1 1 92 4 141 3 152 10 25 9 13 43 22 21 3 189 155 12 14 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/mount.h> #include <linux/seq_file.h> #include <linux/poll.h> #include <linux/ns_common.h> #include <linux/fs_pin.h> extern struct file_system_type nullfs_fs_type; extern struct list_head notify_list; struct mnt_namespace { struct ns_common ns; struct mount * root; struct { struct rb_root mounts; /* Protected by namespace_sem */ struct rb_node *mnt_last_node; /* last (rightmost) mount in the rbtree */ struct rb_node *mnt_first_node; /* first (leftmost) mount in the rbtree */ }; struct user_namespace *user_ns; struct ucounts *ucounts; wait_queue_head_t poll; u64 seq_origin; /* Sequence number of origin mount namespace */ u64 event; #ifdef CONFIG_FSNOTIFY __u32 n_fsnotify_mask; struct fsnotify_mark_connector __rcu *n_fsnotify_marks; #endif unsigned int nr_mounts; /* # of mounts in the namespace */ unsigned int pending_mounts; refcount_t passive; /* number references not pinning @mounts */ bool is_anon; } __randomize_layout; struct mnt_pcp { int mnt_count; int mnt_writers; }; struct mountpoint { struct hlist_node m_hash; struct dentry *m_dentry; struct hlist_head m_list; }; struct mount { struct hlist_node mnt_hash; struct mount *mnt_parent; struct dentry *mnt_mountpoint; struct vfsmount mnt; union { struct rb_node mnt_node; /* node in the ns->mounts rbtree */ struct rcu_head mnt_rcu; struct llist_node mnt_llist; }; #ifdef CONFIG_SMP struct mnt_pcp __percpu *mnt_pcp; #else int mnt_count; int mnt_writers; #endif struct list_head mnt_mounts; /* list of children, anchored here */ struct list_head mnt_child; /* and going through their mnt_child */ struct mount *mnt_next_for_sb; /* the next two fields are hlist_node, */ struct mount * __aligned(1) *mnt_pprev_for_sb; /* except that LSB of pprev is stolen */ #define WRITE_HOLD 1 /* ... for use by mnt_hold_writers() */ const char *mnt_devname; /* Name of device e.g. /dev/dsk/hda1 */ struct list_head mnt_list; struct list_head mnt_expire; /* link in fs-specific expiry list */ struct list_head mnt_share; /* circular list of shared mounts */ struct hlist_head mnt_slave_list;/* list of slave mounts */ struct hlist_node mnt_slave; /* slave list entry */ struct mount *mnt_master; /* slave is on master->mnt_slave_list */ struct mnt_namespace *mnt_ns; /* containing namespace */ struct mountpoint *mnt_mp; /* where is it mounted */ union { struct hlist_node mnt_mp_list; /* list mounts with the same mountpoint */ struct hlist_node mnt_umount; }; #ifdef CONFIG_FSNOTIFY struct fsnotify_mark_connector __rcu *mnt_fsnotify_marks; __u32 mnt_fsnotify_mask; struct list_head to_notify; /* need to queue notification */ struct mnt_namespace *prev_ns; /* previous namespace (NULL if none) */ #endif int mnt_t_flags; /* namespace_sem-protected flags */ int mnt_id; /* mount identifier, reused */ u64 mnt_id_unique; /* mount ID unique until reboot */ int mnt_group_id; /* peer group identifier */ int mnt_expiry_mark; /* true if marked for expiry */ struct hlist_head mnt_pins; struct hlist_head mnt_stuck_children; struct mount *overmount; /* mounted on ->mnt_root */ } __randomize_layout; enum { T_SHARED = 1, /* mount is shared */ T_UNBINDABLE = 2, /* mount is unbindable */ T_MARKED = 4, /* internal mark for propagate_... */ T_UMOUNT_CANDIDATE = 8, /* for propagate_umount */ /* * T_SHARED_MASK is the set of flags that should be cleared when a * mount becomes shared. Currently, this is only the flag that says a * mount cannot be bind mounted, since this is how we create a mount * that shares events with another mount. If you add a new T_* * flag, consider how it interacts with shared mounts. */ T_SHARED_MASK = T_UNBINDABLE, }; #define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */ static inline struct mount *real_mount(struct vfsmount *mnt) { return container_of(mnt, struct mount, mnt); } static inline int mnt_has_parent(const struct mount *mnt) { return mnt != mnt->mnt_parent; } static inline int is_mounted(struct vfsmount *mnt) { /* neither detached nor internal? */ return !IS_ERR_OR_NULL(real_mount(mnt)->mnt_ns); } extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *); extern int __legitimize_mnt(struct vfsmount *, unsigned); static inline bool __path_is_mountpoint(const struct path *path) { struct mount *m = __lookup_mnt(path->mnt, path->dentry); return m && likely(!(m->mnt.mnt_flags & MNT_SYNC_UMOUNT)); } extern void __detach_mounts(struct dentry *dentry); static inline void detach_mounts(struct dentry *dentry) { if (!d_mountpoint(dentry)) return; __detach_mounts(dentry); } static inline void get_mnt_ns(struct mnt_namespace *ns) { ns_ref_inc(ns); } extern seqlock_t mount_lock; DEFINE_LOCK_GUARD_0(mount_writer, write_seqlock(&mount_lock), write_sequnlock(&mount_lock)) DEFINE_LOCK_GUARD_0(mount_locked_reader, read_seqlock_excl(&mount_lock), read_sequnlock_excl(&mount_lock)) struct proc_mounts { struct mnt_namespace *ns; struct path root; int (*show)(struct seq_file *, struct vfsmount *); }; extern const struct seq_operations mounts_op; extern bool __is_local_mountpoint(const struct dentry *dentry); static inline bool is_local_mountpoint(const struct dentry *dentry) { if (!d_mountpoint(dentry)) return false; return __is_local_mountpoint(dentry); } static inline bool is_anon_ns(struct mnt_namespace *ns) { return ns->is_anon; } static inline bool anon_ns_root(const struct mount *m) { struct mnt_namespace *ns = READ_ONCE(m->mnt_ns); return !IS_ERR_OR_NULL(ns) && is_anon_ns(ns) && m == ns->root; } static inline bool mnt_ns_attached(const struct mount *mnt) { return !RB_EMPTY_NODE(&mnt->mnt_node); } static inline bool mnt_ns_empty(const struct mnt_namespace *ns) { return RB_EMPTY_ROOT(&ns->mounts); } static inline void move_from_ns(struct mount *mnt) { struct mnt_namespace *ns = mnt->mnt_ns; WARN_ON(!mnt_ns_attached(mnt)); if (ns->mnt_last_node == &mnt->mnt_node) ns->mnt_last_node = rb_prev(&mnt->mnt_node); if (ns->mnt_first_node == &mnt->mnt_node) ns->mnt_first_node = rb_next(&mnt->mnt_node); rb_erase(&mnt->mnt_node, &ns->mounts); RB_CLEAR_NODE(&mnt->mnt_node); } bool has_locked_children(struct mount *mnt, struct dentry *dentry); struct mnt_namespace *get_sequential_mnt_ns(struct mnt_namespace *mnt_ns, bool previous); static inline struct mnt_namespace *to_mnt_ns(struct ns_common *ns) { return container_of(ns, struct mnt_namespace, ns); } #ifdef CONFIG_FSNOTIFY static inline void mnt_notify_add(struct mount *m) { /* Optimize the case where there are no watches */ if ((m->mnt_ns && m->mnt_ns->n_fsnotify_marks) || (m->prev_ns && m->prev_ns->n_fsnotify_marks)) list_add_tail(&m->to_notify, &notify_list); else m->prev_ns = m->mnt_ns; } #else static inline void mnt_notify_add(struct mount *m) { } #endif static inline struct mount *topmost_overmount(struct mount *m) { while (m->overmount) m = m->overmount; return m; } static inline bool __test_write_hold(struct mount * __aligned(1) *val) { return (unsigned long)val & WRITE_HOLD; } static inline bool test_write_hold(const struct mount *m) { return __test_write_hold(m->mnt_pprev_for_sb); } static inline void set_write_hold(struct mount *m) { m->mnt_pprev_for_sb = (void *)((unsigned long)m->mnt_pprev_for_sb | WRITE_HOLD); } static inline void clear_write_hold(struct mount *m) { m->mnt_pprev_for_sb = (void *)((unsigned long)m->mnt_pprev_for_sb & ~WRITE_HOLD); } struct mnt_namespace *mnt_ns_from_dentry(struct dentry *dentry);
16 6 1 30 16 111 15 58 14 18 18 28 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. * */ #ifndef __IO_PAGETABLE_H #define __IO_PAGETABLE_H #include <linux/dma-buf.h> #include <linux/interval_tree.h> #include <linux/kref.h> #include <linux/mutex.h> #include <linux/xarray.h> #include "iommufd_private.h" struct iommu_domain; /* * Each io_pagetable is composed of intervals of areas which cover regions of * the iova that are backed by something. iova not covered by areas is not * populated in the page table. Each area is fully populated with pages. * * iovas are in byte units, but must be iopt->iova_alignment aligned. * * pages can be NULL, this means some other thread is still working on setting * up or tearing down the area. When observed under the write side of the * domain_rwsem a NULL pages must mean the area is still being setup and no * domains are filled. * * storage_domain points at an arbitrary iommu_domain that is holding the PFNs * for this area. It is locked by the pages->mutex. This simplifies the locking * as the pages code can rely on the storage_domain without having to get the * iopt->domains_rwsem. * * The io_pagetable::iova_rwsem protects node * The iopt_pages::mutex protects pages_node * iopt and iommu_prot are immutable * The pages::mutex protects num_accesses */ struct iopt_area { struct interval_tree_node node; struct interval_tree_node pages_node; struct io_pagetable *iopt; struct iopt_pages *pages; struct iommu_domain *storage_domain; /* How many bytes into the first page the area starts */ unsigned int page_offset; /* IOMMU_READ, IOMMU_WRITE, etc */ int iommu_prot; bool prevent_access : 1; unsigned int num_accesses; unsigned int num_locks; }; struct iopt_allowed { struct interval_tree_node node; }; struct iopt_reserved { struct interval_tree_node node; void *owner; }; int iopt_area_fill_domains(struct iopt_area *area, struct iopt_pages *pages); void iopt_area_unfill_domains(struct iopt_area *area, struct iopt_pages *pages); int iopt_area_fill_domain(struct iopt_area *area, struct iommu_domain *domain); void iopt_area_unfill_domain(struct iopt_area *area, struct iopt_pages *pages, struct iommu_domain *domain); void iopt_area_unmap_domain(struct iopt_area *area, struct iommu_domain *domain); int iopt_dmabuf_track_domain(struct iopt_pages *pages, struct iopt_area *area, struct iommu_domain *domain); void iopt_dmabuf_untrack_domain(struct iopt_pages *pages, struct iopt_area *area, struct iommu_domain *domain); int iopt_dmabuf_track_all_domains(struct iopt_area *area, struct iopt_pages *pages); void iopt_dmabuf_untrack_all_domains(struct iopt_area *area, struct iopt_pages *pages); static inline unsigned long iopt_area_index(struct iopt_area *area) { return area->pages_node.start; } static inline unsigned long iopt_area_last_index(struct iopt_area *area) { return area->pages_node.last; } static inline unsigned long iopt_area_iova(struct iopt_area *area) { return area->node.start; } static inline unsigned long iopt_area_last_iova(struct iopt_area *area) { return area->node.last; } static inline size_t iopt_area_length(struct iopt_area *area) { return (area->node.last - area->node.start) + 1; } /* * Number of bytes from the start of the iopt_pages that the iova begins. * iopt_area_start_byte() / PAGE_SIZE encodes the starting page index * iopt_area_start_byte() % PAGE_SIZE encodes the offset within that page */ static inline unsigned long iopt_area_start_byte(struct iopt_area *area, unsigned long iova) { if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) WARN_ON(iova < iopt_area_iova(area) || iova > iopt_area_last_iova(area)); return (iova - iopt_area_iova(area)) + area->page_offset + iopt_area_index(area) * PAGE_SIZE; } static inline unsigned long iopt_area_iova_to_index(struct iopt_area *area, unsigned long iova) { return iopt_area_start_byte(area, iova) / PAGE_SIZE; } #define __make_iopt_iter(name) \ static inline struct iopt_##name *iopt_##name##_iter_first( \ struct io_pagetable *iopt, unsigned long start, \ unsigned long last) \ { \ struct interval_tree_node *node; \ \ lockdep_assert_held(&iopt->iova_rwsem); \ node = interval_tree_iter_first(&iopt->name##_itree, start, \ last); \ if (!node) \ return NULL; \ return container_of(node, struct iopt_##name, node); \ } \ static inline struct iopt_##name *iopt_##name##_iter_next( \ struct iopt_##name *last_node, unsigned long start, \ unsigned long last) \ { \ struct interval_tree_node *node; \ \ node = interval_tree_iter_next(&last_node->node, start, last); \ if (!node) \ return NULL; \ return container_of(node, struct iopt_##name, node); \ } __make_iopt_iter(area) __make_iopt_iter(allowed) __make_iopt_iter(reserved) struct iopt_area_contig_iter { unsigned long cur_iova; unsigned long last_iova; struct iopt_area *area; }; struct iopt_area *iopt_area_contig_init(struct iopt_area_contig_iter *iter, struct io_pagetable *iopt, unsigned long iova, unsigned long last_iova); struct iopt_area *iopt_area_contig_next(struct iopt_area_contig_iter *iter); static inline bool iopt_area_contig_done(struct iopt_area_contig_iter *iter) { return iter->area && iter->last_iova <= iopt_area_last_iova(iter->area); } /* * Iterate over a contiguous list of areas that span the iova,last_iova range. * The caller must check iopt_area_contig_done() after the loop to see if * contiguous areas existed. */ #define iopt_for_each_contig_area(iter, area, iopt, iova, last_iova) \ for (area = iopt_area_contig_init(iter, iopt, iova, last_iova); area; \ area = iopt_area_contig_next(iter)) enum { IOPT_PAGES_ACCOUNT_NONE = 0, IOPT_PAGES_ACCOUNT_USER = 1, IOPT_PAGES_ACCOUNT_MM = 2, IOPT_PAGES_ACCOUNT_MODE_NUM = 3, }; enum iopt_address_type { IOPT_ADDRESS_USER = 0, IOPT_ADDRESS_FILE, IOPT_ADDRESS_DMABUF, }; struct iopt_pages_dmabuf_track { struct iommu_domain *domain; struct iopt_area *area; struct list_head elm; }; struct iopt_pages_dmabuf { struct dma_buf_attachment *attach; struct phys_vec phys; /* Always PAGE_SIZE aligned */ unsigned long start; struct list_head tracker; }; /* * This holds a pinned page list for multiple areas of IO address space. The * pages always originate from a linear chunk of userspace VA. Multiple * io_pagetable's, through their iopt_area's, can share a single iopt_pages * which avoids multi-pinning and double accounting of page consumption. * * indexes in this structure are measured in PAGE_SIZE units, are 0 based from * the start of the uptr and extend to npages. pages are pinned dynamically * according to the intervals in the access_itree and domains_itree, npinned * records the current number of pages pinned. */ struct iopt_pages { struct kref kref; struct mutex mutex; size_t npages; size_t npinned; size_t last_npinned; struct task_struct *source_task; struct mm_struct *source_mm; struct user_struct *source_user; enum iopt_address_type type; union { void __user *uptr; /* IOPT_ADDRESS_USER */ struct { /* IOPT_ADDRESS_FILE */ struct file *file; unsigned long start; }; /* IOPT_ADDRESS_DMABUF */ struct iopt_pages_dmabuf dmabuf; }; bool writable:1; u8 account_mode; struct xarray pinned_pfns; /* Of iopt_pages_access::node */ struct rb_root_cached access_itree; /* Of iopt_area::pages_node */ struct rb_root_cached domains_itree; }; static inline bool iopt_is_dmabuf(struct iopt_pages *pages) { if (!IS_ENABLED(CONFIG_DMA_SHARED_BUFFER)) return false; return pages->type == IOPT_ADDRESS_DMABUF; } static inline bool iopt_dmabuf_revoked(struct iopt_pages *pages) { lockdep_assert_held(&pages->mutex); if (iopt_is_dmabuf(pages)) return pages->dmabuf.phys.len == 0; return false; } struct iopt_pages *iopt_alloc_user_pages(void __user *uptr, unsigned long length, bool writable); struct iopt_pages *iopt_alloc_file_pages(struct file *file, unsigned long start_byte, unsigned long start, unsigned long length, bool writable); struct iopt_pages *iopt_alloc_dmabuf_pages(struct iommufd_ctx *ictx, struct dma_buf *dmabuf, unsigned long start_byte, unsigned long start, unsigned long length, bool writable); void iopt_release_pages(struct kref *kref); static inline void iopt_put_pages(struct iopt_pages *pages) { kref_put(&pages->kref, iopt_release_pages); } void iopt_pages_fill_from_xarray(struct iopt_pages *pages, unsigned long start, unsigned long last, struct page **out_pages); int iopt_pages_fill_xarray(struct iopt_pages *pages, unsigned long start, unsigned long last, struct page **out_pages); void iopt_pages_unfill_xarray(struct iopt_pages *pages, unsigned long start, unsigned long last); int iopt_area_add_access(struct iopt_area *area, unsigned long start, unsigned long last, struct page **out_pages, unsigned int flags, bool lock_area); void iopt_area_remove_access(struct iopt_area *area, unsigned long start, unsigned long last, bool unlock_area); int iopt_pages_rw_access(struct iopt_pages *pages, unsigned long start_byte, void *data, unsigned long length, unsigned int flags); /* * Each interval represents an active iopt_access_pages(), it acts as an * interval lock that keeps the PFNs pinned and stored in the xarray. */ struct iopt_pages_access { struct interval_tree_node node; unsigned int users; }; struct pfn_reader_user; int iopt_pages_update_pinned(struct iopt_pages *pages, unsigned long npages, bool inc, struct pfn_reader_user *user); #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* SCTP kernel implementation * (C) Copyright IBM Corp. 2001, 2004 * Copyright (c) 1999-2000 Cisco, Inc. * Copyright (c) 1999-2001 Motorola, Inc. * Copyright (c) 2001 Intel Corp. * * This file is part of the SCTP kernel implementation * * These are the definitions needed for the tsnmap type. The tsnmap is used * to track out of order TSNs received. * * Please send any bug reports or fixes you make to the * email address(es): * lksctp developers <linux-sctp@vger.kernel.org> * * Written or modified by: * Jon Grimm <jgrimm@us.ibm.com> * La Monte H.P. Yarroll <piggy@acm.org> * Karl Knutson <karl@athena.chicago.il.us> * Sridhar Samudrala <sri@us.ibm.com> */ #include <net/sctp/constants.h> #ifndef __sctp_tsnmap_h__ #define __sctp_tsnmap_h__ /* RFC 2960 12.2 Parameters necessary per association (i.e. the TCB) * Mapping An array of bits or bytes indicating which out of * Array order TSN's have been received (relative to the * Last Rcvd TSN). If no gaps exist, i.e. no out of * order packets have been received, this array * will be set to all zero. This structure may be * in the form of a circular buffer or bit array. */ struct sctp_tsnmap { /* This array counts the number of chunks with each TSN. * It points at one of the two buffers with which we will * ping-pong between. */ unsigned long *tsn_map; /* This is the TSN at tsn_map[0]. */ __u32 base_tsn; /* Last Rcvd : This is the last TSN received in * TSN : sequence. This value is set initially by * : taking the peer's Initial TSN, received in * : the INIT or INIT ACK chunk, and subtracting * : one from it. * * Throughout most of the specification this is called the * "Cumulative TSN ACK Point". In this case, we * ignore the advice in 12.2 in favour of the term * used in the bulk of the text. */ __u32 cumulative_tsn_ack_point; /* This is the highest TSN we've marked. */ __u32 max_tsn_seen; /* This is the minimum number of TSNs we can track. This corresponds * to the size of tsn_map. Note: the overflow_map allows us to * potentially track more than this quantity. */ __u16 len; /* Data chunks pending receipt. used by SCTP_STATUS sockopt */ __u16 pending_data; /* Record duplicate TSNs here. We clear this after * every SACK. Store up to SCTP_MAX_DUP_TSNS worth of * information. */ __u16 num_dup_tsns; __be32 dup_tsns[SCTP_MAX_DUP_TSNS]; }; struct sctp_tsnmap_iter { __u32 start; }; /* Initialize a block of memory as a tsnmap. */ struct sctp_tsnmap *sctp_tsnmap_init(struct sctp_tsnmap *, __u16 len, __u32 initial_tsn, gfp_t gfp); void sctp_tsnmap_free(struct sctp_tsnmap *map); /* Test the tracking state of this TSN. * Returns: * 0 if the TSN has not yet been seen * >0 if the TSN has been seen (duplicate) * <0 if the TSN is invalid (too large to track) */ int sctp_tsnmap_check(const struct sctp_tsnmap *, __u32 tsn); /* Mark this TSN as seen. */ int sctp_tsnmap_mark(struct sctp_tsnmap *, __u32 tsn, struct sctp_transport *trans); /* Mark this TSN and all lower as seen. */ void sctp_tsnmap_skip(struct sctp_tsnmap *map, __u32 tsn); /* Retrieve the Cumulative TSN ACK Point. */ static inline __u32 sctp_tsnmap_get_ctsn(const struct sctp_tsnmap *map) { return map->cumulative_tsn_ack_point; } /* Retrieve the highest TSN we've seen. */ static inline __u32 sctp_tsnmap_get_max_tsn_seen(const struct sctp_tsnmap *map) { return map->max_tsn_seen; } /* How many duplicate TSNs are stored? */ static inline __u16 sctp_tsnmap_num_dups(struct sctp_tsnmap *map) { return map->num_dup_tsns; } /* Return pointer to duplicate tsn array as needed by SACK. */ static inline __be32 *sctp_tsnmap_get_dups(struct sctp_tsnmap *map) { map->num_dup_tsns = 0; return map->dup_tsns; } /* How many gap ack blocks do we have recorded? */ __u16 sctp_tsnmap_num_gabs(struct sctp_tsnmap *map, struct sctp_gap_ack_block *gabs); /* Refresh the count on pending data. */ __u16 sctp_tsnmap_pending(struct sctp_tsnmap *map); /* Is there a gap in the TSN map? */ static inline int sctp_tsnmap_has_gap(const struct sctp_tsnmap *map) { return map->cumulative_tsn_ack_point != map->max_tsn_seen; } /* Mark a duplicate TSN. Note: limit the storage of duplicate TSN * information. */ static inline void sctp_tsnmap_mark_dup(struct sctp_tsnmap *map, __u32 tsn) { if (map->num_dup_tsns < SCTP_MAX_DUP_TSNS) map->dup_tsns[map->num_dup_tsns++] = htonl(tsn); } /* Renege a TSN that was seen. */ void sctp_tsnmap_renege(struct sctp_tsnmap *, __u32 tsn); /* Is there a gap in the TSN map? */ int sctp_tsnmap_has_gap(const struct sctp_tsnmap *); #endif /* __sctp_tsnmap_h__ */
1 1 2 2 5 5 11 11 11 11 1 1 8 8 8 8 8 8 6 2 6 8 4 4 1 4 1 1 5 1 4 4 4 3 2 1 4 4 10 10 8 2 3 3 1 2 3 3 1 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 // SPDX-License-Identifier: GPL-2.0-only /* * Fence mechanism for dma-buf and to allow for asynchronous dma access * * Copyright (C) 2012 Canonical Ltd * Copyright (C) 2012 Texas Instruments * * Authors: * Rob Clark <robdclark@gmail.com> * Maarten Lankhorst <maarten.lankhorst@canonical.com> */ #include <linux/slab.h> #include <linux/export.h> #include <linux/atomic.h> #include <linux/dma-fence.h> #include <linux/sched/signal.h> #include <linux/seq_file.h> #define CREATE_TRACE_POINTS #include <trace/events/dma_fence.h> EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit); EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal); EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled); static DEFINE_SPINLOCK(dma_fence_stub_lock); static struct dma_fence dma_fence_stub; /* * fence context counter: each execution context should have its own * fence context, this allows checking if fences belong to the same * context or not. One device can have multiple separate contexts, * and they're used if some engine can run independently of another. */ static atomic64_t dma_fence_context_counter = ATOMIC64_INIT(1); /** * DOC: DMA fences overview * * DMA fences, represented by &struct dma_fence, are the kernel internal * synchronization primitive for DMA operations like GPU rendering, video * encoding/decoding, or displaying buffers on a screen. * * A fence is initialized using dma_fence_init() and completed using * dma_fence_signal(). Fences are associated with a context, allocated through * dma_fence_context_alloc(), and all fences on the same context are * fully ordered. * * Since the purposes of fences is to facilitate cross-device and * cross-application synchronization, there's multiple ways to use one: * * - Individual fences can be exposed as a &sync_file, accessed as a file * descriptor from userspace, created by calling sync_file_create(). This is * called explicit fencing, since userspace passes around explicit * synchronization points. * * - Some subsystems also have their own explicit fencing primitives, like * &drm_syncobj. Compared to &sync_file, a &drm_syncobj allows the underlying * fence to be updated. * * - Then there's also implicit fencing, where the synchronization points are * implicitly passed around as part of shared &dma_buf instances. Such * implicit fences are stored in &struct dma_resv through the * &dma_buf.resv pointer. */ /** * DOC: fence cross-driver contract * * Since &dma_fence provide a cross driver contract, all drivers must follow the * same rules: * * * Fences must complete in a reasonable time. Fences which represent kernels * and shaders submitted by userspace, which could run forever, must be backed * up by timeout and gpu hang recovery code. Minimally that code must prevent * further command submission and force complete all in-flight fences, e.g. * when the driver or hardware do not support gpu reset, or if the gpu reset * failed for some reason. Ideally the driver supports gpu recovery which only * affects the offending userspace context, and no other userspace * submissions. * * * Drivers may have different ideas of what completion within a reasonable * time means. Some hang recovery code uses a fixed timeout, others a mix * between observing forward progress and increasingly strict timeouts. * Drivers should not try to second guess timeout handling of fences from * other drivers. * * * To ensure there's no deadlocks of dma_fence_wait() against other locks * drivers should annotate all code required to reach dma_fence_signal(), * which completes the fences, with dma_fence_begin_signalling() and * dma_fence_end_signalling(). * * * Drivers are allowed to call dma_fence_wait() while holding dma_resv_lock(). * This means any code required for fence completion cannot acquire a * &dma_resv lock. Note that this also pulls in the entire established * locking hierarchy around dma_resv_lock() and dma_resv_unlock(). * * * Drivers are allowed to call dma_fence_wait() from their &shrinker * callbacks. This means any code required for fence completion cannot * allocate memory with GFP_KERNEL. * * * Drivers are allowed to call dma_fence_wait() from their &mmu_notifier * respectively &mmu_interval_notifier callbacks. This means any code required * for fence completion cannot allocate memory with GFP_NOFS or GFP_NOIO. * Only GFP_ATOMIC is permissible, which might fail. * * Note that only GPU drivers have a reasonable excuse for both requiring * &mmu_interval_notifier and &shrinker callbacks at the same time as having to * track asynchronous compute work using &dma_fence. No driver outside of * drivers/gpu should ever call dma_fence_wait() in such contexts. */ static const char *dma_fence_stub_get_name(struct dma_fence *fence) { return "stub"; } static const struct dma_fence_ops dma_fence_stub_ops = { .get_driver_name = dma_fence_stub_get_name, .get_timeline_name = dma_fence_stub_get_name, }; static int __init dma_fence_init_stub(void) { dma_fence_init(&dma_fence_stub, &dma_fence_stub_ops, &dma_fence_stub_lock, 0, 0); set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &dma_fence_stub.flags); dma_fence_signal(&dma_fence_stub); return 0; } subsys_initcall(dma_fence_init_stub); /** * dma_fence_get_stub - return a signaled fence * * Return a stub fence which is already signaled. The fence's timestamp * corresponds to the initialisation time of the linux kernel. */ struct dma_fence *dma_fence_get_stub(void) { return dma_fence_get(&dma_fence_stub); } EXPORT_SYMBOL(dma_fence_get_stub); /** * dma_fence_allocate_private_stub - return a private, signaled fence * @timestamp: timestamp when the fence was signaled * * Return a newly allocated and signaled stub fence. */ struct dma_fence *dma_fence_allocate_private_stub(ktime_t timestamp) { struct dma_fence *fence; fence = kzalloc(sizeof(*fence), GFP_KERNEL); if (fence == NULL) return NULL; dma_fence_init(fence, &dma_fence_stub_ops, &dma_fence_stub_lock, 0, 0); set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags); dma_fence_signal_timestamp(fence, timestamp); return fence; } EXPORT_SYMBOL(dma_fence_allocate_private_stub); /** * dma_fence_context_alloc - allocate an array of fence contexts * @num: amount of contexts to allocate * * This function will return the first index of the number of fence contexts * allocated. The fence context is used for setting &dma_fence.context to a * unique number by passing the context to dma_fence_init(). */ u64 dma_fence_context_alloc(unsigned num) { WARN_ON(!num); return atomic64_fetch_add(num, &dma_fence_context_counter); } EXPORT_SYMBOL(dma_fence_context_alloc); /** * DOC: fence signalling annotation * * Proving correctness of all the kernel code around &dma_fence through code * review and testing is tricky for a few reasons: * * * It is a cross-driver contract, and therefore all drivers must follow the * same rules for lock nesting order, calling contexts for various functions * and anything else significant for in-kernel interfaces. But it is also * impossible to test all drivers in a single machine, hence brute-force N vs. * N testing of all combinations is impossible. Even just limiting to the * possible combinations is infeasible. * * * There is an enormous amount of driver code involved. For render drivers * there's the tail of command submission, after fences are published, * scheduler code, interrupt and workers to process job completion, * and timeout, gpu reset and gpu hang recovery code. Plus for integration * with core mm with have &mmu_notifier, respectively &mmu_interval_notifier, * and &shrinker. For modesetting drivers there's the commit tail functions * between when fences for an atomic modeset are published, and when the * corresponding vblank completes, including any interrupt processing and * related workers. Auditing all that code, across all drivers, is not * feasible. * * * Due to how many other subsystems are involved and the locking hierarchies * this pulls in there is extremely thin wiggle-room for driver-specific * differences. &dma_fence interacts with almost all of the core memory * handling through page fault handlers via &dma_resv, dma_resv_lock() and * dma_resv_unlock(). On the other side it also interacts through all * allocation sites through &mmu_notifier and &shrinker. * * Furthermore lockdep does not handle cross-release dependencies, which means * any deadlocks between dma_fence_wait() and dma_fence_signal() can't be caught * at runtime with some quick testing. The simplest example is one thread * waiting on a &dma_fence while holding a lock:: * * lock(A); * dma_fence_wait(B); * unlock(A); * * while the other thread is stuck trying to acquire the same lock, which * prevents it from signalling the fence the previous thread is stuck waiting * on:: * * lock(A); * unlock(A); * dma_fence_signal(B); * * By manually annotating all code relevant to signalling a &dma_fence we can * teach lockdep about these dependencies, which also helps with the validation * headache since now lockdep can check all the rules for us:: * * cookie = dma_fence_begin_signalling(); * lock(A); * unlock(A); * dma_fence_signal(B); * dma_fence_end_signalling(cookie); * * For using dma_fence_begin_signalling() and dma_fence_end_signalling() to * annotate critical sections the following rules need to be observed: * * * All code necessary to complete a &dma_fence must be annotated, from the * point where a fence is accessible to other threads, to the point where * dma_fence_signal() is called. Un-annotated code can contain deadlock issues, * and due to the very strict rules and many corner cases it is infeasible to * catch these just with review or normal stress testing. * * * &struct dma_resv deserves a special note, since the readers are only * protected by rcu. This means the signalling critical section starts as soon * as the new fences are installed, even before dma_resv_unlock() is called. * * * The only exception are fast paths and opportunistic signalling code, which * calls dma_fence_signal() purely as an optimization, but is not required to * guarantee completion of a &dma_fence. The usual example is a wait IOCTL * which calls dma_fence_signal(), while the mandatory completion path goes * through a hardware interrupt and possible job completion worker. * * * To aid composability of code, the annotations can be freely nested, as long * as the overall locking hierarchy is consistent. The annotations also work * both in interrupt and process context. Due to implementation details this * requires that callers pass an opaque cookie from * dma_fence_begin_signalling() to dma_fence_end_signalling(). * * * Validation against the cross driver contract is implemented by priming * lockdep with the relevant hierarchy at boot-up. This means even just * testing with a single device is enough to validate a driver, at least as * far as deadlocks with dma_fence_wait() against dma_fence_signal() are * concerned. */ #ifdef CONFIG_LOCKDEP static struct lockdep_map dma_fence_lockdep_map = { .name = "dma_fence_map" }; /** * dma_fence_begin_signalling - begin a critical DMA fence signalling section * * Drivers should use this to annotate the beginning of any code section * required to eventually complete &dma_fence by calling dma_fence_signal(). * * The end of these critical sections are annotated with * dma_fence_end_signalling(). * * Returns: * * Opaque cookie needed by the implementation, which needs to be passed to * dma_fence_end_signalling(). */ bool dma_fence_begin_signalling(void) { /* explicitly nesting ... */ if (lock_is_held_type(&dma_fence_lockdep_map, 1)) return true; /* rely on might_sleep check for soft/hardirq locks */ if (in_atomic()) return true; /* ... and non-recursive successful read_trylock */ lock_acquire(&dma_fence_lockdep_map, 0, 1, 1, 1, NULL, _RET_IP_); return false; } EXPORT_SYMBOL(dma_fence_begin_signalling); /** * dma_fence_end_signalling - end a critical DMA fence signalling section * @cookie: opaque cookie from dma_fence_begin_signalling() * * Closes a critical section annotation opened by dma_fence_begin_signalling(). */ void dma_fence_end_signalling(bool cookie) { if (cookie) return; lock_release(&dma_fence_lockdep_map, _RET_IP_); } EXPORT_SYMBOL(dma_fence_end_signalling); void __dma_fence_might_wait(void) { bool tmp; tmp = lock_is_held_type(&dma_fence_lockdep_map, 1); if (tmp) lock_release(&dma_fence_lockdep_map, _THIS_IP_); lock_map_acquire(&dma_fence_lockdep_map); lock_map_release(&dma_fence_lockdep_map); if (tmp) lock_acquire(&dma_fence_lockdep_map, 0, 1, 1, 1, NULL, _THIS_IP_); } #endif /** * dma_fence_signal_timestamp_locked - signal completion of a fence * @fence: the fence to signal * @timestamp: fence signal timestamp in kernel's CLOCK_MONOTONIC time domain * * Signal completion for software callbacks on a fence, this will unblock * dma_fence_wait() calls and run all the callbacks added with * dma_fence_add_callback(). Can be called multiple times, but since a fence * can only go from the unsignaled to the signaled state and not back, it will * only be effective the first time. Set the timestamp provided as the fence * signal timestamp. * * Unlike dma_fence_signal_timestamp(), this function must be called with * &dma_fence.lock held. */ void dma_fence_signal_timestamp_locked(struct dma_fence *fence, ktime_t timestamp) { struct dma_fence_cb *cur, *tmp; struct list_head cb_list; lockdep_assert_held(fence->lock); if (unlikely(test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))) return; /* Stash the cb_list before replacing it with the timestamp */ list_replace(&fence->cb_list, &cb_list); fence->timestamp = timestamp; set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags); trace_dma_fence_signaled(fence); list_for_each_entry_safe(cur, tmp, &cb_list, node) { INIT_LIST_HEAD(&cur->node); cur->func(fence, cur); } } EXPORT_SYMBOL(dma_fence_signal_timestamp_locked); /** * dma_fence_signal_timestamp - signal completion of a fence * @fence: the fence to signal * @timestamp: fence signal timestamp in kernel's CLOCK_MONOTONIC time domain * * Signal completion for software callbacks on a fence, this will unblock * dma_fence_wait() calls and run all the callbacks added with * dma_fence_add_callback(). Can be called multiple times, but since a fence * can only go from the unsignaled to the signaled state and not back, it will * only be effective the first time. Set the timestamp provided as the fence * signal timestamp. */ void dma_fence_signal_timestamp(struct dma_fence *fence, ktime_t timestamp) { unsigned long flags; if (WARN_ON(!fence)) return; spin_lock_irqsave(fence->lock, flags); dma_fence_signal_timestamp_locked(fence, timestamp); spin_unlock_irqrestore(fence->lock, flags); } EXPORT_SYMBOL(dma_fence_signal_timestamp); /** * dma_fence_signal_locked - signal completion of a fence * @fence: the fence to signal * * Signal completion for software callbacks on a fence, this will unblock * dma_fence_wait() calls and run all the callbacks added with * dma_fence_add_callback(). Can be called multiple times, but since a fence * can only go from the unsignaled to the signaled state and not back, it will * only be effective the first time. * * Unlike dma_fence_signal(), this function must be called with &dma_fence.lock * held. */ void dma_fence_signal_locked(struct dma_fence *fence) { dma_fence_signal_timestamp_locked(fence, ktime_get()); } EXPORT_SYMBOL(dma_fence_signal_locked); /** * dma_fence_check_and_signal_locked - signal the fence if it's not yet signaled * @fence: the fence to check and signal * * Checks whether a fence was signaled and signals it if it was not yet signaled. * * Unlike dma_fence_check_and_signal(), this function must be called with * &struct dma_fence.lock being held. * * Return: true if fence has been signaled already, false otherwise. */ bool dma_fence_check_and_signal_locked(struct dma_fence *fence) { bool ret; ret = dma_fence_test_signaled_flag(fence); dma_fence_signal_locked(fence); return ret; } EXPORT_SYMBOL(dma_fence_check_and_signal_locked); /** * dma_fence_check_and_signal - signal the fence if it's not yet signaled * @fence: the fence to check and signal * * Checks whether a fence was signaled and signals it if it was not yet signaled. * All this is done in a race-free manner. * * Return: true if fence has been signaled already, false otherwise. */ bool dma_fence_check_and_signal(struct dma_fence *fence) { unsigned long flags; bool ret; spin_lock_irqsave(fence->lock, flags); ret = dma_fence_check_and_signal_locked(fence); spin_unlock_irqrestore(fence->lock, flags); return ret; } EXPORT_SYMBOL(dma_fence_check_and_signal); /** * dma_fence_signal - signal completion of a fence * @fence: the fence to signal * * Signal completion for software callbacks on a fence, this will unblock * dma_fence_wait() calls and run all the callbacks added with * dma_fence_add_callback(). Can be called multiple times, but since a fence * can only go from the unsignaled to the signaled state and not back, it will * only be effective the first time. */ void dma_fence_signal(struct dma_fence *fence) { unsigned long flags; bool tmp; if (WARN_ON(!fence)) return; tmp = dma_fence_begin_signalling(); spin_lock_irqsave(fence->lock, flags); dma_fence_signal_timestamp_locked(fence, ktime_get()); spin_unlock_irqrestore(fence->lock, flags); dma_fence_end_signalling(tmp); } EXPORT_SYMBOL(dma_fence_signal); /** * dma_fence_wait_timeout - sleep until the fence gets signaled * or until timeout elapses * @fence: the fence to wait on * @intr: if true, do an interruptible wait * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT * * Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or the * remaining timeout in jiffies on success. Other error values may be * returned on custom implementations. * * Performs a synchronous wait on this fence. It is assumed the caller * directly or indirectly (buf-mgr between reservation and committing) * holds a reference to the fence, otherwise the fence might be * freed before return, resulting in undefined behavior. * * See also dma_fence_wait() and dma_fence_wait_any_timeout(). */ signed long dma_fence_wait_timeout(struct dma_fence *fence, bool intr, signed long timeout) { signed long ret; if (WARN_ON(timeout < 0)) return -EINVAL; might_sleep(); __dma_fence_might_wait(); dma_fence_enable_sw_signaling(fence); if (trace_dma_fence_wait_start_enabled()) { rcu_read_lock(); trace_dma_fence_wait_start(fence); rcu_read_unlock(); } if (fence->ops->wait) ret = fence->ops->wait(fence, intr, timeout); else ret = dma_fence_default_wait(fence, intr, timeout); if (trace_dma_fence_wait_end_enabled()) { rcu_read_lock(); trace_dma_fence_wait_end(fence); rcu_read_unlock(); } return ret; } EXPORT_SYMBOL(dma_fence_wait_timeout); /** * dma_fence_release - default release function for fences * @kref: &dma_fence.recfount * * This is the default release functions for &dma_fence. Drivers shouldn't call * this directly, but instead call dma_fence_put(). */ void dma_fence_release(struct kref *kref) { struct dma_fence *fence = container_of(kref, struct dma_fence, refcount); rcu_read_lock(); trace_dma_fence_destroy(fence); if (!list_empty(&fence->cb_list) && !dma_fence_test_signaled_flag(fence)) { const char __rcu *timeline; const char __rcu *driver; unsigned long flags; driver = dma_fence_driver_name(fence); timeline = dma_fence_timeline_name(fence); WARN(1, "Fence %s:%s:%llx:%llx released with pending signals!\n", rcu_dereference(driver), rcu_dereference(timeline), fence->context, fence->seqno); /* * Failed to signal before release, likely a refcounting issue. * * This should never happen, but if it does make sure that we * don't leave chains dangling. We set the error flag first * so that the callbacks know this signal is due to an error. */ spin_lock_irqsave(fence->lock, flags); fence->error = -EDEADLK; dma_fence_signal_locked(fence); spin_unlock_irqrestore(fence->lock, flags); } rcu_read_unlock(); if (fence->ops->release) fence->ops->release(fence); else dma_fence_free(fence); } EXPORT_SYMBOL(dma_fence_release); /** * dma_fence_free - default release function for &dma_fence. * @fence: fence to release * * This is the default implementation for &dma_fence_ops.release. It calls * kfree_rcu() on @fence. */ void dma_fence_free(struct dma_fence *fence) { kfree_rcu(fence, rcu); } EXPORT_SYMBOL(dma_fence_free); static bool __dma_fence_enable_signaling(struct dma_fence *fence) { bool was_set; lockdep_assert_held(fence->lock); was_set = test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags); if (dma_fence_test_signaled_flag(fence)) return false; if (!was_set && fence->ops->enable_signaling) { trace_dma_fence_enable_signal(fence); if (!fence->ops->enable_signaling(fence)) { dma_fence_signal_locked(fence); return false; } } return true; } /** * dma_fence_enable_sw_signaling - enable signaling on fence * @fence: the fence to enable * * This will request for sw signaling to be enabled, to make the fence * complete as soon as possible. This calls &dma_fence_ops.enable_signaling * internally. */ void dma_fence_enable_sw_signaling(struct dma_fence *fence) { unsigned long flags; spin_lock_irqsave(fence->lock, flags); __dma_fence_enable_signaling(fence); spin_unlock_irqrestore(fence->lock, flags); } EXPORT_SYMBOL(dma_fence_enable_sw_signaling); /** * dma_fence_add_callback - add a callback to be called when the fence * is signaled * @fence: the fence to wait on * @cb: the callback to register * @func: the function to call * * Add a software callback to the fence. The caller should keep a reference to * the fence. * * @cb will be initialized by dma_fence_add_callback(), no initialization * by the caller is required. Any number of callbacks can be registered * to a fence, but a callback can only be registered to one fence at a time. * * If fence is already signaled, this function will return -ENOENT (and * *not* call the callback). * * Note that the callback can be called from an atomic context or irq context. * * Returns 0 in case of success, -ENOENT if the fence is already signaled * and -EINVAL in case of error. */ int dma_fence_add_callback(struct dma_fence *fence, struct dma_fence_cb *cb, dma_fence_func_t func) { unsigned long flags; int ret = 0; if (WARN_ON(!fence || !func)) return -EINVAL; if (dma_fence_test_signaled_flag(fence)) { INIT_LIST_HEAD(&cb->node); return -ENOENT; } spin_lock_irqsave(fence->lock, flags); if (__dma_fence_enable_signaling(fence)) { cb->func = func; list_add_tail(&cb->node, &fence->cb_list); } else { INIT_LIST_HEAD(&cb->node); ret = -ENOENT; } spin_unlock_irqrestore(fence->lock, flags); return ret; } EXPORT_SYMBOL(dma_fence_add_callback); /** * dma_fence_get_status - returns the status upon completion * @fence: the dma_fence to query * * This wraps dma_fence_get_status_locked() to return the error status * condition on a signaled fence. See dma_fence_get_status_locked() for more * details. * * Returns 0 if the fence has not yet been signaled, 1 if the fence has * been signaled without an error condition, or a negative error code * if the fence has been completed in err. */ int dma_fence_get_status(struct dma_fence *fence) { unsigned long flags; int status; spin_lock_irqsave(fence->lock, flags); status = dma_fence_get_status_locked(fence); spin_unlock_irqrestore(fence->lock, flags); return status; } EXPORT_SYMBOL(dma_fence_get_status); /** * dma_fence_remove_callback - remove a callback from the signaling list * @fence: the fence to wait on * @cb: the callback to remove * * Remove a previously queued callback from the fence. This function returns * true if the callback is successfully removed, or false if the fence has * already been signaled. * * *WARNING*: * Cancelling a callback should only be done if you really know what you're * doing, since deadlocks and race conditions could occur all too easily. For * this reason, it should only ever be done on hardware lockup recovery, * with a reference held to the fence. * * Behaviour is undefined if @cb has not been added to @fence using * dma_fence_add_callback() beforehand. */ bool dma_fence_remove_callback(struct dma_fence *fence, struct dma_fence_cb *cb) { unsigned long flags; bool ret; spin_lock_irqsave(fence->lock, flags); ret = !list_empty(&cb->node); if (ret) list_del_init(&cb->node); spin_unlock_irqrestore(fence->lock, flags); return ret; } EXPORT_SYMBOL(dma_fence_remove_callback); struct default_wait_cb { struct dma_fence_cb base; struct task_struct *task; }; static void dma_fence_default_wait_cb(struct dma_fence *fence, struct dma_fence_cb *cb) { struct default_wait_cb *wait = container_of(cb, struct default_wait_cb, base); wake_up_state(wait->task, TASK_NORMAL); } /** * dma_fence_default_wait - default sleep until the fence gets signaled * or until timeout elapses * @fence: the fence to wait on * @intr: if true, do an interruptible wait * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT * * Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or the * remaining timeout in jiffies on success. If timeout is zero the value one is * returned if the fence is already signaled for consistency with other * functions taking a jiffies timeout. */ signed long dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout) { struct default_wait_cb cb; unsigned long flags; signed long ret = timeout ? timeout : 1; spin_lock_irqsave(fence->lock, flags); if (dma_fence_test_signaled_flag(fence)) goto out; if (intr && signal_pending(current)) { ret = -ERESTARTSYS; goto out; } if (!timeout) { ret = 0; goto out; } cb.base.func = dma_fence_default_wait_cb; cb.task = current; list_add(&cb.base.node, &fence->cb_list); while (!dma_fence_test_signaled_flag(fence) && ret > 0) { if (intr) __set_current_state(TASK_INTERRUPTIBLE); else __set_current_state(TASK_UNINTERRUPTIBLE); spin_unlock_irqrestore(fence->lock, flags); ret = schedule_timeout(ret); spin_lock_irqsave(fence->lock, flags); if (ret > 0 && intr && signal_pending(current)) ret = -ERESTARTSYS; } if (!list_empty(&cb.base.node)) list_del(&cb.base.node); __set_current_state(TASK_RUNNING); out: spin_unlock_irqrestore(fence->lock, flags); return ret; } EXPORT_SYMBOL(dma_fence_default_wait); static bool dma_fence_test_signaled_any(struct dma_fence **fences, uint32_t count, uint32_t *idx) { int i; for (i = 0; i < count; ++i) { struct dma_fence *fence = fences[i]; if (dma_fence_test_signaled_flag(fence)) { if (idx) *idx = i; return true; } } return false; } /** * dma_fence_wait_any_timeout - sleep until any fence gets signaled * or until timeout elapses * @fences: array of fences to wait on * @count: number of fences to wait on * @intr: if true, do an interruptible wait * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT * @idx: used to store the first signaled fence index, meaningful only on * positive return * * Returns -EINVAL on custom fence wait implementation, -ERESTARTSYS if * interrupted, 0 if the wait timed out, or the remaining timeout in jiffies * on success. * * Synchronous waits for the first fence in the array to be signaled. The * caller needs to hold a reference to all fences in the array, otherwise a * fence might be freed before return, resulting in undefined behavior. * * See also dma_fence_wait() and dma_fence_wait_timeout(). */ signed long dma_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count, bool intr, signed long timeout, uint32_t *idx) { struct default_wait_cb *cb; signed long ret = timeout; unsigned i; if (WARN_ON(!fences || !count || timeout < 0)) return -EINVAL; if (timeout == 0) { for (i = 0; i < count; ++i) if (dma_fence_is_signaled(fences[i])) { if (idx) *idx = i; return 1; } return 0; } cb = kcalloc(count, sizeof(struct default_wait_cb), GFP_KERNEL); if (cb == NULL) { ret = -ENOMEM; goto err_free_cb; } for (i = 0; i < count; ++i) { struct dma_fence *fence = fences[i]; cb[i].task = current; if (dma_fence_add_callback(fence, &cb[i].base, dma_fence_default_wait_cb)) { /* This fence is already signaled */ if (idx) *idx = i; goto fence_rm_cb; } } while (ret > 0) { if (intr) set_current_state(TASK_INTERRUPTIBLE); else set_current_state(TASK_UNINTERRUPTIBLE); if (dma_fence_test_signaled_any(fences, count, idx)) break; ret = schedule_timeout(ret); if (ret > 0 && intr && signal_pending(current)) ret = -ERESTARTSYS; } __set_current_state(TASK_RUNNING); fence_rm_cb: while (i-- > 0) dma_fence_remove_callback(fences[i], &cb[i].base); err_free_cb: kfree(cb); return ret; } EXPORT_SYMBOL(dma_fence_wait_any_timeout); /** * DOC: deadline hints * * In an ideal world, it would be possible to pipeline a workload sufficiently * that a utilization based device frequency governor could arrive at a minimum * frequency that meets the requirements of the use-case, in order to minimize * power consumption. But in the real world there are many workloads which * defy this ideal. For example, but not limited to: * * * Workloads that ping-pong between device and CPU, with alternating periods * of CPU waiting for device, and device waiting on CPU. This can result in * devfreq and cpufreq seeing idle time in their respective domains and in * result reduce frequency. * * * Workloads that interact with a periodic time based deadline, such as double * buffered GPU rendering vs vblank sync'd page flipping. In this scenario, * missing a vblank deadline results in an *increase* in idle time on the GPU * (since it has to wait an additional vblank period), sending a signal to * the GPU's devfreq to reduce frequency, when in fact the opposite is what is * needed. * * To this end, deadline hint(s) can be set on a &dma_fence via &dma_fence_set_deadline * (or indirectly via userspace facing ioctls like &sync_set_deadline). * The deadline hint provides a way for the waiting driver, or userspace, to * convey an appropriate sense of urgency to the signaling driver. * * A deadline hint is given in absolute ktime (CLOCK_MONOTONIC for userspace * facing APIs). The time could either be some point in the future (such as * the vblank based deadline for page-flipping, or the start of a compositor's * composition cycle), or the current time to indicate an immediate deadline * hint (Ie. forward progress cannot be made until this fence is signaled). * * Multiple deadlines may be set on a given fence, even in parallel. See the * documentation for &dma_fence_ops.set_deadline. * * The deadline hint is just that, a hint. The driver that created the fence * may react by increasing frequency, making different scheduling choices, etc. * Or doing nothing at all. */ /** * dma_fence_set_deadline - set desired fence-wait deadline hint * @fence: the fence that is to be waited on * @deadline: the time by which the waiter hopes for the fence to be * signaled * * Give the fence signaler a hint about an upcoming deadline, such as * vblank, by which point the waiter would prefer the fence to be * signaled by. This is intended to give feedback to the fence signaler * to aid in power management decisions, such as boosting GPU frequency * if a periodic vblank deadline is approaching but the fence is not * yet signaled.. */ void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline) { if (fence->ops->set_deadline && !dma_fence_is_signaled(fence)) fence->ops->set_deadline(fence, deadline); } EXPORT_SYMBOL(dma_fence_set_deadline); /** * dma_fence_describe - Dump fence description into seq_file * @fence: the fence to describe * @seq: the seq_file to put the textual description into * * Dump a textual description of the fence and it's state into the seq_file. */ void dma_fence_describe(struct dma_fence *fence, struct seq_file *seq) { const char __rcu *timeline = ""; const char __rcu *driver = ""; const char *signaled = ""; rcu_read_lock(); if (!dma_fence_is_signaled(fence)) { timeline = dma_fence_timeline_name(fence); driver = dma_fence_driver_name(fence); signaled = "un"; } seq_printf(seq, "%llu:%llu %s %s %ssignalled\n", fence->context, fence->seqno, timeline, driver, signaled); rcu_read_unlock(); } EXPORT_SYMBOL(dma_fence_describe); static void __dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops, spinlock_t *lock, u64 context, u64 seqno, unsigned long flags) { BUG_ON(!lock); BUG_ON(!ops || !ops->get_driver_name || !ops->get_timeline_name); kref_init(&fence->refcount); fence->ops = ops; INIT_LIST_HEAD(&fence->cb_list); fence->lock = lock; fence->context = context; fence->seqno = seqno; fence->flags = flags; fence->error = 0; trace_dma_fence_init(fence); } /** * dma_fence_init - Initialize a custom fence. * @fence: the fence to initialize * @ops: the dma_fence_ops for operations on this fence * @lock: the irqsafe spinlock to use for locking this fence * @context: the execution context this fence is run on * @seqno: a linear increasing sequence number for this context * * Initializes an allocated fence, the caller doesn't have to keep its * refcount after committing with this fence, but it will need to hold a * refcount again if &dma_fence_ops.enable_signaling gets called. * * context and seqno are used for easy comparison between fences, allowing * to check which fence is later by simply using dma_fence_later(). */ void dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops, spinlock_t *lock, u64 context, u64 seqno) { __dma_fence_init(fence, ops, lock, context, seqno, 0UL); } EXPORT_SYMBOL(dma_fence_init); /** * dma_fence_init64 - Initialize a custom fence with 64-bit seqno support. * @fence: the fence to initialize * @ops: the dma_fence_ops for operations on this fence * @lock: the irqsafe spinlock to use for locking this fence * @context: the execution context this fence is run on * @seqno: a linear increasing sequence number for this context * * Initializes an allocated fence, the caller doesn't have to keep its * refcount after committing with this fence, but it will need to hold a * refcount again if &dma_fence_ops.enable_signaling gets called. * * Context and seqno are used for easy comparison between fences, allowing * to check which fence is later by simply using dma_fence_later(). */ void dma_fence_init64(struct dma_fence *fence, const struct dma_fence_ops *ops, spinlock_t *lock, u64 context, u64 seqno) { __dma_fence_init(fence, ops, lock, context, seqno, BIT(DMA_FENCE_FLAG_SEQNO64_BIT)); } EXPORT_SYMBOL(dma_fence_init64); /** * dma_fence_driver_name - Access the driver name * @fence: the fence to query * * Returns a driver name backing the dma-fence implementation. * * IMPORTANT CONSIDERATION: * Dma-fence contract stipulates that access to driver provided data (data not * directly embedded into the object itself), such as the &dma_fence.lock and * memory potentially accessed by the &dma_fence.ops functions, is forbidden * after the fence has been signalled. Drivers are allowed to free that data, * and some do. * * To allow safe access drivers are mandated to guarantee a RCU grace period * between signalling the fence and freeing said data. * * As such access to the driver name is only valid inside a RCU locked section. * The pointer MUST be both queried and USED ONLY WITHIN a SINGLE block guarded * by the &rcu_read_lock and &rcu_read_unlock pair. */ const char __rcu *dma_fence_driver_name(struct dma_fence *fence) { RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "RCU protection is required for safe access to returned string"); if (!dma_fence_test_signaled_flag(fence)) return fence->ops->get_driver_name(fence); else return "detached-driver"; } EXPORT_SYMBOL(dma_fence_driver_name); /** * dma_fence_timeline_name - Access the timeline name * @fence: the fence to query * * Returns a timeline name provided by the dma-fence implementation. * * IMPORTANT CONSIDERATION: * Dma-fence contract stipulates that access to driver provided data (data not * directly embedded into the object itself), such as the &dma_fence.lock and * memory potentially accessed by the &dma_fence.ops functions, is forbidden * after the fence has been signalled. Drivers are allowed to free that data, * and some do. * * To allow safe access drivers are mandated to guarantee a RCU grace period * between signalling the fence and freeing said data. * * As such access to the driver name is only valid inside a RCU locked section. * The pointer MUST be both queried and USED ONLY WITHIN a SINGLE block guarded * by the &rcu_read_lock and &rcu_read_unlock pair. */ const char __rcu *dma_fence_timeline_name(struct dma_fence *fence) { RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "RCU protection is required for safe access to returned string"); if (!dma_fence_test_signaled_flag(fence)) return fence->ops->get_timeline_name(fence); else return "signaled-timeline"; } EXPORT_SYMBOL(dma_fence_timeline_name);
108 106 3 59 109 92 1 16 109 86 1 23 1 105 175 13 10 103 87 1 1 1 1 1 11 145 142 1 144 48 2 63 2 1 24 20 1 1 1 18 4 1 4 10 41 1 28 1 53 1 1 1 51 2 2 1 1 1 2 5 27 1 1 1 1 1 1 1 2 8 15 1 1 1 1 1 1 1 2 5 95 1 1 1 1 1 92 93 1 60 3 20 75 2 10 82 71 1 10 75 1 1 5 8 70 3 19 49 46 4 27 53 40 47 47 47 1 47 1 47 1 46 1 46 4 78 93 83 9 77 16 89 2 92 90 5 83 87 83 79 3 82 78 4 82 79 1 80 32 48 1 2 40 8 43 49 145 145 93 44 28 18 9 10 28 44 7 7 4 1 2 7 5 5 70 69 1 1 1 1 14 14 14 14 11 11 5 5 20 4 4 64 63 2 66 65 1 59 7 66 56 11 53 14 60 5 47 20 62 4 65 2 65 1 2 64 64 1 63 3 66 65 66 64 1 65 1 65 18 18 7 4 3 7 1 2 4 3 1 17 150 148 146 3 1 1 7 4 3 2 2 3 2 1 1 1 1 1 1 1 1 1 1 2 2 27 3 24 22 2 22 2 14 9 23 6 16 1 17 15 2 23 4 7 5 106 2 2 26 2 73 2 3 2 91 2 48 20 19 18 52 2 16 40 41 34 30 7 5 5 43 6 45 6 44 6 33 26 16 128 12 80 79 76 76 75 48 1 49 47 2 45 3 78 5 73 29 1 42 79 35 44 24 7 5 5 5 1 4 5 5 1 3 4 2 3 3 3 2 1 2 1 2 1 3 3 1 1 23 23 1 20 1 21 3 17 16 2 2 2 4 1 1 1 2 2 2 4 3 1 1 2 2 5 5 3 2 10 10 1 7 1 8 1 6 4 1 1 2 5 5 1 1 3 4 11 11 9 2 3 7 1 4 3 4 26 3 1 10 30 1 30 29 27 1 26 2 27 1 9 3 11 1 12 12 12 346 3 1 339 9 1 371 1 372 1 369 27 17 10 345 1 7 353 1 343 15 372 7 1 7 1 8 8 8 6 38 10 35 12 43 5 46 2 47 47 48 47 48 47 56 48 8 48 6 25 24 24 1 24 25 25 25 25 25 36 17 19 8 6 1 4 2 2 2 2 2 2 2 47 1 2 3 46 3 47 49 4 55 49 49 4 2 49 49 9 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 // SPDX-License-Identifier: GPL-2.0-only /* xfrm_user.c: User interface to configure xfrm engine. * * Copyright (C) 2002 David S. Miller (davem@redhat.com) * * Changes: * Mitsuru KANDA @USAGI * Kazunori MIYAZAWA @USAGI * Kunihiro Ishiguro <kunihiro@ipinfusion.com> * IPv6 support * */ #include <linux/compat.h> #include <linux/crypto.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/types.h> #include <linux/slab.h> #include <linux/socket.h> #include <linux/string.h> #include <linux/net.h> #include <linux/skbuff.h> #include <linux/pfkeyv2.h> #include <linux/ipsec.h> #include <linux/init.h> #include <linux/security.h> #include <net/sock.h> #include <net/xfrm.h> #include <net/netlink.h> #include <net/ah.h> #include <linux/uaccess.h> #if IS_ENABLED(CONFIG_IPV6) #include <linux/in6.h> #endif #include <linux/unaligned.h> static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type, struct netlink_ext_ack *extack) { struct nlattr *rt = attrs[type]; struct xfrm_algo *algp; if (!rt) return 0; algp = nla_data(rt); if (nla_len(rt) < (int)xfrm_alg_len(algp)) { NL_SET_ERR_MSG(extack, "Invalid AUTH/CRYPT/COMP attribute length"); return -EINVAL; } switch (type) { case XFRMA_ALG_AUTH: case XFRMA_ALG_CRYPT: case XFRMA_ALG_COMP: break; default: NL_SET_ERR_MSG(extack, "Invalid algorithm attribute type"); return -EINVAL; } algp->alg_name[sizeof(algp->alg_name) - 1] = '\0'; return 0; } static int verify_auth_trunc(struct nlattr **attrs, struct netlink_ext_ack *extack) { struct nlattr *rt = attrs[XFRMA_ALG_AUTH_TRUNC]; struct xfrm_algo_auth *algp; if (!rt) return 0; algp = nla_data(rt); if (nla_len(rt) < (int)xfrm_alg_auth_len(algp)) { NL_SET_ERR_MSG(extack, "Invalid AUTH_TRUNC attribute length"); return -EINVAL; } algp->alg_name[sizeof(algp->alg_name) - 1] = '\0'; return 0; } static int verify_aead(struct nlattr **attrs, struct netlink_ext_ack *extack) { struct nlattr *rt = attrs[XFRMA_ALG_AEAD]; struct xfrm_algo_aead *algp; if (!rt) return 0; algp = nla_data(rt); if (nla_len(rt) < (int)aead_len(algp)) { NL_SET_ERR_MSG(extack, "Invalid AEAD attribute length"); return -EINVAL; } algp->alg_name[sizeof(algp->alg_name) - 1] = '\0'; return 0; } static void verify_one_addr(struct nlattr **attrs, enum xfrm_attr_type_t type, xfrm_address_t **addrp) { struct nlattr *rt = attrs[type]; if (rt && addrp) *addrp = nla_data(rt); } static inline int verify_sec_ctx_len(struct nlattr **attrs, struct netlink_ext_ack *extack) { struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_user_sec_ctx *uctx; if (!rt) return 0; uctx = nla_data(rt); if (uctx->len > nla_len(rt) || uctx->len != (sizeof(struct xfrm_user_sec_ctx) + uctx->ctx_len)) { NL_SET_ERR_MSG(extack, "Invalid security context length"); return -EINVAL; } return 0; } static inline int verify_replay(struct xfrm_usersa_info *p, struct nlattr **attrs, u8 sa_dir, struct netlink_ext_ack *extack) { struct nlattr *rt = attrs[XFRMA_REPLAY_ESN_VAL]; struct xfrm_replay_state_esn *rs; if (!rt) { if (p->flags & XFRM_STATE_ESN) { NL_SET_ERR_MSG(extack, "Missing required attribute for ESN"); return -EINVAL; } return 0; } rs = nla_data(rt); if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8) { NL_SET_ERR_MSG(extack, "ESN bitmap length must be <= 128"); return -EINVAL; } if (nla_len(rt) < (int)xfrm_replay_state_esn_len(rs) && nla_len(rt) != sizeof(*rs)) { NL_SET_ERR_MSG(extack, "ESN attribute is too short to fit the full bitmap length"); return -EINVAL; } /* As only ESP and AH support ESN feature. */ if ((p->id.proto != IPPROTO_ESP) && (p->id.proto != IPPROTO_AH)) { NL_SET_ERR_MSG(extack, "ESN only supported for ESP and AH"); return -EINVAL; } if (p->replay_window != 0) { NL_SET_ERR_MSG(extack, "ESN not compatible with legacy replay_window"); return -EINVAL; } if (sa_dir == XFRM_SA_DIR_OUT) { if (rs->replay_window) { NL_SET_ERR_MSG(extack, "Replay window should be 0 for output SA"); return -EINVAL; } if (rs->seq || rs->seq_hi) { NL_SET_ERR_MSG(extack, "Replay seq and seq_hi should be 0 for output SA"); return -EINVAL; } if (!(p->flags & XFRM_STATE_ESN)) { if (rs->oseq_hi) { NL_SET_ERR_MSG( extack, "Replay oseq_hi should be 0 in non-ESN mode for output SA"); return -EINVAL; } if (rs->oseq == U32_MAX) { NL_SET_ERR_MSG( extack, "Replay oseq should be less than 0xFFFFFFFF in non-ESN mode for output SA"); return -EINVAL; } } else { if (rs->oseq == U32_MAX && rs->oseq_hi == U32_MAX) { NL_SET_ERR_MSG( extack, "Replay oseq and oseq_hi should be less than 0xFFFFFFFF for output SA"); return -EINVAL; } } if (rs->bmp_len) { NL_SET_ERR_MSG(extack, "Replay bmp_len should 0 for output SA"); return -EINVAL; } } if (sa_dir == XFRM_SA_DIR_IN) { if (rs->oseq || rs->oseq_hi) { NL_SET_ERR_MSG(extack, "Replay oseq and oseq_hi should be 0 for input SA"); return -EINVAL; } if (!(p->flags & XFRM_STATE_ESN)) { if (rs->seq_hi) { NL_SET_ERR_MSG( extack, "Replay seq_hi should be 0 in non-ESN mode for input SA"); return -EINVAL; } if (rs->seq == U32_MAX) { NL_SET_ERR_MSG( extack, "Replay seq should be less than 0xFFFFFFFF in non-ESN mode for input SA"); return -EINVAL; } } else { if (rs->seq == U32_MAX && rs->seq_hi == U32_MAX) { NL_SET_ERR_MSG( extack, "Replay seq and seq_hi should be less than 0xFFFFFFFF for input SA"); return -EINVAL; } } } return 0; } static int verify_newsa_info(struct xfrm_usersa_info *p, struct nlattr **attrs, struct netlink_ext_ack *extack) { int err; u8 sa_dir = nla_get_u8_default(attrs[XFRMA_SA_DIR], 0); u16 family = p->sel.family; err = -EINVAL; switch (p->family) { case AF_INET: break; case AF_INET6: #if IS_ENABLED(CONFIG_IPV6) break; #else err = -EAFNOSUPPORT; NL_SET_ERR_MSG(extack, "IPv6 support disabled"); goto out; #endif default: NL_SET_ERR_MSG(extack, "Invalid address family"); goto out; } if (!family && !(p->flags & XFRM_STATE_AF_UNSPEC)) family = p->family; switch (family) { case AF_UNSPEC: break; case AF_INET: if (p->sel.prefixlen_d > 32 || p->sel.prefixlen_s > 32) { NL_SET_ERR_MSG(extack, "Invalid prefix length in selector (must be <= 32 for IPv4)"); goto out; } break; case AF_INET6: #if IS_ENABLED(CONFIG_IPV6) if (p->sel.prefixlen_d > 128 || p->sel.prefixlen_s > 128) { NL_SET_ERR_MSG(extack, "Invalid prefix length in selector (must be <= 128 for IPv6)"); goto out; } break; #else NL_SET_ERR_MSG(extack, "IPv6 support disabled"); err = -EAFNOSUPPORT; goto out; #endif default: NL_SET_ERR_MSG(extack, "Invalid address family in selector"); goto out; } err = -EINVAL; switch (p->id.proto) { case IPPROTO_AH: if (!attrs[XFRMA_ALG_AUTH] && !attrs[XFRMA_ALG_AUTH_TRUNC]) { NL_SET_ERR_MSG(extack, "Missing required attribute for AH: AUTH_TRUNC or AUTH"); goto out; } if (attrs[XFRMA_ALG_AEAD] || attrs[XFRMA_ALG_CRYPT] || attrs[XFRMA_ALG_COMP] || attrs[XFRMA_TFCPAD]) { NL_SET_ERR_MSG(extack, "Invalid attributes for AH: AEAD, CRYPT, COMP, TFCPAD"); goto out; } break; case IPPROTO_ESP: if (attrs[XFRMA_ALG_COMP]) { NL_SET_ERR_MSG(extack, "Invalid attribute for ESP: COMP"); goto out; } if (!attrs[XFRMA_ALG_AUTH] && !attrs[XFRMA_ALG_AUTH_TRUNC] && !attrs[XFRMA_ALG_CRYPT] && !attrs[XFRMA_ALG_AEAD]) { NL_SET_ERR_MSG(extack, "Missing required attribute for ESP: at least one of AUTH, AUTH_TRUNC, CRYPT, AEAD"); goto out; } if ((attrs[XFRMA_ALG_AUTH] || attrs[XFRMA_ALG_AUTH_TRUNC] || attrs[XFRMA_ALG_CRYPT]) && attrs[XFRMA_ALG_AEAD]) { NL_SET_ERR_MSG(extack, "Invalid attribute combination for ESP: AEAD can't be used with AUTH, AUTH_TRUNC, CRYPT"); goto out; } if (attrs[XFRMA_TFCPAD] && p->mode != XFRM_MODE_TUNNEL) { NL_SET_ERR_MSG(extack, "TFC padding can only be used in tunnel mode"); goto out; } if ((attrs[XFRMA_IPTFS_DROP_TIME] || attrs[XFRMA_IPTFS_REORDER_WINDOW] || attrs[XFRMA_IPTFS_DONT_FRAG] || attrs[XFRMA_IPTFS_INIT_DELAY] || attrs[XFRMA_IPTFS_MAX_QSIZE] || attrs[XFRMA_IPTFS_PKT_SIZE]) && p->mode != XFRM_MODE_IPTFS) { NL_SET_ERR_MSG(extack, "IP-TFS options can only be used in IP-TFS mode"); goto out; } break; case IPPROTO_COMP: if (!attrs[XFRMA_ALG_COMP]) { NL_SET_ERR_MSG(extack, "Missing required attribute for COMP: COMP"); goto out; } if (attrs[XFRMA_ALG_AEAD] || attrs[XFRMA_ALG_AUTH] || attrs[XFRMA_ALG_AUTH_TRUNC] || attrs[XFRMA_ALG_CRYPT] || attrs[XFRMA_TFCPAD]) { NL_SET_ERR_MSG(extack, "Invalid attributes for COMP: AEAD, AUTH, AUTH_TRUNC, CRYPT, TFCPAD"); goto out; } if (ntohl(p->id.spi) >= 0x10000) { NL_SET_ERR_MSG(extack, "SPI is too large for COMP (must be < 0x10000)"); goto out; } break; #if IS_ENABLED(CONFIG_IPV6) case IPPROTO_DSTOPTS: case IPPROTO_ROUTING: if (attrs[XFRMA_ALG_COMP] || attrs[XFRMA_ALG_AUTH] || attrs[XFRMA_ALG_AUTH_TRUNC] || attrs[XFRMA_ALG_AEAD] || attrs[XFRMA_ALG_CRYPT] || attrs[XFRMA_ENCAP] || attrs[XFRMA_SEC_CTX] || attrs[XFRMA_TFCPAD]) { NL_SET_ERR_MSG(extack, "Invalid attributes for DSTOPTS/ROUTING"); goto out; } if (!attrs[XFRMA_COADDR]) { NL_SET_ERR_MSG(extack, "Missing required COADDR attribute for DSTOPTS/ROUTING"); goto out; } break; #endif default: NL_SET_ERR_MSG(extack, "Unsupported protocol"); goto out; } if ((err = verify_aead(attrs, extack))) goto out; if ((err = verify_auth_trunc(attrs, extack))) goto out; if ((err = verify_one_alg(attrs, XFRMA_ALG_AUTH, extack))) goto out; if ((err = verify_one_alg(attrs, XFRMA_ALG_CRYPT, extack))) goto out; if ((err = verify_one_alg(attrs, XFRMA_ALG_COMP, extack))) goto out; if ((err = verify_sec_ctx_len(attrs, extack))) goto out; if ((err = verify_replay(p, attrs, sa_dir, extack))) goto out; err = -EINVAL; switch (p->mode) { case XFRM_MODE_TRANSPORT: case XFRM_MODE_TUNNEL: case XFRM_MODE_ROUTEOPTIMIZATION: case XFRM_MODE_BEET: break; case XFRM_MODE_IPTFS: if (p->id.proto != IPPROTO_ESP) { NL_SET_ERR_MSG(extack, "IP-TFS mode only supported with ESP"); goto out; } if (sa_dir == 0) { NL_SET_ERR_MSG(extack, "IP-TFS mode requires in or out direction attribute"); goto out; } break; default: NL_SET_ERR_MSG(extack, "Unsupported mode"); goto out; } err = 0; if (attrs[XFRMA_MTIMER_THRESH]) { if (!attrs[XFRMA_ENCAP]) { NL_SET_ERR_MSG(extack, "MTIMER_THRESH attribute can only be set on ENCAP states"); err = -EINVAL; goto out; } if (sa_dir == XFRM_SA_DIR_OUT) { NL_SET_ERR_MSG(extack, "MTIMER_THRESH attribute should not be set on output SA"); err = -EINVAL; goto out; } } if (sa_dir == XFRM_SA_DIR_OUT) { if (p->flags & XFRM_STATE_DECAP_DSCP) { NL_SET_ERR_MSG(extack, "Flag DECAP_DSCP should not be set for output SA"); err = -EINVAL; goto out; } if (p->flags & XFRM_STATE_ICMP) { NL_SET_ERR_MSG(extack, "Flag ICMP should not be set for output SA"); err = -EINVAL; goto out; } if (p->flags & XFRM_STATE_WILDRECV) { NL_SET_ERR_MSG(extack, "Flag WILDRECV should not be set for output SA"); err = -EINVAL; goto out; } if (p->replay_window) { NL_SET_ERR_MSG(extack, "Replay window should be 0 for output SA"); err = -EINVAL; goto out; } if (attrs[XFRMA_IPTFS_DROP_TIME]) { NL_SET_ERR_MSG(extack, "IP-TFS drop time should not be set for output SA"); err = -EINVAL; goto out; } if (attrs[XFRMA_IPTFS_REORDER_WINDOW]) { NL_SET_ERR_MSG(extack, "IP-TFS reorder window should not be set for output SA"); err = -EINVAL; goto out; } if (attrs[XFRMA_REPLAY_VAL]) { struct xfrm_replay_state *replay; replay = nla_data(attrs[XFRMA_REPLAY_VAL]); if (replay->seq || replay->bitmap) { NL_SET_ERR_MSG(extack, "Replay seq and bitmap should be 0 for output SA"); err = -EINVAL; goto out; } } } if (sa_dir == XFRM_SA_DIR_IN) { if (p->flags & XFRM_STATE_NOPMTUDISC) { NL_SET_ERR_MSG(extack, "Flag NOPMTUDISC should not be set for input SA"); err = -EINVAL; goto out; } if (attrs[XFRMA_SA_EXTRA_FLAGS]) { u32 xflags = nla_get_u32(attrs[XFRMA_SA_EXTRA_FLAGS]); if (xflags & XFRM_SA_XFLAG_DONT_ENCAP_DSCP) { NL_SET_ERR_MSG(extack, "Flag DONT_ENCAP_DSCP should not be set for input SA"); err = -EINVAL; goto out; } if (xflags & XFRM_SA_XFLAG_OSEQ_MAY_WRAP) { NL_SET_ERR_MSG(extack, "Flag OSEQ_MAY_WRAP should not be set for input SA"); err = -EINVAL; goto out; } } if (attrs[XFRMA_IPTFS_DONT_FRAG]) { NL_SET_ERR_MSG(extack, "IP-TFS don't fragment should not be set for input SA"); err = -EINVAL; goto out; } if (attrs[XFRMA_IPTFS_INIT_DELAY]) { NL_SET_ERR_MSG(extack, "IP-TFS initial delay should not be set for input SA"); err = -EINVAL; goto out; } if (attrs[XFRMA_IPTFS_MAX_QSIZE]) { NL_SET_ERR_MSG(extack, "IP-TFS max queue size should not be set for input SA"); err = -EINVAL; goto out; } if (attrs[XFRMA_IPTFS_PKT_SIZE]) { NL_SET_ERR_MSG(extack, "IP-TFS packet size should not be set for input SA"); err = -EINVAL; goto out; } } if (!sa_dir && attrs[XFRMA_SA_PCPU]) { NL_SET_ERR_MSG(extack, "SA_PCPU only supported with SA_DIR"); err = -EINVAL; goto out; } out: return err; } static int attach_one_algo(struct xfrm_algo **algpp, u8 *props, struct xfrm_algo_desc *(*get_byname)(const char *, int), struct nlattr *rta, struct netlink_ext_ack *extack) { struct xfrm_algo *p, *ualg; struct xfrm_algo_desc *algo; if (!rta) return 0; ualg = nla_data(rta); algo = get_byname(ualg->alg_name, 1); if (!algo) { NL_SET_ERR_MSG(extack, "Requested COMP algorithm not found"); return -ENOSYS; } *props = algo->desc.sadb_alg_id; p = kmemdup(ualg, xfrm_alg_len(ualg), GFP_KERNEL); if (!p) return -ENOMEM; strscpy(p->alg_name, algo->name); *algpp = p; return 0; } static int attach_crypt(struct xfrm_state *x, struct nlattr *rta, struct netlink_ext_ack *extack) { struct xfrm_algo *p, *ualg; struct xfrm_algo_desc *algo; if (!rta) return 0; ualg = nla_data(rta); algo = xfrm_ealg_get_byname(ualg->alg_name, 1); if (!algo) { NL_SET_ERR_MSG(extack, "Requested CRYPT algorithm not found"); return -ENOSYS; } x->props.ealgo = algo->desc.sadb_alg_id; p = kmemdup(ualg, xfrm_alg_len(ualg), GFP_KERNEL); if (!p) return -ENOMEM; strscpy(p->alg_name, algo->name); x->ealg = p; x->geniv = algo->uinfo.encr.geniv; return 0; } static int attach_auth(struct xfrm_algo_auth **algpp, u8 *props, struct nlattr *rta, struct netlink_ext_ack *extack) { struct xfrm_algo *ualg; struct xfrm_algo_auth *p; struct xfrm_algo_desc *algo; if (!rta) return 0; ualg = nla_data(rta); algo = xfrm_aalg_get_byname(ualg->alg_name, 1); if (!algo) { NL_SET_ERR_MSG(extack, "Requested AUTH algorithm not found"); return -ENOSYS; } *props = algo->desc.sadb_alg_id; p = kmalloc(sizeof(*p) + (ualg->alg_key_len + 7) / 8, GFP_KERNEL); if (!p) return -ENOMEM; strscpy(p->alg_name, algo->name); p->alg_key_len = ualg->alg_key_len; p->alg_trunc_len = algo->uinfo.auth.icv_truncbits; memcpy(p->alg_key, ualg->alg_key, (ualg->alg_key_len + 7) / 8); *algpp = p; return 0; } static int attach_auth_trunc(struct xfrm_algo_auth **algpp, u8 *props, struct nlattr *rta, struct netlink_ext_ack *extack) { struct xfrm_algo_auth *p, *ualg; struct xfrm_algo_desc *algo; if (!rta) return 0; ualg = nla_data(rta); algo = xfrm_aalg_get_byname(ualg->alg_name, 1); if (!algo) { NL_SET_ERR_MSG(extack, "Requested AUTH_TRUNC algorithm not found"); return -ENOSYS; } if (ualg->alg_trunc_len > algo->uinfo.auth.icv_fullbits) { NL_SET_ERR_MSG(extack, "Invalid length requested for truncated ICV"); return -EINVAL; } *props = algo->desc.sadb_alg_id; p = kmemdup(ualg, xfrm_alg_auth_len(ualg), GFP_KERNEL); if (!p) return -ENOMEM; strscpy(p->alg_name, algo->name); if (!p->alg_trunc_len) p->alg_trunc_len = algo->uinfo.auth.icv_truncbits; *algpp = p; return 0; } static int attach_aead(struct xfrm_state *x, struct nlattr *rta, struct netlink_ext_ack *extack) { struct xfrm_algo_aead *p, *ualg; struct xfrm_algo_desc *algo; if (!rta) return 0; ualg = nla_data(rta); algo = xfrm_aead_get_byname(ualg->alg_name, ualg->alg_icv_len, 1); if (!algo) { NL_SET_ERR_MSG(extack, "Requested AEAD algorithm not found"); return -ENOSYS; } x->props.ealgo = algo->desc.sadb_alg_id; p = kmemdup(ualg, aead_len(ualg), GFP_KERNEL); if (!p) return -ENOMEM; strscpy(p->alg_name, algo->name); x->aead = p; x->geniv = algo->uinfo.aead.geniv; return 0; } static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_esn, struct nlattr *rp, struct netlink_ext_ack *extack) { struct xfrm_replay_state_esn *up; unsigned int ulen; if (!replay_esn || !rp) return 0; up = nla_data(rp); ulen = xfrm_replay_state_esn_len(up); /* Check the overall length and the internal bitmap length to avoid * potential overflow. */ if (nla_len(rp) < (int)ulen) { NL_SET_ERR_MSG(extack, "ESN attribute is too short"); return -EINVAL; } if (xfrm_replay_state_esn_len(replay_esn) != ulen) { NL_SET_ERR_MSG(extack, "New ESN size doesn't match the existing SA's ESN size"); return -EINVAL; } if (replay_esn->bmp_len != up->bmp_len) { NL_SET_ERR_MSG(extack, "New ESN bitmap size doesn't match the existing SA's ESN bitmap"); return -EINVAL; } if (up->replay_window > up->bmp_len * sizeof(__u32) * 8) { NL_SET_ERR_MSG(extack, "ESN replay window is longer than the bitmap"); return -EINVAL; } return 0; } static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn, struct xfrm_replay_state_esn **preplay_esn, struct nlattr *rta) { struct xfrm_replay_state_esn *p, *pp, *up; unsigned int klen, ulen; if (!rta) return 0; up = nla_data(rta); klen = xfrm_replay_state_esn_len(up); ulen = nla_len(rta) >= (int)klen ? klen : sizeof(*up); p = kzalloc(klen, GFP_KERNEL); if (!p) return -ENOMEM; pp = kzalloc(klen, GFP_KERNEL); if (!pp) { kfree(p); return -ENOMEM; } memcpy(p, up, ulen); memcpy(pp, up, ulen); *replay_esn = p; *preplay_esn = pp; return 0; } static inline unsigned int xfrm_user_sec_ctx_size(struct xfrm_sec_ctx *xfrm_ctx) { unsigned int len = 0; if (xfrm_ctx) { len += sizeof(struct xfrm_user_sec_ctx); len += xfrm_ctx->ctx_len; } return len; } static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) { memcpy(&x->id, &p->id, sizeof(x->id)); memcpy(&x->sel, &p->sel, sizeof(x->sel)); memcpy(&x->lft, &p->lft, sizeof(x->lft)); x->props.mode = p->mode; x->props.replay_window = min_t(unsigned int, p->replay_window, sizeof(x->replay.bitmap) * 8); x->props.reqid = p->reqid; x->props.family = p->family; memcpy(&x->props.saddr, &p->saddr, sizeof(x->props.saddr)); x->props.flags = p->flags; if (!x->sel.family && !(p->flags & XFRM_STATE_AF_UNSPEC)) x->sel.family = p->family; } /* * someday when pfkey also has support, we could have the code * somehow made shareable and move it to xfrm_state.c - JHS * */ static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs, int update_esn) { struct nlattr *rp = attrs[XFRMA_REPLAY_VAL]; struct nlattr *re = update_esn ? attrs[XFRMA_REPLAY_ESN_VAL] : NULL; struct nlattr *lt = attrs[XFRMA_LTIME_VAL]; struct nlattr *et = attrs[XFRMA_ETIMER_THRESH]; struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH]; struct nlattr *mt = attrs[XFRMA_MTIMER_THRESH]; if (re && x->replay_esn && x->preplay_esn) { struct xfrm_replay_state_esn *replay_esn; replay_esn = nla_data(re); memcpy(x->replay_esn, replay_esn, xfrm_replay_state_esn_len(replay_esn)); memcpy(x->preplay_esn, replay_esn, xfrm_replay_state_esn_len(replay_esn)); } if (rp) { struct xfrm_replay_state *replay; replay = nla_data(rp); memcpy(&x->replay, replay, sizeof(*replay)); memcpy(&x->preplay, replay, sizeof(*replay)); } if (lt) { struct xfrm_lifetime_cur *ltime; ltime = nla_data(lt); x->curlft.bytes = ltime->bytes; x->curlft.packets = ltime->packets; x->curlft.add_time = ltime->add_time; x->curlft.use_time = ltime->use_time; } if (et) x->replay_maxage = nla_get_u32(et); if (rt) x->replay_maxdiff = nla_get_u32(rt); if (mt) x->mapping_maxage = nla_get_u32(mt); } static void xfrm_smark_init(struct nlattr **attrs, struct xfrm_mark *m) { if (attrs[XFRMA_SET_MARK]) { m->v = nla_get_u32(attrs[XFRMA_SET_MARK]); m->m = nla_get_u32_default(attrs[XFRMA_SET_MARK_MASK], 0xffffffff); } else { m->v = m->m = 0; } } static struct xfrm_state *xfrm_state_construct(struct net *net, struct xfrm_usersa_info *p, struct nlattr **attrs, int *errp, struct netlink_ext_ack *extack) { struct xfrm_state *x = xfrm_state_alloc(net); int err = -ENOMEM; if (!x) goto error_no_put; copy_from_user_state(x, p); if (attrs[XFRMA_ENCAP]) { x->encap = kmemdup(nla_data(attrs[XFRMA_ENCAP]), sizeof(*x->encap), GFP_KERNEL); if (x->encap == NULL) goto error; } if (attrs[XFRMA_COADDR]) { x->coaddr = kmemdup(nla_data(attrs[XFRMA_COADDR]), sizeof(*x->coaddr), GFP_KERNEL); if (x->coaddr == NULL) goto error; } if (attrs[XFRMA_SA_EXTRA_FLAGS]) x->props.extra_flags = nla_get_u32(attrs[XFRMA_SA_EXTRA_FLAGS]); if ((err = attach_aead(x, attrs[XFRMA_ALG_AEAD], extack))) goto error; if ((err = attach_auth_trunc(&x->aalg, &x->props.aalgo, attrs[XFRMA_ALG_AUTH_TRUNC], extack))) goto error; if (!x->props.aalgo) { if ((err = attach_auth(&x->aalg, &x->props.aalgo, attrs[XFRMA_ALG_AUTH], extack))) goto error; } if ((err = attach_crypt(x, attrs[XFRMA_ALG_CRYPT], extack))) goto error; if ((err = attach_one_algo(&x->calg, &x->props.calgo, xfrm_calg_get_byname, attrs[XFRMA_ALG_COMP], extack))) goto error; if (attrs[XFRMA_TFCPAD]) x->tfcpad = nla_get_u32(attrs[XFRMA_TFCPAD]); xfrm_mark_get(attrs, &x->mark); xfrm_smark_init(attrs, &x->props.smark); if (attrs[XFRMA_IF_ID]) x->if_id = nla_get_u32(attrs[XFRMA_IF_ID]); if (attrs[XFRMA_SA_DIR]) x->dir = nla_get_u8(attrs[XFRMA_SA_DIR]); if (attrs[XFRMA_NAT_KEEPALIVE_INTERVAL]) x->nat_keepalive_interval = nla_get_u32(attrs[XFRMA_NAT_KEEPALIVE_INTERVAL]); if (attrs[XFRMA_SA_PCPU]) { x->pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]); if (x->pcpu_num >= num_possible_cpus()) { err = -ERANGE; NL_SET_ERR_MSG(extack, "pCPU number too big"); goto error; } } err = __xfrm_init_state(x, extack); if (err) goto error; if (attrs[XFRMA_SEC_CTX]) { err = security_xfrm_state_alloc(x, nla_data(attrs[XFRMA_SEC_CTX])); if (err) goto error; } if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn, attrs[XFRMA_REPLAY_ESN_VAL]))) goto error; x->km.seq = p->seq; x->replay_maxdiff = net->xfrm.sysctl_aevent_rseqth; /* sysctl_xfrm_aevent_etime is in 100ms units */ x->replay_maxage = (net->xfrm.sysctl_aevent_etime*HZ)/XFRM_AE_ETH_M; if ((err = xfrm_init_replay(x, extack))) goto error; /* override default values from above */ xfrm_update_ae_params(x, attrs, 0); xfrm_set_type_offload(x, attrs[XFRMA_OFFLOAD_DEV]); /* configure the hardware if offload is requested */ if (attrs[XFRMA_OFFLOAD_DEV]) { err = xfrm_dev_state_add(net, x, nla_data(attrs[XFRMA_OFFLOAD_DEV]), extack); if (err) goto error; } if (x->mode_cbs && x->mode_cbs->user_init) { err = x->mode_cbs->user_init(net, x, attrs, extack); if (err) goto error; } return x; error: x->km.state = XFRM_STATE_DEAD; xfrm_state_put(x); error_no_put: *errp = err; return NULL; } static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct xfrm_usersa_info *p = nlmsg_data(nlh); struct xfrm_state *x; int err; struct km_event c; err = verify_newsa_info(p, attrs, extack); if (err) return err; x = xfrm_state_construct(net, p, attrs, &err, extack); if (!x) return err; xfrm_state_hold(x); if (nlh->nlmsg_type == XFRM_MSG_NEWSA) err = xfrm_state_add(x); else err = xfrm_state_update(x); xfrm_audit_state_add(x, err ? 0 : 1, true); if (err < 0) { x->km.state = XFRM_STATE_DEAD; xfrm_dev_state_delete(x); __xfrm_state_put(x); goto out; } if (x->km.state == XFRM_STATE_VOID) x->km.state = XFRM_STATE_VALID; c.seq = nlh->nlmsg_seq; c.portid = nlh->nlmsg_pid; c.event = nlh->nlmsg_type; km_state_notify(x, &c); out: xfrm_state_put(x); return err; } static struct xfrm_state *xfrm_user_state_lookup(struct net *net, struct xfrm_usersa_id *p, struct nlattr **attrs, int *errp) { struct xfrm_state *x = NULL; struct xfrm_mark m; int err; u32 mark = xfrm_mark_get(attrs, &m); if (xfrm_id_proto_match(p->proto, IPSEC_PROTO_ANY)) { err = -ESRCH; x = xfrm_state_lookup(net, mark, &p->daddr, p->spi, p->proto, p->family); } else { xfrm_address_t *saddr = NULL; verify_one_addr(attrs, XFRMA_SRCADDR, &saddr); if (!saddr) { err = -EINVAL; goto out; } err = -ESRCH; x = xfrm_state_lookup_byaddr(net, mark, &p->daddr, saddr, p->proto, p->family); } out: if (!x && errp) *errp = err; return x; } static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct xfrm_state *x; int err = -ESRCH; struct km_event c; struct xfrm_usersa_id *p = nlmsg_data(nlh); x = xfrm_user_state_lookup(net, p, attrs, &err); if (x == NULL) return err; if ((err = security_xfrm_state_delete(x)) != 0) goto out; if (xfrm_state_kern(x)) { NL_SET_ERR_MSG(extack, "SA is in use by tunnels"); err = -EPERM; goto out; } err = xfrm_state_delete(x); if (err < 0) goto out; c.seq = nlh->nlmsg_seq; c.portid = nlh->nlmsg_pid; c.event = nlh->nlmsg_type; km_state_notify(x, &c); out: xfrm_audit_state_delete(x, err ? 0 : 1, true); xfrm_state_put(x); return err; } static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) { memset(p, 0, sizeof(*p)); memcpy(&p->id, &x->id, sizeof(p->id)); memcpy(&p->sel, &x->sel, sizeof(p->sel)); memcpy(&p->lft, &x->lft, sizeof(p->lft)); if (x->xso.dev) xfrm_dev_state_update_stats(x); memcpy(&p->curlft, &x->curlft, sizeof(p->curlft)); put_unaligned(x->stats.replay_window, &p->stats.replay_window); put_unaligned(x->stats.replay, &p->stats.replay); put_unaligned(x->stats.integrity_failed, &p->stats.integrity_failed); memcpy(&p->saddr, &x->props.saddr, sizeof(p->saddr)); p->mode = x->props.mode; p->replay_window = x->props.replay_window; p->reqid = x->props.reqid; p->family = x->props.family; p->flags = x->props.flags; p->seq = x->km.seq; } struct xfrm_dump_info { struct sk_buff *in_skb; struct sk_buff *out_skb; u32 nlmsg_seq; u16 nlmsg_flags; }; static int copy_sec_ctx(struct xfrm_sec_ctx *s, struct sk_buff *skb) { struct xfrm_user_sec_ctx *uctx; struct nlattr *attr; int ctx_size = sizeof(*uctx) + s->ctx_len; attr = nla_reserve(skb, XFRMA_SEC_CTX, ctx_size); if (attr == NULL) return -EMSGSIZE; uctx = nla_data(attr); uctx->exttype = XFRMA_SEC_CTX; uctx->len = ctx_size; uctx->ctx_doi = s->ctx_doi; uctx->ctx_alg = s->ctx_alg; uctx->ctx_len = s->ctx_len; memcpy(uctx + 1, s->ctx_str, s->ctx_len); return 0; } static int copy_user_offload(struct xfrm_dev_offload *xso, struct sk_buff *skb) { struct xfrm_user_offload *xuo; struct nlattr *attr; attr = nla_reserve(skb, XFRMA_OFFLOAD_DEV, sizeof(*xuo)); if (attr == NULL) return -EMSGSIZE; xuo = nla_data(attr); memset(xuo, 0, sizeof(*xuo)); xuo->ifindex = xso->dev->ifindex; if (xso->dir == XFRM_DEV_OFFLOAD_IN) xuo->flags = XFRM_OFFLOAD_INBOUND; if (xso->type == XFRM_DEV_OFFLOAD_PACKET) xuo->flags |= XFRM_OFFLOAD_PACKET; return 0; } static bool xfrm_redact(void) { return IS_ENABLED(CONFIG_SECURITY) && security_locked_down(LOCKDOWN_XFRM_SECRET); } static int copy_to_user_auth(struct xfrm_algo_auth *auth, struct sk_buff *skb) { struct xfrm_algo *algo; struct xfrm_algo_auth *ap; struct nlattr *nla; bool redact_secret = xfrm_redact(); nla = nla_reserve(skb, XFRMA_ALG_AUTH, sizeof(*algo) + (auth->alg_key_len + 7) / 8); if (!nla) return -EMSGSIZE; algo = nla_data(nla); strscpy_pad(algo->alg_name, auth->alg_name); if (redact_secret && auth->alg_key_len) memset(algo->alg_key, 0, (auth->alg_key_len + 7) / 8); else memcpy(algo->alg_key, auth->alg_key, (auth->alg_key_len + 7) / 8); algo->alg_key_len = auth->alg_key_len; nla = nla_reserve(skb, XFRMA_ALG_AUTH_TRUNC, xfrm_alg_auth_len(auth)); if (!nla) return -EMSGSIZE; ap = nla_data(nla); strscpy_pad(ap->alg_name, auth->alg_name); ap->alg_key_len = auth->alg_key_len; ap->alg_trunc_len = auth->alg_trunc_len; if (redact_secret && auth->alg_key_len) memset(ap->alg_key, 0, (auth->alg_key_len + 7) / 8); else memcpy(ap->alg_key, auth->alg_key, (auth->alg_key_len + 7) / 8); return 0; } static int copy_to_user_aead(struct xfrm_algo_aead *aead, struct sk_buff *skb) { struct nlattr *nla = nla_reserve(skb, XFRMA_ALG_AEAD, aead_len(aead)); struct xfrm_algo_aead *ap; bool redact_secret = xfrm_redact(); if (!nla) return -EMSGSIZE; ap = nla_data(nla); strscpy_pad(ap->alg_name, aead->alg_name); ap->alg_key_len = aead->alg_key_len; ap->alg_icv_len = aead->alg_icv_len; if (redact_secret && aead->alg_key_len) memset(ap->alg_key, 0, (aead->alg_key_len + 7) / 8); else memcpy(ap->alg_key, aead->alg_key, (aead->alg_key_len + 7) / 8); return 0; } static int copy_to_user_ealg(struct xfrm_algo *ealg, struct sk_buff *skb) { struct xfrm_algo *ap; bool redact_secret = xfrm_redact(); struct nlattr *nla = nla_reserve(skb, XFRMA_ALG_CRYPT, xfrm_alg_len(ealg)); if (!nla) return -EMSGSIZE; ap = nla_data(nla); strscpy_pad(ap->alg_name, ealg->alg_name); ap->alg_key_len = ealg->alg_key_len; if (redact_secret && ealg->alg_key_len) memset(ap->alg_key, 0, (ealg->alg_key_len + 7) / 8); else memcpy(ap->alg_key, ealg->alg_key, (ealg->alg_key_len + 7) / 8); return 0; } static int copy_to_user_calg(struct xfrm_algo *calg, struct sk_buff *skb) { struct nlattr *nla = nla_reserve(skb, XFRMA_ALG_COMP, sizeof(*calg)); struct xfrm_algo *ap; if (!nla) return -EMSGSIZE; ap = nla_data(nla); strscpy_pad(ap->alg_name, calg->alg_name); ap->alg_key_len = 0; return 0; } static int copy_to_user_encap(struct xfrm_encap_tmpl *ep, struct sk_buff *skb) { struct nlattr *nla = nla_reserve(skb, XFRMA_ENCAP, sizeof(*ep)); struct xfrm_encap_tmpl *uep; if (!nla) return -EMSGSIZE; uep = nla_data(nla); memset(uep, 0, sizeof(*uep)); uep->encap_type = ep->encap_type; uep->encap_sport = ep->encap_sport; uep->encap_dport = ep->encap_dport; uep->encap_oa = ep->encap_oa; return 0; } static int xfrm_smark_put(struct sk_buff *skb, struct xfrm_mark *m) { int ret = 0; if (m->v | m->m) { ret = nla_put_u32(skb, XFRMA_SET_MARK, m->v); if (!ret) ret = nla_put_u32(skb, XFRMA_SET_MARK_MASK, m->m); } return ret; } /* Don't change this without updating xfrm_sa_len! */ static int copy_to_user_state_extra(struct xfrm_state *x, struct xfrm_usersa_info *p, struct sk_buff *skb) { int ret = 0; copy_to_user_state(x, p); if (x->props.extra_flags) { ret = nla_put_u32(skb, XFRMA_SA_EXTRA_FLAGS, x->props.extra_flags); if (ret) goto out; } if (x->coaddr) { ret = nla_put(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr); if (ret) goto out; } if (x->lastused) { ret = nla_put_u64_64bit(skb, XFRMA_LASTUSED, x->lastused, XFRMA_PAD); if (ret) goto out; } if (x->aead) { ret = copy_to_user_aead(x->aead, skb); if (ret) goto out; } if (x->aalg) { ret = copy_to_user_auth(x->aalg, skb); if (ret) goto out; } if (x->ealg) { ret = copy_to_user_ealg(x->ealg, skb); if (ret) goto out; } if (x->calg) { ret = copy_to_user_calg(x->calg, skb); if (ret) goto out; } if (x->encap) { ret = copy_to_user_encap(x->encap, skb); if (ret) goto out; } if (x->tfcpad) { ret = nla_put_u32(skb, XFRMA_TFCPAD, x->tfcpad); if (ret) goto out; } ret = xfrm_mark_put(skb, &x->mark); if (ret) goto out; ret = xfrm_smark_put(skb, &x->props.smark); if (ret) goto out; if (x->replay_esn) ret = nla_put(skb, XFRMA_REPLAY_ESN_VAL, xfrm_replay_state_esn_len(x->replay_esn), x->replay_esn); else ret = nla_put(skb, XFRMA_REPLAY_VAL, sizeof(x->replay), &x->replay); if (ret) goto out; if(x->xso.dev) ret = copy_user_offload(&x->xso, skb); if (ret) goto out; if (x->if_id) { ret = nla_put_u32(skb, XFRMA_IF_ID, x->if_id); if (ret) goto out; } if (x->security) { ret = copy_sec_ctx(x->security, skb); if (ret) goto out; } if (x->mode_cbs && x->mode_cbs->copy_to_user) ret = x->mode_cbs->copy_to_user(x, skb); if (ret) goto out; if (x->mapping_maxage) { ret = nla_put_u32(skb, XFRMA_MTIMER_THRESH, x->mapping_maxage); if (ret) goto out; } if (x->pcpu_num != UINT_MAX) { ret = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num); if (ret) goto out; } if (x->dir) ret = nla_put_u8(skb, XFRMA_SA_DIR, x->dir); if (x->nat_keepalive_interval) { ret = nla_put_u32(skb, XFRMA_NAT_KEEPALIVE_INTERVAL, x->nat_keepalive_interval); if (ret) goto out; } out: return ret; } static int dump_one_state(struct xfrm_state *x, int count, void *ptr) { struct xfrm_dump_info *sp = ptr; struct sk_buff *in_skb = sp->in_skb; struct sk_buff *skb = sp->out_skb; struct xfrm_translator *xtr; struct xfrm_usersa_info *p; struct nlmsghdr *nlh; int err; nlh = nlmsg_put(skb, NETLINK_CB(in_skb).portid, sp->nlmsg_seq, XFRM_MSG_NEWSA, sizeof(*p), sp->nlmsg_flags); if (nlh == NULL) return -EMSGSIZE; p = nlmsg_data(nlh); err = copy_to_user_state_extra(x, p, skb); if (err) { nlmsg_cancel(skb, nlh); return err; } nlmsg_end(skb, nlh); xtr = xfrm_get_translator(); if (xtr) { err = xtr->alloc_compat(skb, nlh); xfrm_put_translator(xtr); if (err) { nlmsg_cancel(skb, nlh); return err; } } return 0; } static int xfrm_dump_sa_done(struct netlink_callback *cb) { struct xfrm_state_walk *walk = (struct xfrm_state_walk *) &cb->args[1]; struct sock *sk = cb->skb->sk; struct net *net = sock_net(sk); if (cb->args[0]) xfrm_state_walk_done(walk, net); return 0; } static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); struct xfrm_state_walk *walk = (struct xfrm_state_walk *) &cb->args[1]; struct xfrm_dump_info info; BUILD_BUG_ON(sizeof(struct xfrm_state_walk) > sizeof(cb->args) - sizeof(cb->args[0])); info.in_skb = cb->skb; info.out_skb = skb; info.nlmsg_seq = cb->nlh->nlmsg_seq; info.nlmsg_flags = NLM_F_MULTI; if (!cb->args[0]) { struct nlattr *attrs[XFRMA_MAX+1]; struct xfrm_address_filter *filter = NULL; u8 proto = 0; int err; err = nlmsg_parse_deprecated(cb->nlh, 0, attrs, XFRMA_MAX, xfrma_policy, cb->extack); if (err < 0) return err; if (attrs[XFRMA_ADDRESS_FILTER]) { filter = kmemdup(nla_data(attrs[XFRMA_ADDRESS_FILTER]), sizeof(*filter), GFP_KERNEL); if (filter == NULL) return -ENOMEM; /* see addr_match(), (prefix length >> 5) << 2 * will be used to compare xfrm_address_t */ if (filter->splen > (sizeof(xfrm_address_t) << 3) || filter->dplen > (sizeof(xfrm_address_t) << 3)) { kfree(filter); return -EINVAL; } } if (attrs[XFRMA_PROTO]) proto = nla_get_u8(attrs[XFRMA_PROTO]); xfrm_state_walk_init(walk, proto, filter); cb->args[0] = 1; } (void) xfrm_state_walk(net, walk, dump_one_state, &info); return skb->len; } static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb, struct xfrm_state *x, u32 seq) { struct xfrm_dump_info info; struct sk_buff *skb; int err; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!skb) return ERR_PTR(-ENOMEM); info.in_skb = in_skb; info.out_skb = skb; info.nlmsg_seq = seq; info.nlmsg_flags = 0; err = dump_one_state(x, 0, &info); if (err) { kfree_skb(skb); return ERR_PTR(err); } return skb; } /* A wrapper for nlmsg_multicast() checking that nlsk is still available. * Must be called with RCU read lock. */ static inline int xfrm_nlmsg_multicast(struct net *net, struct sk_buff *skb, u32 pid, unsigned int group) { struct sock *nlsk = rcu_dereference(net->xfrm.nlsk); struct xfrm_translator *xtr; if (!nlsk) { kfree_skb(skb); return -EPIPE; } xtr = xfrm_get_translator(); if (xtr) { int err = xtr->alloc_compat(skb, nlmsg_hdr(skb)); xfrm_put_translator(xtr); if (err) { kfree_skb(skb); return err; } } return nlmsg_multicast(nlsk, skb, pid, group, GFP_ATOMIC); } static inline unsigned int xfrm_spdinfo_msgsize(void) { return NLMSG_ALIGN(4) + nla_total_size(sizeof(struct xfrmu_spdinfo)) + nla_total_size(sizeof(struct xfrmu_spdhinfo)) + nla_total_size(sizeof(struct xfrmu_spdhthresh)) + nla_total_size(sizeof(struct xfrmu_spdhthresh)); } static int build_spdinfo(struct sk_buff *skb, struct net *net, u32 portid, u32 seq, u32 flags) { struct xfrmk_spdinfo si; struct xfrmu_spdinfo spc; struct xfrmu_spdhinfo sph; struct xfrmu_spdhthresh spt4, spt6; struct nlmsghdr *nlh; int err; u32 *f; unsigned lseq; nlh = nlmsg_put(skb, portid, seq, XFRM_MSG_NEWSPDINFO, sizeof(u32), 0); if (nlh == NULL) /* shouldn't really happen ... */ return -EMSGSIZE; f = nlmsg_data(nlh); *f = flags; xfrm_spd_getinfo(net, &si); spc.incnt = si.incnt; spc.outcnt = si.outcnt; spc.fwdcnt = si.fwdcnt; spc.inscnt = si.inscnt; spc.outscnt = si.outscnt; spc.fwdscnt = si.fwdscnt; sph.spdhcnt = si.spdhcnt; sph.spdhmcnt = si.spdhmcnt; do { lseq = read_seqbegin(&net->xfrm.policy_hthresh.lock); spt4.lbits = net->xfrm.policy_hthresh.lbits4; spt4.rbits = net->xfrm.policy_hthresh.rbits4; spt6.lbits = net->xfrm.policy_hthresh.lbits6; spt6.rbits = net->xfrm.policy_hthresh.rbits6; } while (read_seqretry(&net->xfrm.policy_hthresh.lock, lseq)); err = nla_put(skb, XFRMA_SPD_INFO, sizeof(spc), &spc); if (!err) err = nla_put(skb, XFRMA_SPD_HINFO, sizeof(sph), &sph); if (!err) err = nla_put(skb, XFRMA_SPD_IPV4_HTHRESH, sizeof(spt4), &spt4); if (!err) err = nla_put(skb, XFRMA_SPD_IPV6_HTHRESH, sizeof(spt6), &spt6); if (err) { nlmsg_cancel(skb, nlh); return err; } nlmsg_end(skb, nlh); return 0; } static int xfrm_set_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct xfrmu_spdhthresh *thresh4 = NULL; struct xfrmu_spdhthresh *thresh6 = NULL; /* selector prefixlen thresholds to hash policies */ if (attrs[XFRMA_SPD_IPV4_HTHRESH]) { struct nlattr *rta = attrs[XFRMA_SPD_IPV4_HTHRESH]; if (nla_len(rta) < sizeof(*thresh4)) { NL_SET_ERR_MSG(extack, "Invalid SPD_IPV4_HTHRESH attribute length"); return -EINVAL; } thresh4 = nla_data(rta); if (thresh4->lbits > 32 || thresh4->rbits > 32) { NL_SET_ERR_MSG(extack, "Invalid hash threshold (must be <= 32 for IPv4)"); return -EINVAL; } } if (attrs[XFRMA_SPD_IPV6_HTHRESH]) { struct nlattr *rta = attrs[XFRMA_SPD_IPV6_HTHRESH]; if (nla_len(rta) < sizeof(*thresh6)) { NL_SET_ERR_MSG(extack, "Invalid SPD_IPV6_HTHRESH attribute length"); return -EINVAL; } thresh6 = nla_data(rta); if (thresh6->lbits > 128 || thresh6->rbits > 128) { NL_SET_ERR_MSG(extack, "Invalid hash threshold (must be <= 128 for IPv6)"); return -EINVAL; } } if (thresh4 || thresh6) { write_seqlock(&net->xfrm.policy_hthresh.lock); if (thresh4) { net->xfrm.policy_hthresh.lbits4 = thresh4->lbits; net->xfrm.policy_hthresh.rbits4 = thresh4->rbits; } if (thresh6) { net->xfrm.policy_hthresh.lbits6 = thresh6->lbits; net->xfrm.policy_hthresh.rbits6 = thresh6->rbits; } write_sequnlock(&net->xfrm.policy_hthresh.lock); xfrm_policy_hash_rebuild(net); } return 0; } static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct sk_buff *r_skb; u32 *flags = nlmsg_data(nlh); u32 sportid = NETLINK_CB(skb).portid; u32 seq = nlh->nlmsg_seq; int err; r_skb = nlmsg_new(xfrm_spdinfo_msgsize(), GFP_ATOMIC); if (r_skb == NULL) return -ENOMEM; err = build_spdinfo(r_skb, net, sportid, seq, *flags); BUG_ON(err < 0); return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); } static inline unsigned int xfrm_sadinfo_msgsize(void) { return NLMSG_ALIGN(4) + nla_total_size(sizeof(struct xfrmu_sadhinfo)) + nla_total_size(4); /* XFRMA_SAD_CNT */ } static int build_sadinfo(struct sk_buff *skb, struct net *net, u32 portid, u32 seq, u32 flags) { struct xfrmk_sadinfo si; struct xfrmu_sadhinfo sh; struct nlmsghdr *nlh; int err; u32 *f; nlh = nlmsg_put(skb, portid, seq, XFRM_MSG_NEWSADINFO, sizeof(u32), 0); if (nlh == NULL) /* shouldn't really happen ... */ return -EMSGSIZE; f = nlmsg_data(nlh); *f = flags; xfrm_sad_getinfo(net, &si); sh.sadhmcnt = si.sadhmcnt; sh.sadhcnt = si.sadhcnt; err = nla_put_u32(skb, XFRMA_SAD_CNT, si.sadcnt); if (!err) err = nla_put(skb, XFRMA_SAD_HINFO, sizeof(sh), &sh); if (err) { nlmsg_cancel(skb, nlh); return err; } nlmsg_end(skb, nlh); return 0; } static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct sk_buff *r_skb; u32 *flags = nlmsg_data(nlh); u32 sportid = NETLINK_CB(skb).portid; u32 seq = nlh->nlmsg_seq; int err; r_skb = nlmsg_new(xfrm_sadinfo_msgsize(), GFP_ATOMIC); if (r_skb == NULL) return -ENOMEM; err = build_sadinfo(r_skb, net, sportid, seq, *flags); BUG_ON(err < 0); return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); } static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct xfrm_usersa_id *p = nlmsg_data(nlh); struct xfrm_state *x; struct sk_buff *resp_skb; int err = -ESRCH; x = xfrm_user_state_lookup(net, p, attrs, &err); if (x == NULL) goto out_noput; resp_skb = xfrm_state_netlink(skb, x, nlh->nlmsg_seq); if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid); } xfrm_state_put(x); out_noput: return err; } static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct xfrm_state *x; struct xfrm_userspi_info *p; struct xfrm_translator *xtr; struct sk_buff *resp_skb; xfrm_address_t *daddr; int family; int err; u32 mark; struct xfrm_mark m; u32 if_id = 0; u32 pcpu_num = UINT_MAX; p = nlmsg_data(nlh); err = verify_spi_info(p->info.id.proto, p->min, p->max, extack); if (err) goto out_noput; family = p->info.family; daddr = &p->info.id.daddr; x = NULL; mark = xfrm_mark_get(attrs, &m); if (attrs[XFRMA_IF_ID]) if_id = nla_get_u32(attrs[XFRMA_IF_ID]); if (attrs[XFRMA_SA_PCPU]) { pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]); if (pcpu_num >= num_possible_cpus()) { err = -EINVAL; goto out_noput; } } if (p->info.seq) { x = xfrm_find_acq_byseq(net, mark, p->info.seq, pcpu_num); if (x && !xfrm_addr_equal(&x->id.daddr, daddr, family)) { xfrm_state_put(x); x = NULL; } } if (!x) x = xfrm_find_acq(net, &m, p->info.mode, p->info.reqid, if_id, pcpu_num, p->info.id.proto, daddr, &p->info.saddr, 1, family); err = -ENOENT; if (!x) { NL_SET_ERR_MSG(extack, "Target ACQUIRE not found"); goto out_noput; } err = xfrm_alloc_spi(x, p->min, p->max, extack); if (err) goto out; if (attrs[XFRMA_SA_DIR]) x->dir = nla_get_u8(attrs[XFRMA_SA_DIR]); resp_skb = xfrm_state_netlink(skb, x, nlh->nlmsg_seq); if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); goto out; } xtr = xfrm_get_translator(); if (xtr) { err = xtr->alloc_compat(skb, nlmsg_hdr(skb)); xfrm_put_translator(xtr); if (err) { kfree_skb(resp_skb); goto out; } } err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid); out: xfrm_state_put(x); out_noput: return err; } static int verify_policy_dir(u8 dir, struct netlink_ext_ack *extack) { switch (dir) { case XFRM_POLICY_IN: case XFRM_POLICY_OUT: case XFRM_POLICY_FWD: break; default: NL_SET_ERR_MSG(extack, "Invalid policy direction"); return -EINVAL; } return 0; } static int verify_policy_type(u8 type, struct netlink_ext_ack *extack) { switch (type) { case XFRM_POLICY_TYPE_MAIN: #ifdef CONFIG_XFRM_SUB_POLICY case XFRM_POLICY_TYPE_SUB: #endif break; default: NL_SET_ERR_MSG(extack, "Invalid policy type"); return -EINVAL; } return 0; } static int verify_newpolicy_info(struct xfrm_userpolicy_info *p, struct netlink_ext_ack *extack) { int ret; switch (p->share) { case XFRM_SHARE_ANY: case XFRM_SHARE_SESSION: case XFRM_SHARE_USER: case XFRM_SHARE_UNIQUE: break; default: NL_SET_ERR_MSG(extack, "Invalid policy share"); return -EINVAL; } switch (p->action) { case XFRM_POLICY_ALLOW: case XFRM_POLICY_BLOCK: break; default: NL_SET_ERR_MSG(extack, "Invalid policy action"); return -EINVAL; } switch (p->sel.family) { case AF_INET: if (p->sel.prefixlen_d > 32 || p->sel.prefixlen_s > 32) { NL_SET_ERR_MSG(extack, "Invalid prefix length in selector (must be <= 32 for IPv4)"); return -EINVAL; } break; case AF_INET6: #if IS_ENABLED(CONFIG_IPV6) if (p->sel.prefixlen_d > 128 || p->sel.prefixlen_s > 128) { NL_SET_ERR_MSG(extack, "Invalid prefix length in selector (must be <= 128 for IPv6)"); return -EINVAL; } break; #else NL_SET_ERR_MSG(extack, "IPv6 support disabled"); return -EAFNOSUPPORT; #endif default: NL_SET_ERR_MSG(extack, "Invalid selector family"); return -EINVAL; } ret = verify_policy_dir(p->dir, extack); if (ret) return ret; if (p->index && (xfrm_policy_id2dir(p->index) != p->dir)) { NL_SET_ERR_MSG(extack, "Policy index doesn't match direction"); return -EINVAL; } return 0; } static int copy_from_user_sec_ctx(struct xfrm_policy *pol, struct nlattr **attrs) { struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_user_sec_ctx *uctx; if (!rt) return 0; uctx = nla_data(rt); return security_xfrm_policy_alloc(&pol->security, uctx, GFP_KERNEL); } static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut, int nr) { int i; xp->xfrm_nr = nr; for (i = 0; i < nr; i++, ut++) { struct xfrm_tmpl *t = &xp->xfrm_vec[i]; memcpy(&t->id, &ut->id, sizeof(struct xfrm_id)); memcpy(&t->saddr, &ut->saddr, sizeof(xfrm_address_t)); t->reqid = ut->reqid; t->mode = ut->mode; t->share = ut->share; t->optional = ut->optional; t->aalgos = ut->aalgos; t->ealgos = ut->ealgos; t->calgos = ut->calgos; /* If all masks are ~0, then we allow all algorithms. */ t->allalgs = !~(t->aalgos & t->ealgos & t->calgos); t->encap_family = ut->family; } } static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family, int dir, struct netlink_ext_ack *extack) { u16 prev_family; int i; if (nr > XFRM_MAX_DEPTH) { NL_SET_ERR_MSG(extack, "Template count must be <= XFRM_MAX_DEPTH (" __stringify(XFRM_MAX_DEPTH) ")"); return -EINVAL; } prev_family = family; for (i = 0; i < nr; i++) { /* We never validated the ut->family value, so many * applications simply leave it at zero. The check was * never made and ut->family was ignored because all * templates could be assumed to have the same family as * the policy itself. Now that we will have ipv4-in-ipv6 * and ipv6-in-ipv4 tunnels, this is no longer true. */ if (!ut[i].family) ut[i].family = family; switch (ut[i].mode) { case XFRM_MODE_TUNNEL: case XFRM_MODE_BEET: if (ut[i].optional && dir == XFRM_POLICY_OUT) { NL_SET_ERR_MSG(extack, "Mode in optional template not allowed in outbound policy"); return -EINVAL; } break; case XFRM_MODE_IPTFS: break; default: if (ut[i].family != prev_family) { NL_SET_ERR_MSG(extack, "Mode in template doesn't support a family change"); return -EINVAL; } break; } if (ut[i].mode >= XFRM_MODE_MAX) { NL_SET_ERR_MSG(extack, "Mode in template must be < XFRM_MODE_MAX (" __stringify(XFRM_MODE_MAX) ")"); return -EINVAL; } prev_family = ut[i].family; switch (ut[i].family) { case AF_INET: break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: break; #endif default: NL_SET_ERR_MSG(extack, "Invalid family in template"); return -EINVAL; } if (!xfrm_id_proto_valid(ut[i].id.proto)) { NL_SET_ERR_MSG(extack, "Invalid XFRM protocol in template"); return -EINVAL; } } return 0; } static int copy_from_user_tmpl(struct xfrm_policy *pol, struct nlattr **attrs, int dir, struct netlink_ext_ack *extack) { struct nlattr *rt = attrs[XFRMA_TMPL]; if (!rt) { pol->xfrm_nr = 0; } else { struct xfrm_user_tmpl *utmpl = nla_data(rt); int nr = nla_len(rt) / sizeof(*utmpl); int err; err = validate_tmpl(nr, utmpl, pol->family, dir, extack); if (err) return err; copy_templates(pol, utmpl, nr); } return 0; } static int copy_from_user_policy_type(u8 *tp, struct nlattr **attrs, struct netlink_ext_ack *extack) { struct nlattr *rt = attrs[XFRMA_POLICY_TYPE]; struct xfrm_userpolicy_type *upt; u8 type = XFRM_POLICY_TYPE_MAIN; int err; if (rt) { upt = nla_data(rt); type = upt->type; } err = verify_policy_type(type, extack); if (err) return err; *tp = type; return 0; } static void copy_from_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p) { xp->priority = p->priority; xp->index = p->index; memcpy(&xp->selector, &p->sel, sizeof(xp->selector)); memcpy(&xp->lft, &p->lft, sizeof(xp->lft)); xp->action = p->action; xp->flags = p->flags; xp->family = p->sel.family; /* XXX xp->share = p->share; */ } static void copy_to_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p, int dir) { memset(p, 0, sizeof(*p)); memcpy(&p->sel, &xp->selector, sizeof(p->sel)); memcpy(&p->lft, &xp->lft, sizeof(p->lft)); memcpy(&p->curlft, &xp->curlft, sizeof(p->curlft)); p->priority = xp->priority; p->index = xp->index; p->sel.family = xp->family; p->dir = dir; p->action = xp->action; p->flags = xp->flags; p->share = XFRM_SHARE_ANY; /* XXX xp->share */ } static struct xfrm_policy *xfrm_policy_construct(struct net *net, struct xfrm_userpolicy_info *p, struct nlattr **attrs, int *errp, struct netlink_ext_ack *extack) { struct xfrm_policy *xp = xfrm_policy_alloc(net, GFP_KERNEL); int err; if (!xp) { *errp = -ENOMEM; return NULL; } copy_from_user_policy(xp, p); err = copy_from_user_policy_type(&xp->type, attrs, extack); if (err) goto error; if (!(err = copy_from_user_tmpl(xp, attrs, p->dir, extack))) err = copy_from_user_sec_ctx(xp, attrs); if (err) goto error; xfrm_mark_get(attrs, &xp->mark); if (attrs[XFRMA_IF_ID]) xp->if_id = nla_get_u32(attrs[XFRMA_IF_ID]); /* configure the hardware if offload is requested */ if (attrs[XFRMA_OFFLOAD_DEV]) { err = xfrm_dev_policy_add(net, xp, nla_data(attrs[XFRMA_OFFLOAD_DEV]), p->dir, extack); if (err) goto error; } return xp; error: *errp = err; xp->walk.dead = 1; xfrm_policy_destroy(xp); return NULL; } static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct xfrm_userpolicy_info *p = nlmsg_data(nlh); struct xfrm_policy *xp; struct km_event c; int err; int excl; err = verify_newpolicy_info(p, extack); if (err) return err; err = verify_sec_ctx_len(attrs, extack); if (err) return err; xp = xfrm_policy_construct(net, p, attrs, &err, extack); if (!xp) return err; /* shouldn't excl be based on nlh flags?? * Aha! this is anti-netlink really i.e more pfkey derived * in netlink excl is a flag and you wouldn't need * a type XFRM_MSG_UPDPOLICY - JHS */ excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY; err = xfrm_policy_insert(p->dir, xp, excl); xfrm_audit_policy_add(xp, err ? 0 : 1, true); if (err) { xfrm_dev_policy_delete(xp); xfrm_dev_policy_free(xp); security_xfrm_policy_free(xp->security); kfree(xp); return err; } c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; c.portid = nlh->nlmsg_pid; km_policy_notify(xp, p->dir, &c); xfrm_pol_put(xp); return 0; } static int copy_to_user_tmpl(struct xfrm_policy *xp, struct sk_buff *skb) { struct xfrm_user_tmpl vec[XFRM_MAX_DEPTH]; int i; if (xp->xfrm_nr == 0) return 0; if (xp->xfrm_nr > XFRM_MAX_DEPTH) return -ENOBUFS; for (i = 0; i < xp->xfrm_nr; i++) { struct xfrm_user_tmpl *up = &vec[i]; struct xfrm_tmpl *kp = &xp->xfrm_vec[i]; memset(up, 0, sizeof(*up)); memcpy(&up->id, &kp->id, sizeof(up->id)); up->family = kp->encap_family; memcpy(&up->saddr, &kp->saddr, sizeof(up->saddr)); up->reqid = kp->reqid; up->mode = kp->mode; up->share = kp->share; up->optional = kp->optional; up->aalgos = kp->aalgos; up->ealgos = kp->ealgos; up->calgos = kp->calgos; } return nla_put(skb, XFRMA_TMPL, sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr, vec); } static inline int copy_to_user_state_sec_ctx(struct xfrm_state *x, struct sk_buff *skb) { if (x->security) { return copy_sec_ctx(x->security, skb); } return 0; } static inline int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *skb) { if (xp->security) return copy_sec_ctx(xp->security, skb); return 0; } static inline unsigned int userpolicy_type_attrsize(void) { #ifdef CONFIG_XFRM_SUB_POLICY return nla_total_size(sizeof(struct xfrm_userpolicy_type)); #else return 0; #endif } #ifdef CONFIG_XFRM_SUB_POLICY static int copy_to_user_policy_type(u8 type, struct sk_buff *skb) { struct xfrm_userpolicy_type upt; /* Sadly there are two holes in struct xfrm_userpolicy_type */ memset(&upt, 0, sizeof(upt)); upt.type = type; return nla_put(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt); } #else static inline int copy_to_user_policy_type(u8 type, struct sk_buff *skb) { return 0; } #endif static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr) { struct xfrm_dump_info *sp = ptr; struct xfrm_userpolicy_info *p; struct sk_buff *in_skb = sp->in_skb; struct sk_buff *skb = sp->out_skb; struct xfrm_translator *xtr; struct nlmsghdr *nlh; int err; nlh = nlmsg_put(skb, NETLINK_CB(in_skb).portid, sp->nlmsg_seq, XFRM_MSG_NEWPOLICY, sizeof(*p), sp->nlmsg_flags); if (nlh == NULL) return -EMSGSIZE; p = nlmsg_data(nlh); copy_to_user_policy(xp, p, dir); err = copy_to_user_tmpl(xp, skb); if (!err) err = copy_to_user_sec_ctx(xp, skb); if (!err) err = copy_to_user_policy_type(xp->type, skb); if (!err) err = xfrm_mark_put(skb, &xp->mark); if (!err) err = xfrm_if_id_put(skb, xp->if_id); if (!err && xp->xdo.dev) err = copy_user_offload(&xp->xdo, skb); if (err) { nlmsg_cancel(skb, nlh); return err; } nlmsg_end(skb, nlh); xtr = xfrm_get_translator(); if (xtr) { err = xtr->alloc_compat(skb, nlh); xfrm_put_translator(xtr); if (err) { nlmsg_cancel(skb, nlh); return err; } } return 0; } static int xfrm_dump_policy_done(struct netlink_callback *cb) { struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *)cb->args; struct net *net = sock_net(cb->skb->sk); xfrm_policy_walk_done(walk, net); return 0; } static int xfrm_dump_policy_start(struct netlink_callback *cb) { struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *)cb->args; BUILD_BUG_ON(sizeof(*walk) > sizeof(cb->args)); xfrm_policy_walk_init(walk, XFRM_POLICY_TYPE_ANY); return 0; } static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *)cb->args; struct xfrm_dump_info info; info.in_skb = cb->skb; info.out_skb = skb; info.nlmsg_seq = cb->nlh->nlmsg_seq; info.nlmsg_flags = NLM_F_MULTI; (void) xfrm_policy_walk(net, walk, dump_one_policy, &info); return skb->len; } static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb, struct xfrm_policy *xp, int dir, u32 seq) { struct xfrm_dump_info info; struct sk_buff *skb; int err; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb) return ERR_PTR(-ENOMEM); info.in_skb = in_skb; info.out_skb = skb; info.nlmsg_seq = seq; info.nlmsg_flags = 0; err = dump_one_policy(xp, dir, 0, &info); if (err) { kfree_skb(skb); return ERR_PTR(err); } return skb; } static int xfrm_notify_userpolicy(struct net *net) { struct xfrm_userpolicy_default *up; int len = NLMSG_ALIGN(sizeof(*up)); struct nlmsghdr *nlh; struct sk_buff *skb; int err; skb = nlmsg_new(len, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; nlh = nlmsg_put(skb, 0, 0, XFRM_MSG_GETDEFAULT, sizeof(*up), 0); if (nlh == NULL) { kfree_skb(skb); return -EMSGSIZE; } up = nlmsg_data(nlh); up->in = net->xfrm.policy_default[XFRM_POLICY_IN]; up->fwd = net->xfrm.policy_default[XFRM_POLICY_FWD]; up->out = net->xfrm.policy_default[XFRM_POLICY_OUT]; nlmsg_end(skb, nlh); rcu_read_lock(); err = xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_POLICY); rcu_read_unlock(); return err; } static bool xfrm_userpolicy_is_valid(__u8 policy) { return policy == XFRM_USERPOLICY_BLOCK || policy == XFRM_USERPOLICY_ACCEPT; } static int xfrm_set_default(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct xfrm_userpolicy_default *up = nlmsg_data(nlh); if (xfrm_userpolicy_is_valid(up->in)) net->xfrm.policy_default[XFRM_POLICY_IN] = up->in; if (xfrm_userpolicy_is_valid(up->fwd)) net->xfrm.policy_default[XFRM_POLICY_FWD] = up->fwd; if (xfrm_userpolicy_is_valid(up->out)) net->xfrm.policy_default[XFRM_POLICY_OUT] = up->out; rt_genid_bump_all(net); xfrm_notify_userpolicy(net); return 0; } static int xfrm_get_default(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs, struct netlink_ext_ack *extack) { struct sk_buff *r_skb; struct nlmsghdr *r_nlh; struct net *net = sock_net(skb->sk); struct xfrm_userpolicy_default *r_up; int len = NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_default)); u32 portid = NETLINK_CB(skb).portid; u32 seq = nlh->nlmsg_seq; r_skb = nlmsg_new(len, GFP_ATOMIC); if (!r_skb) return -ENOMEM; r_nlh = nlmsg_put(r_skb, portid, seq, XFRM_MSG_GETDEFAULT, sizeof(*r_up), 0); if (!r_nlh) { kfree_skb(r_skb); return -EMSGSIZE; } r_up = nlmsg_data(r_nlh); r_up->in = net->xfrm.policy_default[XFRM_POLICY_IN]; r_up->fwd = net->xfrm.policy_default[XFRM_POLICY_FWD]; r_up->out = net->xfrm.policy_default[XFRM_POLICY_OUT]; nlmsg_end(r_skb, r_nlh); return nlmsg_unicast(net->xfrm.nlsk, r_skb, portid); } static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct xfrm_policy *xp; struct xfrm_userpolicy_id *p; u8 type = XFRM_POLICY_TYPE_MAIN; int err; struct km_event c; int delete; struct xfrm_mark m; u32 if_id = 0; p = nlmsg_data(nlh); delete = nlh->nlmsg_type == XFRM_MSG_DELPOLICY; err = copy_from_user_policy_type(&type, attrs, extack); if (err) return err; err = verify_policy_dir(p->dir, extack); if (err) return err; if (attrs[XFRMA_IF_ID]) if_id = nla_get_u32(attrs[XFRMA_IF_ID]); xfrm_mark_get(attrs, &m); if (p->index) xp = xfrm_policy_byid(net, &m, if_id, type, p->dir, p->index, delete, &err); else { struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_sec_ctx *ctx; err = verify_sec_ctx_len(attrs, extack); if (err) return err; ctx = NULL; if (rt) { struct xfrm_user_sec_ctx *uctx = nla_data(rt); err = security_xfrm_policy_alloc(&ctx, uctx, GFP_KERNEL); if (err) return err; } xp = xfrm_policy_bysel_ctx(net, &m, if_id, type, p->dir, &p->sel, ctx, delete, &err); security_xfrm_policy_free(ctx); } if (xp == NULL) return -ENOENT; if (!delete) { struct sk_buff *resp_skb; resp_skb = xfrm_policy_netlink(skb, xp, p->dir, nlh->nlmsg_seq); if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid); } } else { xfrm_audit_policy_delete(xp, err ? 0 : 1, true); if (err != 0) goto out; c.data.byid = p->index; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; c.portid = nlh->nlmsg_pid; km_policy_notify(xp, p->dir, &c); } out: xfrm_pol_put(xp); return err; } static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct km_event c; struct xfrm_usersa_flush *p = nlmsg_data(nlh); int err; err = xfrm_state_flush(net, p->proto, true); if (err) { if (err == -ESRCH) /* empty table */ return 0; return err; } c.data.proto = p->proto; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; c.portid = nlh->nlmsg_pid; c.net = net; km_state_notify(NULL, &c); return 0; } static inline unsigned int xfrm_aevent_msgsize(struct xfrm_state *x) { unsigned int replay_size = x->replay_esn ? xfrm_replay_state_esn_len(x->replay_esn) : sizeof(struct xfrm_replay_state); return NLMSG_ALIGN(sizeof(struct xfrm_aevent_id)) + nla_total_size(replay_size) + nla_total_size_64bit(sizeof(struct xfrm_lifetime_cur)) + nla_total_size(sizeof(struct xfrm_mark)) + nla_total_size(4) /* XFRM_AE_RTHR */ + nla_total_size(4) /* XFRM_AE_ETHR */ + nla_total_size(sizeof(x->dir)) /* XFRMA_SA_DIR */ + nla_total_size(4); /* XFRMA_SA_PCPU */ } static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c) { struct xfrm_aevent_id *id; struct nlmsghdr *nlh; int err; nlh = nlmsg_put(skb, c->portid, c->seq, XFRM_MSG_NEWAE, sizeof(*id), 0); if (nlh == NULL) return -EMSGSIZE; id = nlmsg_data(nlh); memset(&id->sa_id, 0, sizeof(id->sa_id)); memcpy(&id->sa_id.daddr, &x->id.daddr, sizeof(x->id.daddr)); id->sa_id.spi = x->id.spi; id->sa_id.family = x->props.family; id->sa_id.proto = x->id.proto; memcpy(&id->saddr, &x->props.saddr, sizeof(x->props.saddr)); id->reqid = x->props.reqid; id->flags = c->data.aevent; if (x->replay_esn) { err = nla_put(skb, XFRMA_REPLAY_ESN_VAL, xfrm_replay_state_esn_len(x->replay_esn), x->replay_esn); } else { err = nla_put(skb, XFRMA_REPLAY_VAL, sizeof(x->replay), &x->replay); } if (err) goto out_cancel; err = nla_put_64bit(skb, XFRMA_LTIME_VAL, sizeof(x->curlft), &x->curlft, XFRMA_PAD); if (err) goto out_cancel; if (id->flags & XFRM_AE_RTHR) { err = nla_put_u32(skb, XFRMA_REPLAY_THRESH, x->replay_maxdiff); if (err) goto out_cancel; } if (id->flags & XFRM_AE_ETHR) { err = nla_put_u32(skb, XFRMA_ETIMER_THRESH, x->replay_maxage * 10 / HZ); if (err) goto out_cancel; } err = xfrm_mark_put(skb, &x->mark); if (err) goto out_cancel; err = xfrm_if_id_put(skb, x->if_id); if (err) goto out_cancel; if (x->pcpu_num != UINT_MAX) { err = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num); if (err) goto out_cancel; } if (x->dir) { err = nla_put_u8(skb, XFRMA_SA_DIR, x->dir); if (err) goto out_cancel; } nlmsg_end(skb, nlh); return 0; out_cancel: nlmsg_cancel(skb, nlh); return err; } static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct xfrm_state *x; struct sk_buff *r_skb; int err; struct km_event c; u32 mark; struct xfrm_mark m; struct xfrm_aevent_id *p = nlmsg_data(nlh); struct xfrm_usersa_id *id = &p->sa_id; mark = xfrm_mark_get(attrs, &m); x = xfrm_state_lookup(net, mark, &id->daddr, id->spi, id->proto, id->family); if (x == NULL) return -ESRCH; r_skb = nlmsg_new(xfrm_aevent_msgsize(x), GFP_ATOMIC); if (r_skb == NULL) { xfrm_state_put(x); return -ENOMEM; } /* * XXX: is this lock really needed - none of the other * gets lock (the concern is things getting updated * while we are still reading) - jhs */ spin_lock_bh(&x->lock); c.data.aevent = p->flags; c.seq = nlh->nlmsg_seq; c.portid = nlh->nlmsg_pid; err = build_aevent(r_skb, x, &c); BUG_ON(err < 0); err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).portid); spin_unlock_bh(&x->lock); xfrm_state_put(x); return err; } static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct xfrm_state *x; struct km_event c; int err = -EINVAL; u32 mark = 0; struct xfrm_mark m; struct xfrm_aevent_id *p = nlmsg_data(nlh); struct nlattr *rp = attrs[XFRMA_REPLAY_VAL]; struct nlattr *re = attrs[XFRMA_REPLAY_ESN_VAL]; struct nlattr *lt = attrs[XFRMA_LTIME_VAL]; struct nlattr *et = attrs[XFRMA_ETIMER_THRESH]; struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH]; if (!lt && !rp && !re && !et && !rt) { NL_SET_ERR_MSG(extack, "Missing required attribute for AE"); return err; } /* pedantic mode - thou shalt sayeth replaceth */ if (!(nlh->nlmsg_flags & NLM_F_REPLACE)) { NL_SET_ERR_MSG(extack, "NLM_F_REPLACE flag is required"); return err; } mark = xfrm_mark_get(attrs, &m); x = xfrm_state_lookup(net, mark, &p->sa_id.daddr, p->sa_id.spi, p->sa_id.proto, p->sa_id.family); if (x == NULL) return -ESRCH; if (x->km.state != XFRM_STATE_VALID) { NL_SET_ERR_MSG(extack, "SA must be in VALID state"); goto out; } err = xfrm_replay_verify_len(x->replay_esn, re, extack); if (err) goto out; spin_lock_bh(&x->lock); xfrm_update_ae_params(x, attrs, 1); spin_unlock_bh(&x->lock); c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; c.portid = nlh->nlmsg_pid; c.data.aevent = XFRM_AE_CU; km_state_notify(x, &c); err = 0; out: xfrm_state_put(x); return err; } static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct km_event c; u8 type = XFRM_POLICY_TYPE_MAIN; int err; err = copy_from_user_policy_type(&type, attrs, extack); if (err) return err; err = xfrm_policy_flush(net, type, true); if (err) { if (err == -ESRCH) /* empty table */ return 0; return err; } c.data.type = type; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; c.portid = nlh->nlmsg_pid; c.net = net; km_policy_notify(NULL, 0, &c); return 0; } static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct xfrm_policy *xp; struct xfrm_user_polexpire *up = nlmsg_data(nlh); struct xfrm_userpolicy_info *p = &up->pol; u8 type = XFRM_POLICY_TYPE_MAIN; int err = -ENOENT; struct xfrm_mark m; u32 if_id = 0; err = copy_from_user_policy_type(&type, attrs, extack); if (err) return err; err = verify_policy_dir(p->dir, extack); if (err) return err; if (attrs[XFRMA_IF_ID]) if_id = nla_get_u32(attrs[XFRMA_IF_ID]); xfrm_mark_get(attrs, &m); if (p->index) xp = xfrm_policy_byid(net, &m, if_id, type, p->dir, p->index, 0, &err); else { struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_sec_ctx *ctx; err = verify_sec_ctx_len(attrs, extack); if (err) return err; ctx = NULL; if (rt) { struct xfrm_user_sec_ctx *uctx = nla_data(rt); err = security_xfrm_policy_alloc(&ctx, uctx, GFP_KERNEL); if (err) return err; } xp = xfrm_policy_bysel_ctx(net, &m, if_id, type, p->dir, &p->sel, ctx, 0, &err); security_xfrm_policy_free(ctx); } if (xp == NULL) return -ENOENT; if (unlikely(xp->walk.dead)) goto out; err = 0; if (up->hard) { xfrm_policy_delete(xp, p->dir); xfrm_audit_policy_delete(xp, 1, true); } km_policy_expired(xp, p->dir, up->hard, nlh->nlmsg_pid); out: xfrm_pol_put(xp); return err; } static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct xfrm_state *x; int err; struct xfrm_user_expire *ue = nlmsg_data(nlh); struct xfrm_usersa_info *p = &ue->state; struct xfrm_mark m; u32 mark = xfrm_mark_get(attrs, &m); x = xfrm_state_lookup(net, mark, &p->id.daddr, p->id.spi, p->id.proto, p->family); err = -ENOENT; if (x == NULL) return err; spin_lock_bh(&x->lock); err = -EINVAL; if (x->km.state != XFRM_STATE_VALID) { NL_SET_ERR_MSG(extack, "SA must be in VALID state"); goto out; } km_state_expired(x, ue->hard, nlh->nlmsg_pid); if (ue->hard) { __xfrm_state_delete(x); xfrm_audit_state_delete(x, 1, true); } err = 0; out: spin_unlock_bh(&x->lock); xfrm_state_put(x); return err; } static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct xfrm_policy *xp; struct xfrm_user_tmpl *ut; int i; struct nlattr *rt = attrs[XFRMA_TMPL]; struct xfrm_mark mark; struct xfrm_user_acquire *ua = nlmsg_data(nlh); struct xfrm_state *x = xfrm_state_alloc(net); int err = -ENOMEM; if (!x) goto nomem; xfrm_mark_get(attrs, &mark); if (attrs[XFRMA_SA_PCPU]) { x->pcpu_num = nla_get_u32(attrs[XFRMA_SA_PCPU]); err = -EINVAL; if (x->pcpu_num >= num_possible_cpus()) goto free_state; } err = verify_newpolicy_info(&ua->policy, extack); if (err) goto free_state; err = verify_sec_ctx_len(attrs, extack); if (err) goto free_state; /* build an XP */ xp = xfrm_policy_construct(net, &ua->policy, attrs, &err, extack); if (!xp) goto free_state; memcpy(&x->id, &ua->id, sizeof(ua->id)); memcpy(&x->props.saddr, &ua->saddr, sizeof(ua->saddr)); memcpy(&x->sel, &ua->sel, sizeof(ua->sel)); xp->mark.m = x->mark.m = mark.m; xp->mark.v = x->mark.v = mark.v; ut = nla_data(rt); /* extract the templates and for each call km_key */ for (i = 0; i < xp->xfrm_nr; i++, ut++) { struct xfrm_tmpl *t = &xp->xfrm_vec[i]; memcpy(&x->id, &t->id, sizeof(x->id)); x->props.mode = t->mode; x->props.reqid = t->reqid; x->props.family = ut->family; t->aalgos = ua->aalgos; t->ealgos = ua->ealgos; t->calgos = ua->calgos; err = km_query(x, t, xp); } xfrm_state_free(x); xfrm_dev_policy_delete(xp); xfrm_dev_policy_free(xp); security_xfrm_policy_free(xp->security); kfree(xp); return 0; free_state: xfrm_state_free(x); nomem: return err; } #ifdef CONFIG_XFRM_MIGRATE static int copy_from_user_migrate(struct xfrm_migrate *ma, struct xfrm_kmaddress *k, struct nlattr **attrs, int *num, struct netlink_ext_ack *extack) { struct nlattr *rt = attrs[XFRMA_MIGRATE]; struct xfrm_user_migrate *um; int i, num_migrate; if (k != NULL) { struct xfrm_user_kmaddress *uk; uk = nla_data(attrs[XFRMA_KMADDRESS]); memcpy(&k->local, &uk->local, sizeof(k->local)); memcpy(&k->remote, &uk->remote, sizeof(k->remote)); k->family = uk->family; k->reserved = uk->reserved; } um = nla_data(rt); num_migrate = nla_len(rt) / sizeof(*um); if (num_migrate <= 0 || num_migrate > XFRM_MAX_DEPTH) { NL_SET_ERR_MSG(extack, "Invalid number of SAs to migrate, must be 0 < num <= XFRM_MAX_DEPTH (6)"); return -EINVAL; } for (i = 0; i < num_migrate; i++, um++, ma++) { memcpy(&ma->old_daddr, &um->old_daddr, sizeof(ma->old_daddr)); memcpy(&ma->old_saddr, &um->old_saddr, sizeof(ma->old_saddr)); memcpy(&ma->new_daddr, &um->new_daddr, sizeof(ma->new_daddr)); memcpy(&ma->new_saddr, &um->new_saddr, sizeof(ma->new_saddr)); ma->proto = um->proto; ma->mode = um->mode; ma->reqid = um->reqid; ma->old_family = um->old_family; ma->new_family = um->new_family; } *num = i; return 0; } static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs, struct netlink_ext_ack *extack) { struct xfrm_userpolicy_id *pi = nlmsg_data(nlh); struct xfrm_migrate m[XFRM_MAX_DEPTH]; struct xfrm_kmaddress km, *kmp; u8 type; int err; int n = 0; struct net *net = sock_net(skb->sk); struct xfrm_encap_tmpl *encap = NULL; struct xfrm_user_offload *xuo = NULL; u32 if_id = 0; if (!attrs[XFRMA_MIGRATE]) { NL_SET_ERR_MSG(extack, "Missing required MIGRATE attribute"); return -EINVAL; } kmp = attrs[XFRMA_KMADDRESS] ? &km : NULL; err = copy_from_user_policy_type(&type, attrs, extack); if (err) return err; err = copy_from_user_migrate(m, kmp, attrs, &n, extack); if (err) return err; if (!n) return 0; if (attrs[XFRMA_ENCAP]) { encap = kmemdup(nla_data(attrs[XFRMA_ENCAP]), sizeof(*encap), GFP_KERNEL); if (!encap) return -ENOMEM; } if (attrs[XFRMA_IF_ID]) if_id = nla_get_u32(attrs[XFRMA_IF_ID]); if (attrs[XFRMA_OFFLOAD_DEV]) { xuo = kmemdup(nla_data(attrs[XFRMA_OFFLOAD_DEV]), sizeof(*xuo), GFP_KERNEL); if (!xuo) { err = -ENOMEM; goto error; } } err = xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp, net, encap, if_id, extack, xuo); error: kfree(encap); kfree(xuo); return err; } #else static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr **attrs, struct netlink_ext_ack *extack) { return -ENOPROTOOPT; } #endif #ifdef CONFIG_XFRM_MIGRATE static int copy_to_user_migrate(const struct xfrm_migrate *m, struct sk_buff *skb) { struct xfrm_user_migrate um; memset(&um, 0, sizeof(um)); um.proto = m->proto; um.mode = m->mode; um.reqid = m->reqid; um.old_family = m->old_family; memcpy(&um.old_daddr, &m->old_daddr, sizeof(um.old_daddr)); memcpy(&um.old_saddr, &m->old_saddr, sizeof(um.old_saddr)); um.new_family = m->new_family; memcpy(&um.new_daddr, &m->new_daddr, sizeof(um.new_daddr)); memcpy(&um.new_saddr, &m->new_saddr, sizeof(um.new_saddr)); return nla_put(skb, XFRMA_MIGRATE, sizeof(um), &um); } static int copy_to_user_kmaddress(const struct xfrm_kmaddress *k, struct sk_buff *skb) { struct xfrm_user_kmaddress uk; memset(&uk, 0, sizeof(uk)); uk.family = k->family; uk.reserved = k->reserved; memcpy(&uk.local, &k->local, sizeof(uk.local)); memcpy(&uk.remote, &k->remote, sizeof(uk.remote)); return nla_put(skb, XFRMA_KMADDRESS, sizeof(uk), &uk); } static inline unsigned int xfrm_migrate_msgsize(int num_migrate, int with_kma, int with_encp) { return NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_id)) + (with_kma ? nla_total_size(sizeof(struct xfrm_kmaddress)) : 0) + (with_encp ? nla_total_size(sizeof(struct xfrm_encap_tmpl)) : 0) + nla_total_size(sizeof(struct xfrm_user_migrate) * num_migrate) + userpolicy_type_attrsize(); } static int build_migrate(struct sk_buff *skb, const struct xfrm_migrate *m, int num_migrate, const struct xfrm_kmaddress *k, const struct xfrm_selector *sel, const struct xfrm_encap_tmpl *encap, u8 dir, u8 type) { const struct xfrm_migrate *mp; struct xfrm_userpolicy_id *pol_id; struct nlmsghdr *nlh; int i, err; nlh = nlmsg_put(skb, 0, 0, XFRM_MSG_MIGRATE, sizeof(*pol_id), 0); if (nlh == NULL) return -EMSGSIZE; pol_id = nlmsg_data(nlh); /* copy data from selector, dir, and type to the pol_id */ memset(pol_id, 0, sizeof(*pol_id)); memcpy(&pol_id->sel, sel, sizeof(pol_id->sel)); pol_id->dir = dir; if (k != NULL) { err = copy_to_user_kmaddress(k, skb); if (err) goto out_cancel; } if (encap) { err = nla_put(skb, XFRMA_ENCAP, sizeof(*encap), encap); if (err) goto out_cancel; } err = copy_to_user_policy_type(type, skb); if (err) goto out_cancel; for (i = 0, mp = m ; i < num_migrate; i++, mp++) { err = copy_to_user_migrate(mp, skb); if (err) goto out_cancel; } nlmsg_end(skb, nlh); return 0; out_cancel: nlmsg_cancel(skb, nlh); return err; } static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, const struct xfrm_migrate *m, int num_migrate, const struct xfrm_kmaddress *k, const struct xfrm_encap_tmpl *encap) { struct net *net = &init_net; struct sk_buff *skb; int err; skb = nlmsg_new(xfrm_migrate_msgsize(num_migrate, !!k, !!encap), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; /* build migrate */ err = build_migrate(skb, m, num_migrate, k, sel, encap, dir, type); BUG_ON(err < 0); return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MIGRATE); } #else static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, const struct xfrm_migrate *m, int num_migrate, const struct xfrm_kmaddress *k, const struct xfrm_encap_tmpl *encap) { return -ENOPROTOOPT; } #endif #define XMSGSIZE(type) sizeof(struct type) const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { [XFRM_MSG_NEWSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_info), [XFRM_MSG_DELSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_id), [XFRM_MSG_GETSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_id), [XFRM_MSG_NEWPOLICY - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_info), [XFRM_MSG_DELPOLICY - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_id), [XFRM_MSG_GETPOLICY - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_id), [XFRM_MSG_ALLOCSPI - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userspi_info), [XFRM_MSG_ACQUIRE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_acquire), [XFRM_MSG_EXPIRE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_expire), [XFRM_MSG_UPDPOLICY - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_info), [XFRM_MSG_UPDSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_info), [XFRM_MSG_POLEXPIRE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_polexpire), [XFRM_MSG_FLUSHSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_flush), [XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = 0, [XFRM_MSG_NEWAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id), [XFRM_MSG_GETAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id), [XFRM_MSG_REPORT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_report), [XFRM_MSG_MIGRATE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_id), [XFRM_MSG_GETSADINFO - XFRM_MSG_BASE] = sizeof(u32), [XFRM_MSG_NEWSPDINFO - XFRM_MSG_BASE] = sizeof(u32), [XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = sizeof(u32), [XFRM_MSG_SETDEFAULT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_default), [XFRM_MSG_GETDEFAULT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_default), }; EXPORT_SYMBOL_GPL(xfrm_msg_min); #undef XMSGSIZE const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_UNSPEC] = { .strict_start_type = XFRMA_SA_DIR }, [XFRMA_SA] = { .len = sizeof(struct xfrm_usersa_info)}, [XFRMA_POLICY] = { .len = sizeof(struct xfrm_userpolicy_info)}, [XFRMA_LASTUSED] = { .type = NLA_U64}, [XFRMA_ALG_AUTH_TRUNC] = { .len = sizeof(struct xfrm_algo_auth)}, [XFRMA_ALG_AEAD] = { .len = sizeof(struct xfrm_algo_aead) }, [XFRMA_ALG_AUTH] = { .len = sizeof(struct xfrm_algo) }, [XFRMA_ALG_CRYPT] = { .len = sizeof(struct xfrm_algo) }, [XFRMA_ALG_COMP] = { .len = sizeof(struct xfrm_algo) }, [XFRMA_ENCAP] = { .len = sizeof(struct xfrm_encap_tmpl) }, [XFRMA_TMPL] = { .len = sizeof(struct xfrm_user_tmpl) }, [XFRMA_SEC_CTX] = { .len = sizeof(struct xfrm_user_sec_ctx) }, [XFRMA_LTIME_VAL] = { .len = sizeof(struct xfrm_lifetime_cur) }, [XFRMA_REPLAY_VAL] = { .len = sizeof(struct xfrm_replay_state) }, [XFRMA_REPLAY_THRESH] = { .type = NLA_U32 }, [XFRMA_ETIMER_THRESH] = { .type = NLA_U32 }, [XFRMA_SRCADDR] = { .len = sizeof(xfrm_address_t) }, [XFRMA_COADDR] = { .len = sizeof(xfrm_address_t) }, [XFRMA_POLICY_TYPE] = { .len = sizeof(struct xfrm_userpolicy_type)}, [XFRMA_MIGRATE] = { .len = sizeof(struct xfrm_user_migrate) }, [XFRMA_KMADDRESS] = { .len = sizeof(struct xfrm_user_kmaddress) }, [XFRMA_MARK] = { .len = sizeof(struct xfrm_mark) }, [XFRMA_TFCPAD] = { .type = NLA_U32 }, [XFRMA_REPLAY_ESN_VAL] = { .len = sizeof(struct xfrm_replay_state_esn) }, [XFRMA_SA_EXTRA_FLAGS] = { .type = NLA_U32 }, [XFRMA_PROTO] = { .type = NLA_U8 }, [XFRMA_ADDRESS_FILTER] = { .len = sizeof(struct xfrm_address_filter) }, [XFRMA_OFFLOAD_DEV] = { .len = sizeof(struct xfrm_user_offload) }, [XFRMA_SET_MARK] = { .type = NLA_U32 }, [XFRMA_SET_MARK_MASK] = { .type = NLA_U32 }, [XFRMA_IF_ID] = { .type = NLA_U32 }, [XFRMA_MTIMER_THRESH] = { .type = NLA_U32 }, [XFRMA_SA_DIR] = NLA_POLICY_RANGE(NLA_U8, XFRM_SA_DIR_IN, XFRM_SA_DIR_OUT), [XFRMA_NAT_KEEPALIVE_INTERVAL] = { .type = NLA_U32 }, [XFRMA_SA_PCPU] = { .type = NLA_U32 }, [XFRMA_IPTFS_DROP_TIME] = { .type = NLA_U32 }, [XFRMA_IPTFS_REORDER_WINDOW] = { .type = NLA_U16 }, [XFRMA_IPTFS_DONT_FRAG] = { .type = NLA_FLAG }, [XFRMA_IPTFS_INIT_DELAY] = { .type = NLA_U32 }, [XFRMA_IPTFS_MAX_QSIZE] = { .type = NLA_U32 }, [XFRMA_IPTFS_PKT_SIZE] = { .type = NLA_U32 }, }; EXPORT_SYMBOL_GPL(xfrma_policy); static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = { [XFRMA_SPD_IPV4_HTHRESH] = { .len = sizeof(struct xfrmu_spdhthresh) }, [XFRMA_SPD_IPV6_HTHRESH] = { .len = sizeof(struct xfrmu_spdhthresh) }, }; static const struct xfrm_link { int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **, struct netlink_ext_ack *); int (*start)(struct netlink_callback *); int (*dump)(struct sk_buff *, struct netlink_callback *); int (*done)(struct netlink_callback *); const struct nla_policy *nla_pol; int nla_max; } xfrm_dispatch[XFRM_NR_MSGTYPES] = { [XFRM_MSG_NEWSA - XFRM_MSG_BASE] = { .doit = xfrm_add_sa }, [XFRM_MSG_DELSA - XFRM_MSG_BASE] = { .doit = xfrm_del_sa }, [XFRM_MSG_GETSA - XFRM_MSG_BASE] = { .doit = xfrm_get_sa, .dump = xfrm_dump_sa, .done = xfrm_dump_sa_done }, [XFRM_MSG_NEWPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_add_policy }, [XFRM_MSG_DELPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_get_policy }, [XFRM_MSG_GETPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_get_policy, .start = xfrm_dump_policy_start, .dump = xfrm_dump_policy, .done = xfrm_dump_policy_done }, [XFRM_MSG_ALLOCSPI - XFRM_MSG_BASE] = { .doit = xfrm_alloc_userspi }, [XFRM_MSG_ACQUIRE - XFRM_MSG_BASE] = { .doit = xfrm_add_acquire }, [XFRM_MSG_EXPIRE - XFRM_MSG_BASE] = { .doit = xfrm_add_sa_expire }, [XFRM_MSG_UPDPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_add_policy }, [XFRM_MSG_UPDSA - XFRM_MSG_BASE] = { .doit = xfrm_add_sa }, [XFRM_MSG_POLEXPIRE - XFRM_MSG_BASE] = { .doit = xfrm_add_pol_expire}, [XFRM_MSG_FLUSHSA - XFRM_MSG_BASE] = { .doit = xfrm_flush_sa }, [XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_flush_policy }, [XFRM_MSG_NEWAE - XFRM_MSG_BASE] = { .doit = xfrm_new_ae }, [XFRM_MSG_GETAE - XFRM_MSG_BASE] = { .doit = xfrm_get_ae }, [XFRM_MSG_MIGRATE - XFRM_MSG_BASE] = { .doit = xfrm_do_migrate }, [XFRM_MSG_GETSADINFO - XFRM_MSG_BASE] = { .doit = xfrm_get_sadinfo }, [XFRM_MSG_NEWSPDINFO - XFRM_MSG_BASE] = { .doit = xfrm_set_spdinfo, .nla_pol = xfrma_spd_policy, .nla_max = XFRMA_SPD_MAX }, [XFRM_MSG_GETSPDINFO - XFRM_MSG_BASE] = { .doit = xfrm_get_spdinfo }, [XFRM_MSG_SETDEFAULT - XFRM_MSG_BASE] = { .doit = xfrm_set_default }, [XFRM_MSG_GETDEFAULT - XFRM_MSG_BASE] = { .doit = xfrm_get_default }, }; static int xfrm_reject_unused_attr(int type, struct nlattr **attrs, struct netlink_ext_ack *extack) { if (attrs[XFRMA_SA_DIR]) { switch (type) { case XFRM_MSG_NEWSA: case XFRM_MSG_UPDSA: case XFRM_MSG_ALLOCSPI: break; default: NL_SET_ERR_MSG(extack, "Invalid attribute SA_DIR"); return -EINVAL; } } if (attrs[XFRMA_SA_PCPU]) { switch (type) { case XFRM_MSG_NEWSA: case XFRM_MSG_UPDSA: case XFRM_MSG_ALLOCSPI: case XFRM_MSG_ACQUIRE: break; default: NL_SET_ERR_MSG(extack, "Invalid attribute SA_PCPU"); return -EINVAL; } } return 0; } static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct nlattr *attrs[XFRMA_MAX+1]; const struct xfrm_link *link; struct nlmsghdr *nlh64 = NULL; int type, err; type = nlh->nlmsg_type; if (type > XFRM_MSG_MAX) return -EINVAL; type -= XFRM_MSG_BASE; link = &xfrm_dispatch[type]; /* All operations require privileges, even GET */ if (!netlink_net_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (in_compat_syscall()) { struct xfrm_translator *xtr = xfrm_get_translator(); if (!xtr) return -EOPNOTSUPP; nlh64 = xtr->rcv_msg_compat(nlh, link->nla_max, link->nla_pol, extack); xfrm_put_translator(xtr); if (IS_ERR(nlh64)) return PTR_ERR(nlh64); if (nlh64) nlh = nlh64; } if ((type == (XFRM_MSG_GETSA - XFRM_MSG_BASE) || type == (XFRM_MSG_GETPOLICY - XFRM_MSG_BASE)) && (nlh->nlmsg_flags & NLM_F_DUMP)) { struct netlink_dump_control c = { .start = link->start, .dump = link->dump, .done = link->done, }; if (link->dump == NULL) { err = -EINVAL; goto err; } err = netlink_dump_start(net->xfrm.nlsk, skb, nlh, &c); goto err; } err = nlmsg_parse_deprecated(nlh, xfrm_msg_min[type], attrs, link->nla_max ? : XFRMA_MAX, link->nla_pol ? : xfrma_policy, extack); if (err < 0) goto err; if (!link->nla_pol || link->nla_pol == xfrma_policy) { err = xfrm_reject_unused_attr((type + XFRM_MSG_BASE), attrs, extack); if (err < 0) goto err; } if (link->doit == NULL) { err = -EINVAL; goto err; } err = link->doit(skb, nlh, attrs, extack); /* We need to free skb allocated in xfrm_alloc_compat() before * returning from this function, because consume_skb() won't take * care of frag_list since netlink destructor sets * sbk->head to NULL. (see netlink_skb_destructor()) */ if (skb_has_frag_list(skb)) { kfree_skb(skb_shinfo(skb)->frag_list); skb_shinfo(skb)->frag_list = NULL; } err: kvfree(nlh64); return err; } static void xfrm_netlink_rcv(struct sk_buff *skb) { struct net *net = sock_net(skb->sk); mutex_lock(&net->xfrm.xfrm_cfg_mutex); netlink_rcv_skb(skb, &xfrm_user_rcv_msg); mutex_unlock(&net->xfrm.xfrm_cfg_mutex); } static inline unsigned int xfrm_expire_msgsize(void) { return NLMSG_ALIGN(sizeof(struct xfrm_user_expire)) + nla_total_size(sizeof(struct xfrm_mark)) + nla_total_size(sizeof_field(struct xfrm_state, dir)) + nla_total_size(4); /* XFRMA_SA_PCPU */ } static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct km_event *c) { struct xfrm_user_expire *ue; struct nlmsghdr *nlh; int err; nlh = nlmsg_put(skb, c->portid, 0, XFRM_MSG_EXPIRE, sizeof(*ue), 0); if (nlh == NULL) return -EMSGSIZE; ue = nlmsg_data(nlh); copy_to_user_state(x, &ue->state); ue->hard = (c->data.hard != 0) ? 1 : 0; /* clear the padding bytes */ memset_after(ue, 0, hard); err = xfrm_mark_put(skb, &x->mark); if (err) return err; err = xfrm_if_id_put(skb, x->if_id); if (err) return err; if (x->pcpu_num != UINT_MAX) { err = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num); if (err) return err; } if (x->dir) { err = nla_put_u8(skb, XFRMA_SA_DIR, x->dir); if (err) return err; } nlmsg_end(skb, nlh); return 0; } static int xfrm_exp_state_notify(struct xfrm_state *x, const struct km_event *c) { struct net *net = xs_net(x); struct sk_buff *skb; skb = nlmsg_new(xfrm_expire_msgsize(), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; if (build_expire(skb, x, c) < 0) { kfree_skb(skb); return -EMSGSIZE; } return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_EXPIRE); } static int xfrm_aevent_state_notify(struct xfrm_state *x, const struct km_event *c) { struct net *net = xs_net(x); struct sk_buff *skb; int err; skb = nlmsg_new(xfrm_aevent_msgsize(x), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; err = build_aevent(skb, x, c); BUG_ON(err < 0); return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_AEVENTS); } static int xfrm_notify_sa_flush(const struct km_event *c) { struct net *net = c->net; struct xfrm_usersa_flush *p; struct nlmsghdr *nlh; struct sk_buff *skb; int len = NLMSG_ALIGN(sizeof(struct xfrm_usersa_flush)); skb = nlmsg_new(len, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; nlh = nlmsg_put(skb, c->portid, c->seq, XFRM_MSG_FLUSHSA, sizeof(*p), 0); if (nlh == NULL) { kfree_skb(skb); return -EMSGSIZE; } p = nlmsg_data(nlh); p->proto = c->data.proto; nlmsg_end(skb, nlh); return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_SA); } static inline unsigned int xfrm_sa_len(struct xfrm_state *x) { unsigned int l = 0; if (x->aead) l += nla_total_size(aead_len(x->aead)); if (x->aalg) { l += nla_total_size(sizeof(struct xfrm_algo) + (x->aalg->alg_key_len + 7) / 8); l += nla_total_size(xfrm_alg_auth_len(x->aalg)); } if (x->ealg) l += nla_total_size(xfrm_alg_len(x->ealg)); if (x->calg) l += nla_total_size(sizeof(*x->calg)); if (x->encap) l += nla_total_size(sizeof(*x->encap)); if (x->tfcpad) l += nla_total_size(sizeof(x->tfcpad)); if (x->replay_esn) l += nla_total_size(xfrm_replay_state_esn_len(x->replay_esn)); else l += nla_total_size(sizeof(struct xfrm_replay_state)); if (x->security) l += nla_total_size(sizeof(struct xfrm_user_sec_ctx) + x->security->ctx_len); if (x->coaddr) l += nla_total_size(sizeof(*x->coaddr)); if (x->props.extra_flags) l += nla_total_size(sizeof(x->props.extra_flags)); if (x->xso.dev) l += nla_total_size(sizeof(struct xfrm_user_offload)); if (x->props.smark.v | x->props.smark.m) { l += nla_total_size(sizeof(x->props.smark.v)); l += nla_total_size(sizeof(x->props.smark.m)); } if (x->if_id) l += nla_total_size(sizeof(x->if_id)); if (x->pcpu_num) l += nla_total_size(sizeof(x->pcpu_num)); /* Must count x->lastused as it may become non-zero behind our back. */ l += nla_total_size_64bit(sizeof(u64)); if (x->mapping_maxage) l += nla_total_size(sizeof(x->mapping_maxage)); if (x->dir) l += nla_total_size(sizeof(x->dir)); if (x->nat_keepalive_interval) l += nla_total_size(sizeof(x->nat_keepalive_interval)); if (x->mode_cbs && x->mode_cbs->sa_len) l += x->mode_cbs->sa_len(x); return l; } static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c) { struct net *net = xs_net(x); struct xfrm_usersa_info *p; struct xfrm_usersa_id *id; struct nlmsghdr *nlh; struct sk_buff *skb; unsigned int len = xfrm_sa_len(x); unsigned int headlen; int err; headlen = sizeof(*p); if (c->event == XFRM_MSG_DELSA) { len += nla_total_size(headlen); headlen = sizeof(*id); len += nla_total_size(sizeof(struct xfrm_mark)); } len += NLMSG_ALIGN(headlen); skb = nlmsg_new(len, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; nlh = nlmsg_put(skb, c->portid, c->seq, c->event, headlen, 0); err = -EMSGSIZE; if (nlh == NULL) goto out_free_skb; p = nlmsg_data(nlh); if (c->event == XFRM_MSG_DELSA) { struct nlattr *attr; id = nlmsg_data(nlh); memset(id, 0, sizeof(*id)); memcpy(&id->daddr, &x->id.daddr, sizeof(id->daddr)); id->spi = x->id.spi; id->family = x->props.family; id->proto = x->id.proto; attr = nla_reserve(skb, XFRMA_SA, sizeof(*p)); err = -EMSGSIZE; if (attr == NULL) goto out_free_skb; p = nla_data(attr); } err = copy_to_user_state_extra(x, p, skb); if (err) goto out_free_skb; nlmsg_end(skb, nlh); return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_SA); out_free_skb: kfree_skb(skb); return err; } static int xfrm_send_state_notify(struct xfrm_state *x, const struct km_event *c) { switch (c->event) { case XFRM_MSG_EXPIRE: return xfrm_exp_state_notify(x, c); case XFRM_MSG_NEWAE: return xfrm_aevent_state_notify(x, c); case XFRM_MSG_DELSA: case XFRM_MSG_UPDSA: case XFRM_MSG_NEWSA: return xfrm_notify_sa(x, c); case XFRM_MSG_FLUSHSA: return xfrm_notify_sa_flush(c); default: printk(KERN_NOTICE "xfrm_user: Unknown SA event %d\n", c->event); break; } return 0; } static inline unsigned int xfrm_acquire_msgsize(struct xfrm_state *x, struct xfrm_policy *xp) { return NLMSG_ALIGN(sizeof(struct xfrm_user_acquire)) + nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr) + nla_total_size(sizeof(struct xfrm_mark)) + nla_total_size(xfrm_user_sec_ctx_size(x->security)) + nla_total_size(4) /* XFRMA_SA_PCPU */ + userpolicy_type_attrsize(); } static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, struct xfrm_tmpl *xt, struct xfrm_policy *xp) { __u32 seq = xfrm_get_acqseq(); struct xfrm_user_acquire *ua; struct nlmsghdr *nlh; int err; nlh = nlmsg_put(skb, 0, 0, XFRM_MSG_ACQUIRE, sizeof(*ua), 0); if (nlh == NULL) return -EMSGSIZE; ua = nlmsg_data(nlh); memcpy(&ua->id, &x->id, sizeof(ua->id)); memcpy(&ua->saddr, &x->props.saddr, sizeof(ua->saddr)); memcpy(&ua->sel, &x->sel, sizeof(ua->sel)); copy_to_user_policy(xp, &ua->policy, XFRM_POLICY_OUT); ua->aalgos = xt->aalgos; ua->ealgos = xt->ealgos; ua->calgos = xt->calgos; ua->seq = x->km.seq = seq; err = copy_to_user_tmpl(xp, skb); if (!err) err = copy_to_user_state_sec_ctx(x, skb); if (!err) err = copy_to_user_policy_type(xp->type, skb); if (!err) err = xfrm_mark_put(skb, &xp->mark); if (!err) err = xfrm_if_id_put(skb, xp->if_id); if (!err && xp->xdo.dev) err = copy_user_offload(&xp->xdo, skb); if (!err && x->pcpu_num != UINT_MAX) err = nla_put_u32(skb, XFRMA_SA_PCPU, x->pcpu_num); if (err) { nlmsg_cancel(skb, nlh); return err; } nlmsg_end(skb, nlh); return 0; } static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, struct xfrm_policy *xp) { struct net *net = xs_net(x); struct sk_buff *skb; int err; skb = nlmsg_new(xfrm_acquire_msgsize(x, xp), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; err = build_acquire(skb, x, xt, xp); BUG_ON(err < 0); return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_ACQUIRE); } /* User gives us xfrm_user_policy_info followed by an array of 0 * or more templates. */ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt, u8 *data, int len, int *dir) { struct net *net = sock_net(sk); struct xfrm_userpolicy_info *p = (struct xfrm_userpolicy_info *)data; struct xfrm_user_tmpl *ut = (struct xfrm_user_tmpl *) (p + 1); struct xfrm_policy *xp; int nr; switch (sk->sk_family) { case AF_INET: if (opt != IP_XFRM_POLICY) { *dir = -EOPNOTSUPP; return NULL; } break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: if (opt != IPV6_XFRM_POLICY) { *dir = -EOPNOTSUPP; return NULL; } break; #endif default: *dir = -EINVAL; return NULL; } *dir = -EINVAL; if (len < sizeof(*p) || verify_newpolicy_info(p, NULL)) return NULL; nr = ((len - sizeof(*p)) / sizeof(*ut)); if (validate_tmpl(nr, ut, p->sel.family, p->dir, NULL)) return NULL; if (p->dir > XFRM_POLICY_OUT) return NULL; xp = xfrm_policy_alloc(net, GFP_ATOMIC); if (xp == NULL) { *dir = -ENOBUFS; return NULL; } copy_from_user_policy(xp, p); xp->type = XFRM_POLICY_TYPE_MAIN; copy_templates(xp, ut, nr); *dir = p->dir; return xp; } static inline unsigned int xfrm_polexpire_msgsize(struct xfrm_policy *xp) { return NLMSG_ALIGN(sizeof(struct xfrm_user_polexpire)) + nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr) + nla_total_size(xfrm_user_sec_ctx_size(xp->security)) + nla_total_size(sizeof(struct xfrm_mark)) + userpolicy_type_attrsize(); } static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, int dir, const struct km_event *c) { struct xfrm_user_polexpire *upe; int hard = c->data.hard; struct nlmsghdr *nlh; int err; nlh = nlmsg_put(skb, c->portid, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe), 0); if (nlh == NULL) return -EMSGSIZE; upe = nlmsg_data(nlh); copy_to_user_policy(xp, &upe->pol, dir); err = copy_to_user_tmpl(xp, skb); if (!err) err = copy_to_user_sec_ctx(xp, skb); if (!err) err = copy_to_user_policy_type(xp->type, skb); if (!err) err = xfrm_mark_put(skb, &xp->mark); if (!err) err = xfrm_if_id_put(skb, xp->if_id); if (!err && xp->xdo.dev) err = copy_user_offload(&xp->xdo, skb); if (err) { nlmsg_cancel(skb, nlh); return err; } upe->hard = !!hard; nlmsg_end(skb, nlh); return 0; } static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c) { struct net *net = xp_net(xp); struct sk_buff *skb; int err; skb = nlmsg_new(xfrm_polexpire_msgsize(xp), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; err = build_polexpire(skb, xp, dir, c); BUG_ON(err < 0); return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_EXPIRE); } static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_event *c) { unsigned int len = nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); struct net *net = xp_net(xp); struct xfrm_userpolicy_info *p; struct xfrm_userpolicy_id *id; struct nlmsghdr *nlh; struct sk_buff *skb; unsigned int headlen; int err; headlen = sizeof(*p); if (c->event == XFRM_MSG_DELPOLICY) { len += nla_total_size(headlen); headlen = sizeof(*id); } len += userpolicy_type_attrsize(); len += nla_total_size(sizeof(struct xfrm_mark)); len += NLMSG_ALIGN(headlen); skb = nlmsg_new(len, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; nlh = nlmsg_put(skb, c->portid, c->seq, c->event, headlen, 0); err = -EMSGSIZE; if (nlh == NULL) goto out_free_skb; p = nlmsg_data(nlh); if (c->event == XFRM_MSG_DELPOLICY) { struct nlattr *attr; id = nlmsg_data(nlh); memset(id, 0, sizeof(*id)); id->dir = dir; if (c->data.byid) id->index = xp->index; else memcpy(&id->sel, &xp->selector, sizeof(id->sel)); attr = nla_reserve(skb, XFRMA_POLICY, sizeof(*p)); err = -EMSGSIZE; if (attr == NULL) goto out_free_skb; p = nla_data(attr); } copy_to_user_policy(xp, p, dir); err = copy_to_user_tmpl(xp, skb); if (!err) err = copy_to_user_policy_type(xp->type, skb); if (!err) err = xfrm_mark_put(skb, &xp->mark); if (!err) err = xfrm_if_id_put(skb, xp->if_id); if (!err && xp->xdo.dev) err = copy_user_offload(&xp->xdo, skb); if (err) goto out_free_skb; nlmsg_end(skb, nlh); return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_POLICY); out_free_skb: kfree_skb(skb); return err; } static int xfrm_notify_policy_flush(const struct km_event *c) { struct net *net = c->net; struct nlmsghdr *nlh; struct sk_buff *skb; int err; skb = nlmsg_new(userpolicy_type_attrsize(), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; nlh = nlmsg_put(skb, c->portid, c->seq, XFRM_MSG_FLUSHPOLICY, 0, 0); err = -EMSGSIZE; if (nlh == NULL) goto out_free_skb; err = copy_to_user_policy_type(c->data.type, skb); if (err) goto out_free_skb; nlmsg_end(skb, nlh); return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_POLICY); out_free_skb: kfree_skb(skb); return err; } static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c) { switch (c->event) { case XFRM_MSG_NEWPOLICY: case XFRM_MSG_UPDPOLICY: case XFRM_MSG_DELPOLICY: return xfrm_notify_policy(xp, dir, c); case XFRM_MSG_FLUSHPOLICY: return xfrm_notify_policy_flush(c); case XFRM_MSG_POLEXPIRE: return xfrm_exp_policy_notify(xp, dir, c); default: printk(KERN_NOTICE "xfrm_user: Unknown Policy event %d\n", c->event); } return 0; } static inline unsigned int xfrm_report_msgsize(void) { return NLMSG_ALIGN(sizeof(struct xfrm_user_report)); } static int build_report(struct sk_buff *skb, u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr) { struct xfrm_user_report *ur; struct nlmsghdr *nlh; nlh = nlmsg_put(skb, 0, 0, XFRM_MSG_REPORT, sizeof(*ur), 0); if (nlh == NULL) return -EMSGSIZE; ur = nlmsg_data(nlh); ur->proto = proto; memcpy(&ur->sel, sel, sizeof(ur->sel)); if (addr) { int err = nla_put(skb, XFRMA_COADDR, sizeof(*addr), addr); if (err) { nlmsg_cancel(skb, nlh); return err; } } nlmsg_end(skb, nlh); return 0; } static int xfrm_send_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr) { struct sk_buff *skb; int err; skb = nlmsg_new(xfrm_report_msgsize(), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; err = build_report(skb, proto, sel, addr); BUG_ON(err < 0); return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_REPORT); } static inline unsigned int xfrm_mapping_msgsize(void) { return NLMSG_ALIGN(sizeof(struct xfrm_user_mapping)); } static int build_mapping(struct sk_buff *skb, struct xfrm_state *x, xfrm_address_t *new_saddr, __be16 new_sport) { struct xfrm_user_mapping *um; struct nlmsghdr *nlh; nlh = nlmsg_put(skb, 0, 0, XFRM_MSG_MAPPING, sizeof(*um), 0); if (nlh == NULL) return -EMSGSIZE; um = nlmsg_data(nlh); memcpy(&um->id.daddr, &x->id.daddr, sizeof(um->id.daddr)); um->id.spi = x->id.spi; um->id.family = x->props.family; um->id.proto = x->id.proto; memcpy(&um->new_saddr, new_saddr, sizeof(um->new_saddr)); memcpy(&um->old_saddr, &x->props.saddr, sizeof(um->old_saddr)); um->new_sport = new_sport; um->old_sport = x->encap->encap_sport; um->reqid = x->props.reqid; nlmsg_end(skb, nlh); return 0; } static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport) { struct net *net = xs_net(x); struct sk_buff *skb; int err; if (x->id.proto != IPPROTO_ESP) return -EINVAL; if (!x->encap) return -EINVAL; skb = nlmsg_new(xfrm_mapping_msgsize(), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; err = build_mapping(skb, x, ipaddr, sport); BUG_ON(err < 0); return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MAPPING); } static bool xfrm_is_alive(const struct km_event *c) { return (bool)xfrm_acquire_is_on(c->net); } static struct xfrm_mgr netlink_mgr = { .notify = xfrm_send_state_notify, .acquire = xfrm_send_acquire, .compile_policy = xfrm_compile_policy, .notify_policy = xfrm_send_policy_notify, .report = xfrm_send_report, .migrate = xfrm_send_migrate, .new_mapping = xfrm_send_mapping, .is_alive = xfrm_is_alive, }; static int __net_init xfrm_user_net_init(struct net *net) { struct sock *nlsk; struct netlink_kernel_cfg cfg = { .groups = XFRMNLGRP_MAX, .input = xfrm_netlink_rcv, }; nlsk = netlink_kernel_create(net, NETLINK_XFRM, &cfg); if (nlsk == NULL) return -ENOMEM; net->xfrm.nlsk_stash = nlsk; /* Don't set to NULL */ rcu_assign_pointer(net->xfrm.nlsk, nlsk); return 0; } static void __net_exit xfrm_user_net_pre_exit(struct net *net) { RCU_INIT_POINTER(net->xfrm.nlsk, NULL); } static void __net_exit xfrm_user_net_exit(struct list_head *net_exit_list) { struct net *net; list_for_each_entry(net, net_exit_list, exit_list) netlink_kernel_release(net->xfrm.nlsk_stash); } static struct pernet_operations xfrm_user_net_ops = { .init = xfrm_user_net_init, .pre_exit = xfrm_user_net_pre_exit, .exit_batch = xfrm_user_net_exit, }; static int __init xfrm_user_init(void) { int rv; printk(KERN_INFO "Initializing XFRM netlink socket\n"); rv = register_pernet_subsys(&xfrm_user_net_ops); if (rv < 0) return rv; xfrm_register_km(&netlink_mgr); return 0; } static void __exit xfrm_user_exit(void) { xfrm_unregister_km(&netlink_mgr); unregister_pernet_subsys(&xfrm_user_net_ops); } module_init(xfrm_user_init); module_exit(xfrm_user_exit); MODULE_DESCRIPTION("XFRM User interface"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_XFRM);
1 2 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 // SPDX-License-Identifier: GPL-2.0-or-later /* * Force feedback support for various HID compliant devices by ThrustMaster: * ThrustMaster FireStorm Dual Power 2 * and possibly others whose device ids haven't been added. * * Modified to support ThrustMaster devices by Zinx Verituse * on 2003-01-25 from the Logitech force feedback driver, * which is by Johann Deneux. * * Copyright (c) 2003 Zinx Verituse <zinx@epicsol.org> * Copyright (c) 2002 Johann Deneux */ /* */ #include <linux/hid.h> #include <linux/input.h> #include <linux/slab.h> #include <linux/module.h> #include "hid-ids.h" #define THRUSTMASTER_DEVICE_ID_2_IN_1_DT 0xb320 static const signed short ff_rumble[] = { FF_RUMBLE, -1 }; static const signed short ff_joystick[] = { FF_CONSTANT, -1 }; #ifdef CONFIG_THRUSTMASTER_FF /* Usages for thrustmaster devices I know about */ #define THRUSTMASTER_USAGE_FF (HID_UP_GENDESK | 0xbb) struct tmff_device { struct hid_report *report; struct hid_field *ff_field; }; /* Changes values from 0 to 0xffff into values from minimum to maximum */ static inline int tmff_scale_u16(unsigned int in, int minimum, int maximum) { int ret; ret = (in * (maximum - minimum) / 0xffff) + minimum; if (ret < minimum) return minimum; if (ret > maximum) return maximum; return ret; } /* Changes values from -0x80 to 0x7f into values from minimum to maximum */ static inline int tmff_scale_s8(int in, int minimum, int maximum) { int ret; ret = (((in + 0x80) * (maximum - minimum)) / 0xff) + minimum; if (ret < minimum) return minimum; if (ret > maximum) return maximum; return ret; } static int tmff_play(struct input_dev *dev, void *data, struct ff_effect *effect) { struct hid_device *hid = input_get_drvdata(dev); struct tmff_device *tmff = data; struct hid_field *ff_field = tmff->ff_field; int x, y; int left, right; /* Rumbling */ switch (effect->type) { case FF_CONSTANT: x = tmff_scale_s8(effect->u.ramp.start_level, ff_field->logical_minimum, ff_field->logical_maximum); y = tmff_scale_s8(effect->u.ramp.end_level, ff_field->logical_minimum, ff_field->logical_maximum); dbg_hid("(x, y)=(%04x, %04x)\n", x, y); ff_field->value[0] = x; ff_field->value[1] = y; hid_hw_request(hid, tmff->report, HID_REQ_SET_REPORT); break; case FF_RUMBLE: left = tmff_scale_u16(effect->u.rumble.weak_magnitude, ff_field->logical_minimum, ff_field->logical_maximum); right = tmff_scale_u16(effect->u.rumble.strong_magnitude, ff_field->logical_minimum, ff_field->logical_maximum); /* 2-in-1 strong motor is left */ if (hid->product == THRUSTMASTER_DEVICE_ID_2_IN_1_DT) swap(left, right); dbg_hid("(left,right)=(%08x, %08x)\n", left, right); ff_field->value[0] = left; ff_field->value[1] = right; hid_hw_request(hid, tmff->report, HID_REQ_SET_REPORT); break; } return 0; } static int tmff_init(struct hid_device *hid, const signed short *ff_bits) { struct tmff_device *tmff; struct hid_report *report; struct list_head *report_list; struct hid_input *hidinput; struct input_dev *input_dev; int error; int i; if (list_empty(&hid->inputs)) { hid_err(hid, "no inputs found\n"); return -ENODEV; } hidinput = list_entry(hid->inputs.next, struct hid_input, list); input_dev = hidinput->input; tmff = kzalloc(sizeof(struct tmff_device), GFP_KERNEL); if (!tmff) return -ENOMEM; /* Find the report to use */ report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; list_for_each_entry(report, report_list, list) { int fieldnum; for (fieldnum = 0; fieldnum < report->maxfield; ++fieldnum) { struct hid_field *field = report->field[fieldnum]; if (field->maxusage <= 0) continue; switch (field->usage[0].hid) { case THRUSTMASTER_USAGE_FF: if (field->report_count < 2) { hid_warn(hid, "ignoring FF field with report_count < 2\n"); continue; } if (field->logical_maximum == field->logical_minimum) { hid_warn(hid, "ignoring FF field with logical_maximum == logical_minimum\n"); continue; } if (tmff->report && tmff->report != report) { hid_warn(hid, "ignoring FF field in other report\n"); continue; } if (tmff->ff_field && tmff->ff_field != field) { hid_warn(hid, "ignoring duplicate FF field\n"); continue; } tmff->report = report; tmff->ff_field = field; for (i = 0; ff_bits[i] >= 0; i++) set_bit(ff_bits[i], input_dev->ffbit); break; default: hid_warn(hid, "ignoring unknown output usage %08x\n", field->usage[0].hid); continue; } } } if (!tmff->report) { hid_err(hid, "can't find FF field in output reports\n"); error = -ENODEV; goto fail; } error = input_ff_create_memless(input_dev, tmff, tmff_play); if (error) goto fail; hid_info(hid, "force feedback for ThrustMaster devices by Zinx Verituse <zinx@epicsol.org>\n"); return 0; fail: kfree(tmff); return error; } #else static inline int tmff_init(struct hid_device *hid, const signed short *ff_bits) { return 0; } #endif static int tm_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret; ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); goto err; } ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT & ~HID_CONNECT_FF); if (ret) { hid_err(hdev, "hw start failed\n"); goto err; } tmff_init(hdev, (void *)id->driver_data); return 0; err: return ret; } static const struct hid_device_id tm_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb300), .driver_data = (unsigned long)ff_rumble }, { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb304), /* FireStorm Dual Power 2 (and 3) */ .driver_data = (unsigned long)ff_rumble }, { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, THRUSTMASTER_DEVICE_ID_2_IN_1_DT), /* Dual Trigger 2-in-1 */ .driver_data = (unsigned long)ff_rumble }, { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb323), /* Dual Trigger 3-in-1 (PC Mode) */ .driver_data = (unsigned long)ff_rumble }, { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb324), /* Dual Trigger 3-in-1 (PS3 Mode) */ .driver_data = (unsigned long)ff_rumble }, { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb605), /* NASCAR PRO FF2 Wheel */ .driver_data = (unsigned long)ff_joystick }, { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb651), /* FGT Rumble Force Wheel */ .driver_data = (unsigned long)ff_rumble }, { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb653), /* RGT Force Feedback CLUTCH Raging Wheel */ .driver_data = (unsigned long)ff_joystick }, { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb654), /* FGT Force Feedback Wheel */ .driver_data = (unsigned long)ff_joystick }, { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb65a), /* F430 Force Feedback Wheel */ .driver_data = (unsigned long)ff_joystick }, { } }; MODULE_DEVICE_TABLE(hid, tm_devices); static struct hid_driver tm_driver = { .name = "thrustmaster", .id_table = tm_devices, .probe = tm_probe, }; module_hid_driver(tm_driver); MODULE_DESCRIPTION("Force feedback support for various HID compliant devices by ThrustMaster"); MODULE_LICENSE("GPL");
1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 // SPDX-License-Identifier: GPL-2.0-or-later /* * Force feedback support for GreenAsia (Product ID 0x12) based devices * * The devices are distributed under various names and the same USB device ID * can be used in many game controllers. * * 0e8f:0012 "GreenAsia Inc. USB Joystick " * - tested with MANTA Warior MM816 and SpeedLink Strike2 SL-6635. * * Copyright (c) 2008 Lukasz Lubojanski <lukasz@lubojanski.info> */ /* */ #include <linux/input.h> #include <linux/slab.h> #include <linux/hid.h> #include <linux/module.h> #include "hid-ids.h" #ifdef CONFIG_GREENASIA_FF struct gaff_device { struct hid_report *report; }; static int hid_gaff_play(struct input_dev *dev, void *data, struct ff_effect *effect) { struct hid_device *hid = input_get_drvdata(dev); struct gaff_device *gaff = data; int left, right; left = effect->u.rumble.strong_magnitude; right = effect->u.rumble.weak_magnitude; dbg_hid("called with 0x%04x 0x%04x", left, right); left = left * 0xfe / 0xffff; right = right * 0xfe / 0xffff; gaff->report->field[0]->value[0] = 0x51; gaff->report->field[0]->value[1] = 0x0; gaff->report->field[0]->value[2] = right; gaff->report->field[0]->value[3] = 0; gaff->report->field[0]->value[4] = left; gaff->report->field[0]->value[5] = 0; dbg_hid("running with 0x%02x 0x%02x", left, right); hid_hw_request(hid, gaff->report, HID_REQ_SET_REPORT); gaff->report->field[0]->value[0] = 0xfa; gaff->report->field[0]->value[1] = 0xfe; gaff->report->field[0]->value[2] = 0x0; gaff->report->field[0]->value[4] = 0x0; hid_hw_request(hid, gaff->report, HID_REQ_SET_REPORT); return 0; } static int gaff_init(struct hid_device *hid) { struct gaff_device *gaff; struct hid_report *report; struct hid_input *hidinput; struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; struct list_head *report_ptr = report_list; struct input_dev *dev; int error; if (list_empty(&hid->inputs)) { hid_err(hid, "no inputs found\n"); return -ENODEV; } hidinput = list_entry(hid->inputs.next, struct hid_input, list); dev = hidinput->input; if (list_empty(report_list)) { hid_err(hid, "no output reports found\n"); return -ENODEV; } report_ptr = report_ptr->next; report = list_entry(report_ptr, struct hid_report, list); if (report->maxfield < 1) { hid_err(hid, "no fields in the report\n"); return -ENODEV; } if (report->field[0]->report_count < 6) { hid_err(hid, "not enough values in the field\n"); return -ENODEV; } gaff = kzalloc(sizeof(struct gaff_device), GFP_KERNEL); if (!gaff) return -ENOMEM; set_bit(FF_RUMBLE, dev->ffbit); error = input_ff_create_memless(dev, gaff, hid_gaff_play); if (error) { kfree(gaff); return error; } gaff->report = report; gaff->report->field[0]->value[0] = 0x51; gaff->report->field[0]->value[1] = 0x00; gaff->report->field[0]->value[2] = 0x00; gaff->report->field[0]->value[3] = 0x00; hid_hw_request(hid, gaff->report, HID_REQ_SET_REPORT); gaff->report->field[0]->value[0] = 0xfa; gaff->report->field[0]->value[1] = 0xfe; hid_hw_request(hid, gaff->report, HID_REQ_SET_REPORT); hid_info(hid, "Force Feedback for GreenAsia 0x12 devices by Lukasz Lubojanski <lukasz@lubojanski.info>\n"); return 0; } #else static inline int gaff_init(struct hid_device *hdev) { return 0; } #endif static int ga_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret; dev_dbg(&hdev->dev, "Greenasia HID hardware probe..."); ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); goto err; } ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT & ~HID_CONNECT_FF); if (ret) { hid_err(hdev, "hw start failed\n"); goto err; } gaff_init(hdev); return 0; err: return ret; } static const struct hid_device_id ga_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_GREENASIA, 0x0012), }, { } }; MODULE_DEVICE_TABLE(hid, ga_devices); static struct hid_driver ga_driver = { .name = "greenasia", .id_table = ga_devices, .probe = ga_probe, }; module_hid_driver(ga_driver); MODULE_DESCRIPTION("Force feedback support for GreenAsia (Product ID 0x12) based devices"); MODULE_LICENSE("GPL");
7 7 7 7 7 7 6 7 7 7 7 9 9 9 2 7 7 7 7 7 9 9 2 2 1 8 7 1 7 7 7 7 7 8 5 3 7 1 7 1 1 7 7 7 7 9 9 8 1 9 9 8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. */ #include "xfs_platform.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_bmap.h" #include "xfs_alloc.h" #include "xfs_fsops.h" #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_log.h" #include "xfs_log_priv.h" #include "xfs_dir2.h" #include "xfs_extfree_item.h" #include "xfs_mru_cache.h" #include "xfs_inode_item.h" #include "xfs_icache.h" #include "xfs_trace.h" #include "xfs_icreate_item.h" #include "xfs_filestream.h" #include "xfs_quota.h" #include "xfs_sysfs.h" #include "xfs_ondisk.h" #include "xfs_rmap_item.h" #include "xfs_refcount_item.h" #include "xfs_bmap_item.h" #include "xfs_reflink.h" #include "xfs_pwork.h" #include "xfs_ag.h" #include "xfs_defer.h" #include "xfs_attr_item.h" #include "xfs_xattr.h" #include "xfs_iunlink_item.h" #include "xfs_dahash_test.h" #include "xfs_rtbitmap.h" #include "xfs_exchmaps_item.h" #include "xfs_parent.h" #include "xfs_rtalloc.h" #include "xfs_zone_alloc.h" #include "scrub/stats.h" #include "scrub/rcbag_btree.h" #include <linux/magic.h> #include <linux/fs_context.h> #include <linux/fs_parser.h> static const struct super_operations xfs_super_operations; static struct dentry *xfs_debugfs; /* top-level xfs debugfs dir */ static struct kset *xfs_kset; /* top-level xfs sysfs dir */ #ifdef DEBUG static struct xfs_kobj xfs_dbg_kobj; /* global debug sysfs attrs */ #endif enum xfs_dax_mode { XFS_DAX_INODE = 0, XFS_DAX_ALWAYS = 1, XFS_DAX_NEVER = 2, }; /* Were quota mount options provided? Must use the upper 16 bits of qflags. */ #define XFS_QFLAGS_MNTOPTS (1U << 31) static void xfs_mount_set_dax_mode( struct xfs_mount *mp, enum xfs_dax_mode mode) { switch (mode) { case XFS_DAX_INODE: mp->m_features &= ~(XFS_FEAT_DAX_ALWAYS | XFS_FEAT_DAX_NEVER); break; case XFS_DAX_ALWAYS: mp->m_features |= XFS_FEAT_DAX_ALWAYS; mp->m_features &= ~XFS_FEAT_DAX_NEVER; break; case XFS_DAX_NEVER: mp->m_features |= XFS_FEAT_DAX_NEVER; mp->m_features &= ~XFS_FEAT_DAX_ALWAYS; break; } } static const struct constant_table dax_param_enums[] = { {"inode", XFS_DAX_INODE }, {"always", XFS_DAX_ALWAYS }, {"never", XFS_DAX_NEVER }, {} }; /* * Table driven mount option parser. */ enum { Op_deprecated, Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev, Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth, Opt_nouuid, Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups, Opt_allocsize, Opt_norecovery, Opt_inode64, Opt_inode32, Opt_largeio, Opt_nolargeio, Opt_filestreams, Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota, Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce, Opt_discard, Opt_nodiscard, Opt_dax, Opt_dax_enum, Opt_max_open_zones, Opt_lifetime, Opt_nolifetime, Opt_max_atomic_write, }; #define fsparam_dead(NAME) \ __fsparam(NULL, (NAME), Op_deprecated, fs_param_deprecated, NULL) static const struct fs_parameter_spec xfs_fs_parameters[] = { /* * These mount options were supposed to be deprecated in September 2025 * but the deprecation warning was buggy, so not all users were * notified. The deprecation is now obnoxiously loud and postponed to * September 2030. */ fsparam_dead("attr2"), fsparam_dead("noattr2"), fsparam_dead("ikeep"), fsparam_dead("noikeep"), fsparam_u32("logbufs", Opt_logbufs), fsparam_string("logbsize", Opt_logbsize), fsparam_string("logdev", Opt_logdev), fsparam_string("rtdev", Opt_rtdev), fsparam_flag("wsync", Opt_wsync), fsparam_flag("noalign", Opt_noalign), fsparam_flag("swalloc", Opt_swalloc), fsparam_u32("sunit", Opt_sunit), fsparam_u32("swidth", Opt_swidth), fsparam_flag("nouuid", Opt_nouuid), fsparam_flag("grpid", Opt_grpid), fsparam_flag("nogrpid", Opt_nogrpid), fsparam_flag("bsdgroups", Opt_bsdgroups), fsparam_flag("sysvgroups", Opt_sysvgroups), fsparam_string("allocsize", Opt_allocsize), fsparam_flag("norecovery", Opt_norecovery), fsparam_flag("inode64", Opt_inode64), fsparam_flag("inode32", Opt_inode32), fsparam_flag("largeio", Opt_largeio), fsparam_flag("nolargeio", Opt_nolargeio), fsparam_flag("filestreams", Opt_filestreams), fsparam_flag("quota", Opt_quota), fsparam_flag("noquota", Opt_noquota), fsparam_flag("usrquota", Opt_usrquota), fsparam_flag("grpquota", Opt_grpquota), fsparam_flag("prjquota", Opt_prjquota), fsparam_flag("uquota", Opt_uquota), fsparam_flag("gquota", Opt_gquota), fsparam_flag("pquota", Opt_pquota), fsparam_flag("uqnoenforce", Opt_uqnoenforce), fsparam_flag("gqnoenforce", Opt_gqnoenforce), fsparam_flag("pqnoenforce", Opt_pqnoenforce), fsparam_flag("qnoenforce", Opt_qnoenforce), fsparam_flag("discard", Opt_discard), fsparam_flag("nodiscard", Opt_nodiscard), fsparam_flag("dax", Opt_dax), fsparam_enum("dax", Opt_dax_enum, dax_param_enums), fsparam_u32("max_open_zones", Opt_max_open_zones), fsparam_flag("lifetime", Opt_lifetime), fsparam_flag("nolifetime", Opt_nolifetime), fsparam_string("max_atomic_write", Opt_max_atomic_write), {} }; struct proc_xfs_info { uint64_t flag; char *str; }; static int xfs_fs_show_options( struct seq_file *m, struct dentry *root) { static struct proc_xfs_info xfs_info_set[] = { /* the few simple ones we can get from the mount struct */ { XFS_FEAT_WSYNC, ",wsync" }, { XFS_FEAT_NOALIGN, ",noalign" }, { XFS_FEAT_SWALLOC, ",swalloc" }, { XFS_FEAT_NOUUID, ",nouuid" }, { XFS_FEAT_NORECOVERY, ",norecovery" }, { XFS_FEAT_FILESTREAMS, ",filestreams" }, { XFS_FEAT_GRPID, ",grpid" }, { XFS_FEAT_DISCARD, ",discard" }, { XFS_FEAT_LARGE_IOSIZE, ",largeio" }, { XFS_FEAT_DAX_ALWAYS, ",dax=always" }, { XFS_FEAT_DAX_NEVER, ",dax=never" }, { XFS_FEAT_NOLIFETIME, ",nolifetime" }, { 0, NULL } }; struct xfs_mount *mp = XFS_M(root->d_sb); struct proc_xfs_info *xfs_infop; for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) { if (mp->m_features & xfs_infop->flag) seq_puts(m, xfs_infop->str); } seq_printf(m, ",inode%d", xfs_has_small_inums(mp) ? 32 : 64); if (xfs_has_allocsize(mp)) seq_printf(m, ",allocsize=%dk", (1 << mp->m_allocsize_log) >> 10); if (mp->m_logbufs > 0) seq_printf(m, ",logbufs=%d", mp->m_logbufs); if (mp->m_logbsize > 0) seq_printf(m, ",logbsize=%dk", mp->m_logbsize >> 10); if (mp->m_logname) seq_show_option(m, "logdev", mp->m_logname); if (mp->m_rtname) seq_show_option(m, "rtdev", mp->m_rtname); if (mp->m_dalign > 0) seq_printf(m, ",sunit=%d", (int)XFS_FSB_TO_BB(mp, mp->m_dalign)); if (mp->m_swidth > 0) seq_printf(m, ",swidth=%d", (int)XFS_FSB_TO_BB(mp, mp->m_swidth)); if (mp->m_qflags & XFS_UQUOTA_ENFD) seq_puts(m, ",usrquota"); else if (mp->m_qflags & XFS_UQUOTA_ACCT) seq_puts(m, ",uqnoenforce"); if (mp->m_qflags & XFS_PQUOTA_ENFD) seq_puts(m, ",prjquota"); else if (mp->m_qflags & XFS_PQUOTA_ACCT) seq_puts(m, ",pqnoenforce"); if (mp->m_qflags & XFS_GQUOTA_ENFD) seq_puts(m, ",grpquota"); else if (mp->m_qflags & XFS_GQUOTA_ACCT) seq_puts(m, ",gqnoenforce"); if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT)) seq_puts(m, ",noquota"); if (mp->m_max_open_zones) seq_printf(m, ",max_open_zones=%u", mp->m_max_open_zones); if (mp->m_awu_max_bytes) seq_printf(m, ",max_atomic_write=%lluk", mp->m_awu_max_bytes >> 10); return 0; } static bool xfs_set_inode_alloc_perag( struct xfs_perag *pag, xfs_ino_t ino, xfs_agnumber_t max_metadata) { if (!xfs_is_inode32(pag_mount(pag))) { set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate); clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate); return false; } if (ino > XFS_MAXINUMBER_32) { clear_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate); clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate); return false; } set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate); if (pag_agno(pag) < max_metadata) set_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate); else clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate); return true; } /* * Set parameters for inode allocation heuristics, taking into account * filesystem size and inode32/inode64 mount options; i.e. specifically * whether or not XFS_FEAT_SMALL_INUMS is set. * * Inode allocation patterns are altered only if inode32 is requested * (XFS_FEAT_SMALL_INUMS), and the filesystem is sufficiently large. * If altered, XFS_OPSTATE_INODE32 is set as well. * * An agcount independent of that in the mount structure is provided * because in the growfs case, mp->m_sb.sb_agcount is not yet updated * to the potentially higher ag count. * * Returns the maximum AG index which may contain inodes. */ xfs_agnumber_t xfs_set_inode_alloc( struct xfs_mount *mp, xfs_agnumber_t agcount) { xfs_agnumber_t index; xfs_agnumber_t maxagi = 0; xfs_sb_t *sbp = &mp->m_sb; xfs_agnumber_t max_metadata; xfs_agino_t agino; xfs_ino_t ino; /* * Calculate how much should be reserved for inodes to meet * the max inode percentage. Used only for inode32. */ if (M_IGEO(mp)->maxicount) { uint64_t icount; icount = sbp->sb_dblocks * sbp->sb_imax_pct; do_div(icount, 100); icount += sbp->sb_agblocks - 1; do_div(icount, sbp->sb_agblocks); max_metadata = icount; } else { max_metadata = agcount; } /* Get the last possible inode in the filesystem */ agino = XFS_AGB_TO_AGINO(mp, sbp->sb_agblocks - 1); ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino); /* * If user asked for no more than 32-bit inodes, and the fs is * sufficiently large, set XFS_OPSTATE_INODE32 if we must alter * the allocator to accommodate the request. */ if (xfs_has_small_inums(mp) && ino > XFS_MAXINUMBER_32) xfs_set_inode32(mp); else xfs_clear_inode32(mp); for (index = 0; index < agcount; index++) { struct xfs_perag *pag; ino = XFS_AGINO_TO_INO(mp, index, agino); pag = xfs_perag_get(mp, index); if (xfs_set_inode_alloc_perag(pag, ino, max_metadata)) maxagi++; xfs_perag_put(pag); } return xfs_is_inode32(mp) ? maxagi : agcount; } static int xfs_setup_dax_always( struct xfs_mount *mp) { if (!mp->m_ddev_targp->bt_daxdev && (!mp->m_rtdev_targp || !mp->m_rtdev_targp->bt_daxdev)) { xfs_alert(mp, "DAX unsupported by block device. Turning off DAX."); goto disable_dax; } if (mp->m_super->s_blocksize != PAGE_SIZE) { xfs_alert(mp, "DAX not supported for blocksize. Turning off DAX."); goto disable_dax; } if (xfs_has_reflink(mp) && bdev_is_partition(mp->m_ddev_targp->bt_bdev)) { xfs_alert(mp, "DAX and reflink cannot work with multi-partitions!"); return -EINVAL; } return 0; disable_dax: xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER); return 0; } STATIC int xfs_blkdev_get( xfs_mount_t *mp, const char *name, struct file **bdev_filep) { int error = 0; blk_mode_t mode; mode = sb_open_mode(mp->m_super->s_flags); *bdev_filep = bdev_file_open_by_path(name, mode, mp->m_super, &fs_holder_ops); if (IS_ERR(*bdev_filep)) { error = PTR_ERR(*bdev_filep); *bdev_filep = NULL; xfs_warn(mp, "Invalid device [%s], error=%d", name, error); } return error; } STATIC void xfs_shutdown_devices( struct xfs_mount *mp) { /* * Udev is triggered whenever anyone closes a block device or unmounts * a file systemm on a block device. * The default udev rules invoke blkid to read the fs super and create * symlinks to the bdev under /dev/disk. For this, it uses buffered * reads through the page cache. * * xfs_db also uses buffered reads to examine metadata. There is no * coordination between xfs_db and udev, which means that they can run * concurrently. Note there is no coordination between the kernel and * blkid either. * * On a system with 64k pages, the page cache can cache the superblock * and the root inode (and hence the root directory) with the same 64k * page. If udev spawns blkid after the mkfs and the system is busy * enough that it is still running when xfs_db starts up, they'll both * read from the same page in the pagecache. * * The unmount writes updated inode metadata to disk directly. The XFS * buffer cache does not use the bdev pagecache, so it needs to * invalidate that pagecache on unmount. If the above scenario occurs, * the pagecache no longer reflects what's on disk, xfs_db reads the * stale metadata, and fails to find /a. Most of the time this succeeds * because closing a bdev invalidates the page cache, but when processes * race, everyone loses. */ if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) { blkdev_issue_flush(mp->m_logdev_targp->bt_bdev); invalidate_bdev(mp->m_logdev_targp->bt_bdev); } if (mp->m_rtdev_targp) { blkdev_issue_flush(mp->m_rtdev_targp->bt_bdev); invalidate_bdev(mp->m_rtdev_targp->bt_bdev); } blkdev_issue_flush(mp->m_ddev_targp->bt_bdev); invalidate_bdev(mp->m_ddev_targp->bt_bdev); } /* * The file system configurations are: * (1) device (partition) with data and internal log * (2) logical volume with data and log subvolumes. * (3) logical volume with data, log, and realtime subvolumes. * * We only have to handle opening the log and realtime volumes here if * they are present. The data subvolume has already been opened by * get_sb_bdev() and is stored in sb->s_bdev. */ STATIC int xfs_open_devices( struct xfs_mount *mp) { struct super_block *sb = mp->m_super; struct block_device *ddev = sb->s_bdev; struct file *logdev_file = NULL, *rtdev_file = NULL; int error; /* * Open real time and log devices - order is important. */ if (mp->m_logname) { error = xfs_blkdev_get(mp, mp->m_logname, &logdev_file); if (error) return error; } if (mp->m_rtname) { error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev_file); if (error) goto out_close_logdev; if (file_bdev(rtdev_file) == ddev || (logdev_file && file_bdev(rtdev_file) == file_bdev(logdev_file))) { xfs_warn(mp, "Cannot mount filesystem with identical rtdev and ddev/logdev."); error = -EINVAL; goto out_close_rtdev; } } /* * Setup xfs_mount buffer target pointers */ mp->m_ddev_targp = xfs_alloc_buftarg(mp, sb->s_bdev_file); if (IS_ERR(mp->m_ddev_targp)) { error = PTR_ERR(mp->m_ddev_targp); mp->m_ddev_targp = NULL; goto out_close_rtdev; } if (rtdev_file) { mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev_file); if (IS_ERR(mp->m_rtdev_targp)) { error = PTR_ERR(mp->m_rtdev_targp); mp->m_rtdev_targp = NULL; goto out_free_ddev_targ; } } if (logdev_file && file_bdev(logdev_file) != ddev) { mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev_file); if (IS_ERR(mp->m_logdev_targp)) { error = PTR_ERR(mp->m_logdev_targp); mp->m_logdev_targp = NULL; goto out_free_rtdev_targ; } } else { mp->m_logdev_targp = mp->m_ddev_targp; /* Handle won't be used, drop it */ if (logdev_file) bdev_fput(logdev_file); } return 0; out_free_rtdev_targ: if (mp->m_rtdev_targp) xfs_free_buftarg(mp->m_rtdev_targp); out_free_ddev_targ: xfs_free_buftarg(mp->m_ddev_targp); out_close_rtdev: if (rtdev_file) bdev_fput(rtdev_file); out_close_logdev: if (logdev_file) bdev_fput(logdev_file); return error; } /* * Setup xfs_mount buffer target pointers based on superblock */ STATIC int xfs_setup_devices( struct xfs_mount *mp) { int error; error = xfs_configure_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize, mp->m_sb.sb_dblocks); if (error) return error; if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) { unsigned int log_sector_size = BBSIZE; if (xfs_has_sector(mp)) log_sector_size = mp->m_sb.sb_logsectsize; error = xfs_configure_buftarg(mp->m_logdev_targp, log_sector_size, mp->m_sb.sb_logblocks); if (error) return error; } if (mp->m_sb.sb_rtstart) { if (mp->m_rtdev_targp) { xfs_warn(mp, "can't use internal and external rtdev at the same time"); return -EINVAL; } mp->m_rtdev_targp = mp->m_ddev_targp; } else if (mp->m_rtname) { error = xfs_configure_buftarg(mp->m_rtdev_targp, mp->m_sb.sb_sectsize, mp->m_sb.sb_rblocks); if (error) return error; } return 0; } STATIC int xfs_init_mount_workqueues( struct xfs_mount *mp) { mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s", XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU), 1, mp->m_super->s_id); if (!mp->m_buf_workqueue) goto out; mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s", XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU), 0, mp->m_super->s_id); if (!mp->m_unwritten_workqueue) goto out_destroy_buf; mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s", XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU), 0, mp->m_super->s_id); if (!mp->m_reclaim_workqueue) goto out_destroy_unwritten; mp->m_blockgc_wq = alloc_workqueue("xfs-blockgc/%s", XFS_WQFLAGS(WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM), 0, mp->m_super->s_id); if (!mp->m_blockgc_wq) goto out_destroy_reclaim; mp->m_inodegc_wq = alloc_workqueue("xfs-inodegc/%s", XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU), 1, mp->m_super->s_id); if (!mp->m_inodegc_wq) goto out_destroy_blockgc; mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", XFS_WQFLAGS(WQ_FREEZABLE | WQ_PERCPU), 0, mp->m_super->s_id); if (!mp->m_sync_workqueue) goto out_destroy_inodegc; return 0; out_destroy_inodegc: destroy_workqueue(mp->m_inodegc_wq); out_destroy_blockgc: destroy_workqueue(mp->m_blockgc_wq); out_destroy_reclaim: destroy_workqueue(mp->m_reclaim_workqueue); out_destroy_unwritten: destroy_workqueue(mp->m_unwritten_workqueue); out_destroy_buf: destroy_workqueue(mp->m_buf_workqueue); out: return -ENOMEM; } STATIC void xfs_destroy_mount_workqueues( struct xfs_mount *mp) { destroy_workqueue(mp->m_sync_workqueue); destroy_workqueue(mp->m_blockgc_wq); destroy_workqueue(mp->m_inodegc_wq); destroy_workqueue(mp->m_reclaim_workqueue); destroy_workqueue(mp->m_unwritten_workqueue); destroy_workqueue(mp->m_buf_workqueue); } static void xfs_flush_inodes_worker( struct work_struct *work) { struct xfs_mount *mp = container_of(work, struct xfs_mount, m_flush_inodes_work); struct super_block *sb = mp->m_super; if (down_read_trylock(&sb->s_umount)) { sync_inodes_sb(sb); up_read(&sb->s_umount); } } /* * Flush all dirty data to disk. Must not be called while holding an XFS_ILOCK * or a page lock. We use sync_inodes_sb() here to ensure we block while waiting * for IO to complete so that we effectively throttle multiple callers to the * rate at which IO is completing. */ void xfs_flush_inodes( struct xfs_mount *mp) { /* * If flush_work() returns true then that means we waited for a flush * which was already in progress. Don't bother running another scan. */ if (flush_work(&mp->m_flush_inodes_work)) return; queue_work(mp->m_sync_workqueue, &mp->m_flush_inodes_work); flush_work(&mp->m_flush_inodes_work); } /* Catch misguided souls that try to use this interface on XFS */ STATIC struct inode * xfs_fs_alloc_inode( struct super_block *sb) { BUG(); return NULL; } /* * Now that the generic code is guaranteed not to be accessing * the linux inode, we can inactivate and reclaim the inode. */ STATIC void xfs_fs_destroy_inode( struct inode *inode) { struct xfs_inode *ip = XFS_I(inode); trace_xfs_destroy_inode(ip); ASSERT(!rwsem_is_locked(&inode->i_rwsem)); XFS_STATS_INC(ip->i_mount, vn_rele); XFS_STATS_INC(ip->i_mount, vn_remove); xfs_inode_mark_reclaimable(ip); } /* * Slab object creation initialisation for the XFS inode. * This covers only the idempotent fields in the XFS inode; * all other fields need to be initialised on allocation * from the slab. This avoids the need to repeatedly initialise * fields in the xfs inode that left in the initialise state * when freeing the inode. */ STATIC void xfs_fs_inode_init_once( void *inode) { struct xfs_inode *ip = inode; memset(ip, 0, sizeof(struct xfs_inode)); /* vfs inode */ inode_init_once(VFS_I(ip)); /* xfs inode */ atomic_set(&ip->i_pincount, 0); spin_lock_init(&ip->i_flags_lock); init_rwsem(&ip->i_lock); } /* * We do an unlocked check for XFS_IDONTCACHE here because we are already * serialised against cache hits here via the inode->i_lock and igrab() in * xfs_iget_cache_hit(). Hence a lookup that might clear this flag will not be * racing with us, and it avoids needing to grab a spinlock here for every inode * we drop the final reference on. */ STATIC int xfs_fs_drop_inode( struct inode *inode) { struct xfs_inode *ip = XFS_I(inode); /* * If this unlinked inode is in the middle of recovery, don't * drop the inode just yet; log recovery will take care of * that. See the comment for this inode flag. */ if (ip->i_flags & XFS_IRECOVERY) { ASSERT(xlog_recovery_needed(ip->i_mount->m_log)); return 0; } return inode_generic_drop(inode); } STATIC void xfs_fs_evict_inode( struct inode *inode) { if (IS_DAX(inode)) dax_break_layout_final(inode); truncate_inode_pages_final(&inode->i_data); clear_inode(inode); if (IS_ENABLED(CONFIG_XFS_RT) && S_ISREG(inode->i_mode) && inode->i_private) { xfs_open_zone_put(inode->i_private); inode->i_private = NULL; } } static void xfs_mount_free( struct xfs_mount *mp) { if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) xfs_free_buftarg(mp->m_logdev_targp); if (mp->m_rtdev_targp && mp->m_rtdev_targp != mp->m_ddev_targp) xfs_free_buftarg(mp->m_rtdev_targp); if (mp->m_ddev_targp) xfs_free_buftarg(mp->m_ddev_targp); debugfs_remove(mp->m_debugfs); kfree(mp->m_rtname); kfree(mp->m_logname); kfree(mp); } STATIC int xfs_fs_sync_fs( struct super_block *sb, int wait) { struct xfs_mount *mp = XFS_M(sb); int error; trace_xfs_fs_sync_fs(mp, __return_address); /* * Doing anything during the async pass would be counterproductive. */ if (!wait) return 0; error = xfs_log_force(mp, XFS_LOG_SYNC); if (error) return error; /* * If we are called with page faults frozen out, it means we are about * to freeze the transaction subsystem. Take the opportunity to shut * down inodegc because once SB_FREEZE_FS is set it's too late to * prevent inactivation races with freeze. The fs doesn't get called * again by the freezing process until after SB_FREEZE_FS has been set, * so it's now or never. Same logic applies to speculative allocation * garbage collection. * * We don't care if this is a normal syncfs call that does this or * freeze that does this - we can run this multiple times without issue * and we won't race with a restart because a restart can only occur * when the state is either SB_FREEZE_FS or SB_FREEZE_COMPLETE. */ if (sb->s_writers.frozen == SB_FREEZE_PAGEFAULT) { xfs_inodegc_stop(mp); xfs_blockgc_stop(mp); xfs_zone_gc_stop(mp); } return 0; } static xfs_extlen_t xfs_internal_log_size( struct xfs_mount *mp) { if (!mp->m_sb.sb_logstart) return 0; return mp->m_sb.sb_logblocks; } static void xfs_statfs_data( struct xfs_mount *mp, struct kstatfs *st) { int64_t fdblocks = xfs_sum_freecounter(mp, XC_FREE_BLOCKS); /* make sure st->f_bfree does not underflow */ st->f_bfree = max(0LL, fdblocks - xfs_freecounter_unavailable(mp, XC_FREE_BLOCKS)); /* * sb_dblocks can change during growfs, but nothing cares about reporting * the old or new value during growfs. */ st->f_blocks = mp->m_sb.sb_dblocks - xfs_internal_log_size(mp); } /* * When stat(v)fs is called on a file with the realtime bit set or a directory * with the rtinherit bit, report freespace information for the RT device * instead of the main data device. */ static void xfs_statfs_rt( struct xfs_mount *mp, struct kstatfs *st) { st->f_bfree = xfs_rtbxlen_to_blen(mp, xfs_sum_freecounter(mp, XC_FREE_RTEXTENTS)); st->f_blocks = mp->m_sb.sb_rblocks - xfs_rtbxlen_to_blen(mp, mp->m_free[XC_FREE_RTEXTENTS].res_total); } static void xfs_statfs_inodes( struct xfs_mount *mp, struct kstatfs *st) { uint64_t icount = percpu_counter_sum(&mp->m_icount); uint64_t ifree = percpu_counter_sum(&mp->m_ifree); uint64_t fakeinos = XFS_FSB_TO_INO(mp, st->f_bfree); st->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER); if (M_IGEO(mp)->maxicount) st->f_files = min_t(typeof(st->f_files), st->f_files, M_IGEO(mp)->maxicount); /* If sb_icount overshot maxicount, report actual allocation */ st->f_files = max_t(typeof(st->f_files), st->f_files, mp->m_sb.sb_icount); /* Make sure st->f_ffree does not underflow */ st->f_ffree = max_t(int64_t, 0, st->f_files - (icount - ifree)); } STATIC int xfs_fs_statfs( struct dentry *dentry, struct kstatfs *st) { struct xfs_mount *mp = XFS_M(dentry->d_sb); struct xfs_inode *ip = XFS_I(d_inode(dentry)); /* * Expedite background inodegc but don't wait. We do not want to block * here waiting hours for a billion extent file to be truncated. */ xfs_inodegc_push(mp); st->f_type = XFS_SUPER_MAGIC; st->f_namelen = MAXNAMELEN - 1; st->f_bsize = mp->m_sb.sb_blocksize; st->f_fsid = u64_to_fsid(huge_encode_dev(mp->m_ddev_targp->bt_dev)); xfs_statfs_data(mp, st); xfs_statfs_inodes(mp, st); if (XFS_IS_REALTIME_MOUNT(mp) && (ip->i_diflags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME))) xfs_statfs_rt(mp, st); if ((ip->i_diflags & XFS_DIFLAG_PROJINHERIT) && ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) == (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD)) xfs_qm_statvfs(ip, st); /* * XFS does not distinguish between blocks available to privileged and * unprivileged users. */ st->f_bavail = st->f_bfree; return 0; } STATIC void xfs_save_resvblks( struct xfs_mount *mp) { enum xfs_free_counter i; for (i = 0; i < XC_FREE_NR; i++) { mp->m_free[i].res_saved = mp->m_free[i].res_total; xfs_reserve_blocks(mp, i, 0); } } STATIC void xfs_restore_resvblks( struct xfs_mount *mp) { uint64_t resblks; enum xfs_free_counter i; for (i = 0; i < XC_FREE_NR; i++) { if (mp->m_free[i].res_saved) { resblks = mp->m_free[i].res_saved; mp->m_free[i].res_saved = 0; } else resblks = xfs_default_resblks(mp, i); xfs_reserve_blocks(mp, i, resblks); } } /* * Second stage of a freeze. The data is already frozen so we only * need to take care of the metadata. Once that's done sync the superblock * to the log to dirty it in case of a crash while frozen. This ensures that we * will recover the unlinked inode lists on the next mount. */ STATIC int xfs_fs_freeze( struct super_block *sb) { struct xfs_mount *mp = XFS_M(sb); unsigned int flags; int ret; /* * The filesystem is now frozen far enough that memory reclaim * cannot safely operate on the filesystem. Hence we need to * set a GFP_NOFS context here to avoid recursion deadlocks. */ flags = memalloc_nofs_save(); xfs_save_resvblks(mp); ret = xfs_log_quiesce(mp); memalloc_nofs_restore(flags); /* * For read-write filesystems, we need to restart the inodegc on error * because we stopped it at SB_FREEZE_PAGEFAULT level and a thaw is not * going to be run to restart it now. We are at SB_FREEZE_FS level * here, so we can restart safely without racing with a stop in * xfs_fs_sync_fs(). */ if (ret && !xfs_is_readonly(mp)) { xfs_blockgc_start(mp); xfs_inodegc_start(mp); xfs_zone_gc_start(mp); } return ret; } STATIC int xfs_fs_unfreeze( struct super_block *sb) { struct xfs_mount *mp = XFS_M(sb); xfs_restore_resvblks(mp); xfs_log_work_queue(mp); /* * Don't reactivate the inodegc worker on a readonly filesystem because * inodes are sent directly to reclaim. Don't reactivate the blockgc * worker because there are no speculative preallocations on a readonly * filesystem. */ if (!xfs_is_readonly(mp)) { xfs_zone_gc_start(mp); xfs_blockgc_start(mp); xfs_inodegc_start(mp); } return 0; } /* * This function fills in xfs_mount_t fields based on mount args. * Note: the superblock _has_ now been read in. */ STATIC int xfs_finish_flags( struct xfs_mount *mp) { /* Fail a mount where the logbuf is smaller than the log stripe */ if (xfs_has_logv2(mp)) { if (mp->m_logbsize <= 0 && mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) { mp->m_logbsize = mp->m_sb.sb_logsunit; } else if (mp->m_logbsize > 0 && mp->m_logbsize < mp->m_sb.sb_logsunit) { xfs_warn(mp, "logbuf size must be greater than or equal to log stripe size"); return -EINVAL; } } else { /* Fail a mount if the logbuf is larger than 32K */ if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) { xfs_warn(mp, "logbuf size for version 1 logs must be 16K or 32K"); return -EINVAL; } } /* * prohibit r/w mounts of read-only filesystems */ if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !xfs_is_readonly(mp)) { xfs_warn(mp, "cannot mount a read-only filesystem as read-write"); return -EROFS; } if ((mp->m_qflags & XFS_GQUOTA_ACCT) && (mp->m_qflags & XFS_PQUOTA_ACCT) && !xfs_has_pquotino(mp)) { xfs_warn(mp, "Super block does not support project and group quota together"); return -EINVAL; } if (!xfs_has_zoned(mp)) { if (mp->m_max_open_zones) { xfs_warn(mp, "max_open_zones mount option only supported on zoned file systems."); return -EINVAL; } if (mp->m_features & XFS_FEAT_NOLIFETIME) { xfs_warn(mp, "nolifetime mount option only supported on zoned file systems."); return -EINVAL; } } return 0; } static int xfs_init_percpu_counters( struct xfs_mount *mp) { int error; int i; error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL); if (error) return -ENOMEM; error = percpu_counter_init(&mp->m_ifree, 0, GFP_KERNEL); if (error) goto free_icount; error = percpu_counter_init(&mp->m_delalloc_blks, 0, GFP_KERNEL); if (error) goto free_ifree; error = percpu_counter_init(&mp->m_delalloc_rtextents, 0, GFP_KERNEL); if (error) goto free_delalloc; for (i = 0; i < XC_FREE_NR; i++) { error = percpu_counter_init(&mp->m_free[i].count, 0, GFP_KERNEL); if (error) goto free_freecounters; } return 0; free_freecounters: while (--i >= 0) percpu_counter_destroy(&mp->m_free[i].count); percpu_counter_destroy(&mp->m_delalloc_rtextents); free_delalloc: percpu_counter_destroy(&mp->m_delalloc_blks); free_ifree: percpu_counter_destroy(&mp->m_ifree); free_icount: percpu_counter_destroy(&mp->m_icount); return -ENOMEM; } void xfs_reinit_percpu_counters( struct xfs_mount *mp) { percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount); percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree); xfs_set_freecounter(mp, XC_FREE_BLOCKS, mp->m_sb.sb_fdblocks); if (!xfs_has_zoned(mp)) xfs_set_freecounter(mp, XC_FREE_RTEXTENTS, mp->m_sb.sb_frextents); } static void xfs_destroy_percpu_counters( struct xfs_mount *mp) { enum xfs_free_counter i; for (i = 0; i < XC_FREE_NR; i++) percpu_counter_destroy(&mp->m_free[i].count); percpu_counter_destroy(&mp->m_icount); percpu_counter_destroy(&mp->m_ifree); ASSERT(xfs_is_shutdown(mp) || percpu_counter_sum(&mp->m_delalloc_rtextents) == 0); percpu_counter_destroy(&mp->m_delalloc_rtextents); ASSERT(xfs_is_shutdown(mp) || percpu_counter_sum(&mp->m_delalloc_blks) == 0); percpu_counter_destroy(&mp->m_delalloc_blks); } static int xfs_inodegc_init_percpu( struct xfs_mount *mp) { struct xfs_inodegc *gc; int cpu; mp->m_inodegc = alloc_percpu(struct xfs_inodegc); if (!mp->m_inodegc) return -ENOMEM; for_each_possible_cpu(cpu) { gc = per_cpu_ptr(mp->m_inodegc, cpu); gc->cpu = cpu; gc->mp = mp; init_llist_head(&gc->list); gc->items = 0; gc->error = 0; INIT_DELAYED_WORK(&gc->work, xfs_inodegc_worker); } return 0; } static void xfs_inodegc_free_percpu( struct xfs_mount *mp) { if (!mp->m_inodegc) return; free_percpu(mp->m_inodegc); } static void xfs_fs_put_super( struct super_block *sb) { struct xfs_mount *mp = XFS_M(sb); xfs_notice(mp, "Unmounting Filesystem %pU", &mp->m_sb.sb_uuid); xfs_filestream_unmount(mp); xfs_unmountfs(mp); xfs_rtmount_freesb(mp); xfs_freesb(mp); xchk_mount_stats_free(mp); free_percpu(mp->m_stats.xs_stats); xfs_inodegc_free_percpu(mp); xfs_destroy_percpu_counters(mp); xfs_destroy_mount_workqueues(mp); xfs_shutdown_devices(mp); } static long xfs_fs_nr_cached_objects( struct super_block *sb, struct shrink_control *sc) { /* Paranoia: catch incorrect calls during mount setup or teardown */ if (WARN_ON_ONCE(!sb->s_fs_info)) return 0; return xfs_reclaim_inodes_count(XFS_M(sb)); } static long xfs_fs_free_cached_objects( struct super_block *sb, struct shrink_control *sc) { return xfs_reclaim_inodes_nr(XFS_M(sb), sc->nr_to_scan); } static void xfs_fs_shutdown( struct super_block *sb) { xfs_force_shutdown(XFS_M(sb), SHUTDOWN_DEVICE_REMOVED); } static int xfs_fs_show_stats( struct seq_file *m, struct dentry *root) { struct xfs_mount *mp = XFS_M(root->d_sb); if (xfs_has_zoned(mp) && IS_ENABLED(CONFIG_XFS_RT)) xfs_zoned_show_stats(m, mp); return 0; } static const struct super_operations xfs_super_operations = { .alloc_inode = xfs_fs_alloc_inode, .destroy_inode = xfs_fs_destroy_inode, .drop_inode = xfs_fs_drop_inode, .evict_inode = xfs_fs_evict_inode, .put_super = xfs_fs_put_super, .sync_fs = xfs_fs_sync_fs, .freeze_fs = xfs_fs_freeze, .unfreeze_fs = xfs_fs_unfreeze, .statfs = xfs_fs_statfs, .show_options = xfs_fs_show_options, .nr_cached_objects = xfs_fs_nr_cached_objects, .free_cached_objects = xfs_fs_free_cached_objects, .shutdown = xfs_fs_shutdown, .show_stats = xfs_fs_show_stats, }; static int suffix_kstrtoint( const char *s, unsigned int base, int *res) { int last, shift_left_factor = 0, _res; char *value; int ret = 0; value = kstrdup(s, GFP_KERNEL); if (!value) return -ENOMEM; last = strlen(value) - 1; if (value[last] == 'K' || value[last] == 'k') { shift_left_factor = 10; value[last] = '\0'; } if (value[last] == 'M' || value[last] == 'm') { shift_left_factor = 20; value[last] = '\0'; } if (value[last] == 'G' || value[last] == 'g') { shift_left_factor = 30; value[last] = '\0'; } if (kstrtoint(value, base, &_res)) ret = -EINVAL; kfree(value); *res = _res << shift_left_factor; return ret; } static int suffix_kstrtoull( const char *s, unsigned int base, unsigned long long *res) { int last, shift_left_factor = 0; unsigned long long _res; char *value; int ret = 0; value = kstrdup(s, GFP_KERNEL); if (!value) return -ENOMEM; last = strlen(value) - 1; if (value[last] == 'K' || value[last] == 'k') { shift_left_factor = 10; value[last] = '\0'; } if (value[last] == 'M' || value[last] == 'm') { shift_left_factor = 20; value[last] = '\0'; } if (value[last] == 'G' || value[last] == 'g') { shift_left_factor = 30; value[last] = '\0'; } if (kstrtoull(value, base, &_res)) ret = -EINVAL; kfree(value); *res = _res << shift_left_factor; return ret; } static inline void xfs_fs_warn_deprecated( struct fs_context *fc, struct fs_parameter *param) { /* * Always warn about someone passing in a deprecated mount option. * Previously we wouldn't print the warning if we were reconfiguring * and current mount point already had the flag set, but that was not * the right thing to do. * * Many distributions mount the root filesystem with no options in the * initramfs and rely on mount -a to remount the root fs with the * options in fstab. However, the old behavior meant that there would * never be a warning about deprecated mount options for the root fs in * /etc/fstab. On a single-fs system, that means no warning at all. * * Compounding this problem are distribution scripts that copy * /proc/mounts to fstab, which means that we can't remove mount * options unless we're 100% sure they have only ever been advertised * in /proc/mounts in response to explicitly provided mount options. */ xfs_warn(fc->s_fs_info, "%s mount option is deprecated.", param->key); } /* * Set mount state from a mount option. * * NOTE: mp->m_super is NULL here! */ static int xfs_fs_parse_param( struct fs_context *fc, struct fs_parameter *param) { struct xfs_mount *parsing_mp = fc->s_fs_info; struct fs_parse_result result; int size = 0; int opt; BUILD_BUG_ON(XFS_QFLAGS_MNTOPTS & XFS_MOUNT_QUOTA_ALL); opt = fs_parse(fc, xfs_fs_parameters, param, &result); if (opt < 0) return opt; switch (opt) { case Op_deprecated: xfs_fs_warn_deprecated(fc, param); return 0; case Opt_logbufs: parsing_mp->m_logbufs = result.uint_32; return 0; case Opt_logbsize: if (suffix_kstrtoint(param->string, 10, &parsing_mp->m_logbsize)) return -EINVAL; return 0; case Opt_logdev: kfree(parsing_mp->m_logname); parsing_mp->m_logname = kstrdup(param->string, GFP_KERNEL); if (!parsing_mp->m_logname) return -ENOMEM; return 0; case Opt_rtdev: kfree(parsing_mp->m_rtname); parsing_mp->m_rtname = kstrdup(param->string, GFP_KERNEL); if (!parsing_mp->m_rtname) return -ENOMEM; return 0; case Opt_allocsize: if (suffix_kstrtoint(param->string, 10, &size)) return -EINVAL; parsing_mp->m_allocsize_log = ffs(size) - 1; parsing_mp->m_features |= XFS_FEAT_ALLOCSIZE; return 0; case Opt_grpid: case Opt_bsdgroups: parsing_mp->m_features |= XFS_FEAT_GRPID; return 0; case Opt_nogrpid: case Opt_sysvgroups: parsing_mp->m_features &= ~XFS_FEAT_GRPID; return 0; case Opt_wsync: parsing_mp->m_features |= XFS_FEAT_WSYNC; return 0; case Opt_norecovery: parsing_mp->m_features |= XFS_FEAT_NORECOVERY; return 0; case Opt_noalign: parsing_mp->m_features |= XFS_FEAT_NOALIGN; return 0; case Opt_swalloc: parsing_mp->m_features |= XFS_FEAT_SWALLOC; return 0; case Opt_sunit: parsing_mp->m_dalign = result.uint_32; return 0; case Opt_swidth: parsing_mp->m_swidth = result.uint_32; return 0; case Opt_inode32: parsing_mp->m_features |= XFS_FEAT_SMALL_INUMS; return 0; case Opt_inode64: parsing_mp->m_features &= ~XFS_FEAT_SMALL_INUMS; return 0; case Opt_nouuid: parsing_mp->m_features |= XFS_FEAT_NOUUID; return 0; case Opt_largeio: parsing_mp->m_features |= XFS_FEAT_LARGE_IOSIZE; return 0; case Opt_nolargeio: parsing_mp->m_features &= ~XFS_FEAT_LARGE_IOSIZE; return 0; case Opt_filestreams: parsing_mp->m_features |= XFS_FEAT_FILESTREAMS; return 0; case Opt_noquota: parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT; parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD; parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS; return 0; case Opt_quota: case Opt_uquota: case Opt_usrquota: parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ENFD); parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS; return 0; case Opt_qnoenforce: case Opt_uqnoenforce: parsing_mp->m_qflags |= XFS_UQUOTA_ACCT; parsing_mp->m_qflags &= ~XFS_UQUOTA_ENFD; parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS; return 0; case Opt_pquota: case Opt_prjquota: parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ENFD); parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS; return 0; case Opt_pqnoenforce: parsing_mp->m_qflags |= XFS_PQUOTA_ACCT; parsing_mp->m_qflags &= ~XFS_PQUOTA_ENFD; parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS; return 0; case Opt_gquota: case Opt_grpquota: parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ENFD); parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS; return 0; case Opt_gqnoenforce: parsing_mp->m_qflags |= XFS_GQUOTA_ACCT; parsing_mp->m_qflags &= ~XFS_GQUOTA_ENFD; parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS; return 0; case Opt_discard: parsing_mp->m_features |= XFS_FEAT_DISCARD; return 0; case Opt_nodiscard: parsing_mp->m_features &= ~XFS_FEAT_DISCARD; return 0; #ifdef CONFIG_FS_DAX case Opt_dax: xfs_mount_set_dax_mode(parsing_mp, XFS_DAX_ALWAYS); return 0; case Opt_dax_enum: xfs_mount_set_dax_mode(parsing_mp, result.uint_32); return 0; #endif case Opt_max_open_zones: parsing_mp->m_max_open_zones = result.uint_32; return 0; case Opt_lifetime: parsing_mp->m_features &= ~XFS_FEAT_NOLIFETIME; return 0; case Opt_nolifetime: parsing_mp->m_features |= XFS_FEAT_NOLIFETIME; return 0; case Opt_max_atomic_write: if (suffix_kstrtoull(param->string, 10, &parsing_mp->m_awu_max_bytes)) { xfs_warn(parsing_mp, "max atomic write size must be positive integer"); return -EINVAL; } return 0; default: xfs_warn(parsing_mp, "unknown mount option [%s].", param->key); return -EINVAL; } return 0; } static int xfs_fs_validate_params( struct xfs_mount *mp) { /* No recovery flag requires a read-only mount */ if (xfs_has_norecovery(mp) && !xfs_is_readonly(mp)) { xfs_warn(mp, "no-recovery mounts must be read-only."); return -EINVAL; } if (xfs_has_noalign(mp) && (mp->m_dalign || mp->m_swidth)) { xfs_warn(mp, "sunit and swidth options incompatible with the noalign option"); return -EINVAL; } if (!IS_ENABLED(CONFIG_XFS_QUOTA) && (mp->m_qflags & ~XFS_QFLAGS_MNTOPTS)) { xfs_warn(mp, "quota support not available in this kernel."); return -EINVAL; } if ((mp->m_dalign && !mp->m_swidth) || (!mp->m_dalign && mp->m_swidth)) { xfs_warn(mp, "sunit and swidth must be specified together"); return -EINVAL; } if (mp->m_dalign && (mp->m_swidth % mp->m_dalign != 0)) { xfs_warn(mp, "stripe width (%d) must be a multiple of the stripe unit (%d)", mp->m_swidth, mp->m_dalign); return -EINVAL; } if (mp->m_logbufs != -1 && mp->m_logbufs != 0 && (mp->m_logbufs < XLOG_MIN_ICLOGS || mp->m_logbufs > XLOG_MAX_ICLOGS)) { xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]", mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); return -EINVAL; } if (mp->m_logbsize != -1 && mp->m_logbsize != 0 && (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE || mp->m_logbsize > XLOG_MAX_RECORD_BSIZE || !is_power_of_2(mp->m_logbsize))) { xfs_warn(mp, "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", mp->m_logbsize); return -EINVAL; } if (xfs_has_allocsize(mp) && (mp->m_allocsize_log > XFS_MAX_IO_LOG || mp->m_allocsize_log < XFS_MIN_IO_LOG)) { xfs_warn(mp, "invalid log iosize: %d [not %d-%d]", mp->m_allocsize_log, XFS_MIN_IO_LOG, XFS_MAX_IO_LOG); return -EINVAL; } return 0; } struct dentry * xfs_debugfs_mkdir( const char *name, struct dentry *parent) { struct dentry *child; /* Apparently we're expected to ignore error returns?? */ child = debugfs_create_dir(name, parent); if (IS_ERR(child)) return NULL; return child; } static int xfs_fs_fill_super( struct super_block *sb, struct fs_context *fc) { struct xfs_mount *mp = sb->s_fs_info; struct inode *root; int flags = 0, error; mp->m_super = sb; /* * Copy VFS mount flags from the context now that all parameter parsing * is guaranteed to have been completed by either the old mount API or * the newer fsopen/fsconfig API. */ if (fc->sb_flags & SB_RDONLY) xfs_set_readonly(mp); if (fc->sb_flags & SB_DIRSYNC) mp->m_features |= XFS_FEAT_DIRSYNC; if (fc->sb_flags & SB_SYNCHRONOUS) mp->m_features |= XFS_FEAT_WSYNC; error = xfs_fs_validate_params(mp); if (error) return error; if (!sb_min_blocksize(sb, BBSIZE)) { xfs_err(mp, "unable to set blocksize"); return -EINVAL; } sb->s_xattr = xfs_xattr_handlers; sb->s_export_op = &xfs_export_operations; #ifdef CONFIG_XFS_QUOTA sb->s_qcop = &xfs_quotactl_operations; sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ; #endif sb->s_op = &xfs_super_operations; /* * Delay mount work if the debug hook is set. This is debug * instrumention to coordinate simulation of xfs mount failures with * VFS superblock operations */ if (xfs_globals.mount_delay) { xfs_notice(mp, "Delaying mount for %d seconds.", xfs_globals.mount_delay); msleep(xfs_globals.mount_delay * 1000); } if (fc->sb_flags & SB_SILENT) flags |= XFS_MFSI_QUIET; error = xfs_open_devices(mp); if (error) return error; if (xfs_debugfs) { mp->m_debugfs = xfs_debugfs_mkdir(mp->m_super->s_id, xfs_debugfs); } else { mp->m_debugfs = NULL; } error = xfs_init_mount_workqueues(mp); if (error) goto out_shutdown_devices; error = xfs_init_percpu_counters(mp); if (error) goto out_destroy_workqueues; error = xfs_inodegc_init_percpu(mp); if (error) goto out_destroy_counters; /* Allocate stats memory before we do operations that might use it */ mp->m_stats.xs_stats = alloc_percpu(struct xfsstats); if (!mp->m_stats.xs_stats) { error = -ENOMEM; goto out_destroy_inodegc; } error = xchk_mount_stats_alloc(mp); if (error) goto out_free_stats; error = xfs_readsb(mp, flags); if (error) goto out_free_scrub_stats; error = xfs_finish_flags(mp); if (error) goto out_free_sb; error = xfs_setup_devices(mp); if (error) goto out_free_sb; /* * V4 support is undergoing deprecation. * * Note: this has to use an open coded m_features check as xfs_has_crc * always returns false for !CONFIG_XFS_SUPPORT_V4. */ if (!(mp->m_features & XFS_FEAT_CRC)) { if (!IS_ENABLED(CONFIG_XFS_SUPPORT_V4)) { xfs_warn(mp, "Deprecated V4 format (crc=0) not supported by kernel."); error = -EINVAL; goto out_free_sb; } xfs_warn_once(mp, "Deprecated V4 format (crc=0) will not be supported after September 2030."); } /* ASCII case insensitivity is undergoing deprecation. */ if (xfs_has_asciici(mp)) { #ifdef CONFIG_XFS_SUPPORT_ASCII_CI xfs_warn_once(mp, "Deprecated ASCII case-insensitivity feature (ascii-ci=1) will not be supported after September 2030."); #else xfs_warn(mp, "Deprecated ASCII case-insensitivity feature (ascii-ci=1) not supported by kernel."); error = -EINVAL; goto out_free_sb; #endif } /* * Filesystem claims it needs repair, so refuse the mount unless * norecovery is also specified, in which case the filesystem can * be mounted with no risk of further damage. */ if (xfs_has_needsrepair(mp) && !xfs_has_norecovery(mp)) { xfs_warn(mp, "Filesystem needs repair. Please run xfs_repair."); error = -EFSCORRUPTED; goto out_free_sb; } /* * Don't touch the filesystem if a user tool thinks it owns the primary * superblock. mkfs doesn't clear the flag from secondary supers, so * we don't check them at all. */ if (mp->m_sb.sb_inprogress) { xfs_warn(mp, "Offline file system operation in progress!"); error = -EFSCORRUPTED; goto out_free_sb; } if (mp->m_sb.sb_blocksize > PAGE_SIZE) { size_t max_folio_size = mapping_max_folio_size_supported(); if (!xfs_has_crc(mp)) { xfs_warn(mp, "V4 Filesystem with blocksize %d bytes. Only pagesize (%ld) or less is supported.", mp->m_sb.sb_blocksize, PAGE_SIZE); error = -ENOSYS; goto out_free_sb; } if (mp->m_sb.sb_blocksize > max_folio_size) { xfs_warn(mp, "block size (%u bytes) not supported; Only block size (%zu) or less is supported", mp->m_sb.sb_blocksize, max_folio_size); error = -ENOSYS; goto out_free_sb; } } /* Ensure this filesystem fits in the page cache limits */ if (xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_dblocks) || xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_rblocks)) { xfs_warn(mp, "file system too large to be mounted on this system."); error = -EFBIG; goto out_free_sb; } /* * XFS block mappings use 54 bits to store the logical block offset. * This should suffice to handle the maximum file size that the VFS * supports (currently 2^63 bytes on 64-bit and ULONG_MAX << PAGE_SHIFT * bytes on 32-bit), but as XFS and VFS have gotten the s_maxbytes * calculation wrong on 32-bit kernels in the past, we'll add a WARN_ON * to check this assertion. * * Avoid integer overflow by comparing the maximum bmbt offset to the * maximum pagecache offset in units of fs blocks. */ if (!xfs_verify_fileoff(mp, XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE))) { xfs_warn(mp, "MAX_LFS_FILESIZE block offset (%llu) exceeds extent map maximum (%llu)!", XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE), XFS_MAX_FILEOFF); error = -EINVAL; goto out_free_sb; } error = xfs_rtmount_readsb(mp); if (error) goto out_free_sb; error = xfs_filestream_mount(mp); if (error) goto out_free_rtsb; /* * we must configure the block size in the superblock before we run the * full mount process as the mount process can lookup and cache inodes. */ sb->s_magic = XFS_SUPER_MAGIC; sb->s_blocksize = mp->m_sb.sb_blocksize; sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_max_links = XFS_MAXLINK; sb->s_time_gran = 1; if (xfs_has_bigtime(mp)) { sb->s_time_min = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MIN); sb->s_time_max = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MAX); } else { sb->s_time_min = XFS_LEGACY_TIME_MIN; sb->s_time_max = XFS_LEGACY_TIME_MAX; } trace_xfs_inode_timestamp_range(mp, sb->s_time_min, sb->s_time_max); sb->s_iflags |= SB_I_CGROUPWB | SB_I_ALLOW_HSM; set_posix_acl_flag(sb); /* version 5 superblocks support inode version counters. */ if (xfs_has_crc(mp)) sb->s_flags |= SB_I_VERSION; if (xfs_has_dax_always(mp)) { error = xfs_setup_dax_always(mp); if (error) goto out_filestream_unmount; } if (xfs_has_discard(mp) && !bdev_max_discard_sectors(sb->s_bdev)) { xfs_warn(mp, "mounting with \"discard\" option, but the device does not support discard"); mp->m_features &= ~XFS_FEAT_DISCARD; } if (xfs_has_zoned(mp)) { if (!xfs_has_metadir(mp)) { xfs_alert(mp, "metadir feature required for zoned realtime devices."); error = -EINVAL; goto out_filestream_unmount; } xfs_warn_experimental(mp, XFS_EXPERIMENTAL_ZONED); } if (xfs_has_reflink(mp)) { if (xfs_has_realtime(mp) && !xfs_reflink_supports_rextsize(mp, mp->m_sb.sb_rextsize)) { xfs_alert(mp, "reflink not compatible with realtime extent size %u!", mp->m_sb.sb_rextsize); error = -EINVAL; goto out_filestream_unmount; } if (xfs_has_zoned(mp)) { xfs_alert(mp, "reflink not compatible with zoned RT device!"); error = -EINVAL; goto out_filestream_unmount; } if (xfs_globals.always_cow) { xfs_info(mp, "using DEBUG-only always_cow mode."); mp->m_always_cow = true; } } /* * If no quota mount options were provided, maybe we'll try to pick * up the quota accounting and enforcement flags from the ondisk sb. */ if (!(mp->m_qflags & XFS_QFLAGS_MNTOPTS)) xfs_set_resuming_quotaon(mp); mp->m_qflags &= ~XFS_QFLAGS_MNTOPTS; error = xfs_mountfs(mp); if (error) goto out_filestream_unmount; root = igrab(VFS_I(mp->m_rootip)); if (!root) { error = -ENOENT; goto out_unmount; } sb->s_root = d_make_root(root); if (!sb->s_root) { error = -ENOMEM; goto out_unmount; } return 0; out_filestream_unmount: xfs_filestream_unmount(mp); out_free_rtsb: xfs_rtmount_freesb(mp); out_free_sb: xfs_freesb(mp); out_free_scrub_stats: xchk_mount_stats_free(mp); out_free_stats: free_percpu(mp->m_stats.xs_stats); out_destroy_inodegc: xfs_inodegc_free_percpu(mp); out_destroy_counters: xfs_destroy_percpu_counters(mp); out_destroy_workqueues: xfs_destroy_mount_workqueues(mp); out_shutdown_devices: xfs_shutdown_devices(mp); return error; out_unmount: xfs_filestream_unmount(mp); xfs_unmountfs(mp); goto out_free_rtsb; } static int xfs_fs_get_tree( struct fs_context *fc) { return get_tree_bdev(fc, xfs_fs_fill_super); } static int xfs_remount_rw( struct xfs_mount *mp) { struct xfs_sb *sbp = &mp->m_sb; int error; if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp && xfs_readonly_buftarg(mp->m_logdev_targp)) { xfs_warn(mp, "ro->rw transition prohibited by read-only logdev"); return -EACCES; } if (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp)) { xfs_warn(mp, "ro->rw transition prohibited by read-only rtdev"); return -EACCES; } if (xfs_has_norecovery(mp)) { xfs_warn(mp, "ro->rw transition prohibited on norecovery mount"); return -EINVAL; } if (xfs_sb_is_v5(sbp) && xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) { xfs_warn(mp, "ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem", (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_UNKNOWN)); return -EINVAL; } xfs_clear_readonly(mp); /* * If this is the first remount to writeable state we might have some * superblock changes to update. */ if (mp->m_update_sb) { error = xfs_sync_sb(mp, false); if (error) { xfs_warn(mp, "failed to write sb changes"); return error; } mp->m_update_sb = false; } /* * Fill out the reserve pool if it is empty. Use the stashed value if * it is non-zero, otherwise go with the default. */ xfs_restore_resvblks(mp); xfs_log_work_queue(mp); xfs_blockgc_start(mp); /* Create the per-AG metadata reservation pool .*/ error = xfs_fs_reserve_ag_blocks(mp); if (error && error != -ENOSPC) return error; /* Re-enable the background inode inactivation worker. */ xfs_inodegc_start(mp); /* Restart zone reclaim */ xfs_zone_gc_start(mp); return 0; } static int xfs_remount_ro( struct xfs_mount *mp) { struct xfs_icwalk icw = { .icw_flags = XFS_ICWALK_FLAG_SYNC, }; int error; /* Flush all the dirty data to disk. */ error = sync_filesystem(mp->m_super); if (error) return error; /* * Cancel background eofb scanning so it cannot race with the final * log force+buftarg wait and deadlock the remount. */ xfs_blockgc_stop(mp); /* * Clear out all remaining COW staging extents and speculative post-EOF * preallocations so that we don't leave inodes requiring inactivation * cleanups during reclaim on a read-only mount. We must process every * cached inode, so this requires a synchronous cache scan. */ error = xfs_blockgc_free_space(mp, &icw); if (error) { xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); return error; } /* * Stop the inodegc background worker. xfs_fs_reconfigure already * flushed all pending inodegc work when it sync'd the filesystem. * The VFS holds s_umount, so we know that inodes cannot enter * xfs_fs_destroy_inode during a remount operation. In readonly mode * we send inodes straight to reclaim, so no inodes will be queued. */ xfs_inodegc_stop(mp); /* Stop zone reclaim */ xfs_zone_gc_stop(mp); /* Free the per-AG metadata reservation pool. */ xfs_fs_unreserve_ag_blocks(mp); /* * Before we sync the metadata, we need to free up the reserve block * pool so that the used block count in the superblock on disk is * correct at the end of the remount. Stash the current* reserve pool * size so that if we get remounted rw, we can return it to the same * size. */ xfs_save_resvblks(mp); xfs_log_clean(mp); xfs_set_readonly(mp); return 0; } /* * Logically we would return an error here to prevent users from believing * they might have changed mount options using remount which can't be changed. * * But unfortunately mount(8) adds all options from mtab and fstab to the mount * arguments in some cases so we can't blindly reject options, but have to * check for each specified option if it actually differs from the currently * set option and only reject it if that's the case. * * Until that is implemented we return success for every remount request, and * silently ignore all options that we can't actually change. */ static int xfs_fs_reconfigure( struct fs_context *fc) { struct xfs_mount *mp = XFS_M(fc->root->d_sb); struct xfs_mount *new_mp = fc->s_fs_info; int flags = fc->sb_flags; int error; new_mp->m_qflags &= ~XFS_QFLAGS_MNTOPTS; /* version 5 superblocks always support version counters. */ if (xfs_has_crc(mp)) fc->sb_flags |= SB_I_VERSION; error = xfs_fs_validate_params(new_mp); if (error) return error; /* Validate new max_atomic_write option before making other changes */ if (mp->m_awu_max_bytes != new_mp->m_awu_max_bytes) { error = xfs_set_max_atomic_write_opt(mp, new_mp->m_awu_max_bytes); if (error) return error; } /* inode32 -> inode64 */ if (xfs_has_small_inums(mp) && !xfs_has_small_inums(new_mp)) { mp->m_features &= ~XFS_FEAT_SMALL_INUMS; mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount); } /* inode64 -> inode32 */ if (!xfs_has_small_inums(mp) && xfs_has_small_inums(new_mp)) { mp->m_features |= XFS_FEAT_SMALL_INUMS; mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount); } /* * Now that mp has been modified according to the remount options, we * do a final option validation with xfs_finish_flags() just like it is * just like it is done during mount. We cannot use * done during mount. We cannot use xfs_finish_flags() on new_mp as it * contains only the user given options. */ error = xfs_finish_flags(mp); if (error) return error; /* ro -> rw */ if (xfs_is_readonly(mp) && !(flags & SB_RDONLY)) { error = xfs_remount_rw(mp); if (error) return error; } /* rw -> ro */ if (!xfs_is_readonly(mp) && (flags & SB_RDONLY)) { error = xfs_remount_ro(mp); if (error) return error; } return 0; } static void xfs_fs_free( struct fs_context *fc) { struct xfs_mount *mp = fc->s_fs_info; /* * mp is stored in the fs_context when it is initialized. * mp is transferred to the superblock on a successful mount, * but if an error occurs before the transfer we have to free * it here. */ if (mp) xfs_mount_free(mp); } static const struct fs_context_operations xfs_context_ops = { .parse_param = xfs_fs_parse_param, .get_tree = xfs_fs_get_tree, .reconfigure = xfs_fs_reconfigure, .free = xfs_fs_free, }; /* * WARNING: do not initialise any parameters in this function that depend on * mount option parsing having already been performed as this can be called from * fsopen() before any parameters have been set. */ static int xfs_init_fs_context( struct fs_context *fc) { struct xfs_mount *mp; int i; mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL); if (!mp) return -ENOMEM; spin_lock_init(&mp->m_sb_lock); for (i = 0; i < XG_TYPE_MAX; i++) xa_init(&mp->m_groups[i].xa); mutex_init(&mp->m_growlock); mutex_init(&mp->m_metafile_resv_lock); INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker); INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); mp->m_kobj.kobject.kset = xfs_kset; /* * We don't create the finobt per-ag space reservation until after log * recovery, so we must set this to true so that an ifree transaction * started during log recovery will not depend on space reservations * for finobt expansion. */ mp->m_finobt_nores = true; /* * These can be overridden by the mount option parsing. */ mp->m_logbufs = -1; mp->m_logbsize = -1; mp->m_allocsize_log = 16; /* 64k */ xfs_hooks_init(&mp->m_dir_update_hooks); fc->s_fs_info = mp; fc->ops = &xfs_context_ops; return 0; } static void xfs_kill_sb( struct super_block *sb) { kill_block_super(sb); xfs_mount_free(XFS_M(sb)); } static struct file_system_type xfs_fs_type = { .owner = THIS_MODULE, .name = "xfs", .init_fs_context = xfs_init_fs_context, .parameters = xfs_fs_parameters, .kill_sb = xfs_kill_sb, .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_MGTIME | FS_LBS, }; MODULE_ALIAS_FS("xfs"); STATIC int __init xfs_init_caches(void) { int error; xfs_buf_cache = kmem_cache_create("xfs_buf", sizeof(struct xfs_buf), 0, SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT, NULL); if (!xfs_buf_cache) goto out; xfs_log_ticket_cache = kmem_cache_create("xfs_log_ticket", sizeof(struct xlog_ticket), 0, 0, NULL); if (!xfs_log_ticket_cache) goto out_destroy_buf_cache; error = xfs_btree_init_cur_caches(); if (error) goto out_destroy_log_ticket_cache; error = rcbagbt_init_cur_cache(); if (error) goto out_destroy_btree_cur_cache; error = xfs_defer_init_item_caches(); if (error) goto out_destroy_rcbagbt_cur_cache; xfs_da_state_cache = kmem_cache_create("xfs_da_state", sizeof(struct xfs_da_state), 0, 0, NULL); if (!xfs_da_state_cache) goto out_destroy_defer_item_cache; xfs_ifork_cache = kmem_cache_create("xfs_ifork", sizeof(struct xfs_ifork), 0, 0, NULL); if (!xfs_ifork_cache) goto out_destroy_da_state_cache; xfs_trans_cache = kmem_cache_create("xfs_trans", sizeof(struct xfs_trans), 0, 0, NULL); if (!xfs_trans_cache) goto out_destroy_ifork_cache; /* * The size of the cache-allocated buf log item is the maximum * size possible under XFS. This wastes a little bit of memory, * but it is much faster. */ xfs_buf_item_cache = kmem_cache_create("xfs_buf_item", sizeof(struct xfs_buf_log_item), 0, 0, NULL); if (!xfs_buf_item_cache) goto out_destroy_trans_cache; xfs_efd_cache = kmem_cache_create("xfs_efd_item", xfs_efd_log_item_sizeof(XFS_EFD_MAX_FAST_EXTENTS), 0, 0, NULL); if (!xfs_efd_cache) goto out_destroy_buf_item_cache; xfs_efi_cache = kmem_cache_create("xfs_efi_item", xfs_efi_log_item_sizeof(XFS_EFI_MAX_FAST_EXTENTS), 0, 0, NULL); if (!xfs_efi_cache) goto out_destroy_efd_cache; xfs_inode_cache = kmem_cache_create("xfs_inode", sizeof(struct xfs_inode), 0, (SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT), xfs_fs_inode_init_once); if (!xfs_inode_cache) goto out_destroy_efi_cache; xfs_ili_cache = kmem_cache_create("xfs_ili", sizeof(struct xfs_inode_log_item), 0, SLAB_RECLAIM_ACCOUNT, NULL); if (!xfs_ili_cache) goto out_destroy_inode_cache; xfs_icreate_cache = kmem_cache_create("xfs_icr", sizeof(struct xfs_icreate_item), 0, 0, NULL); if (!xfs_icreate_cache) goto out_destroy_ili_cache; xfs_rud_cache = kmem_cache_create("xfs_rud_item", sizeof(struct xfs_rud_log_item), 0, 0, NULL); if (!xfs_rud_cache) goto out_destroy_icreate_cache; xfs_rui_cache = kmem_cache_create("xfs_rui_item", xfs_rui_log_item_sizeof(XFS_RUI_MAX_FAST_EXTENTS), 0, 0, NULL); if (!xfs_rui_cache) goto out_destroy_rud_cache; xfs_cud_cache = kmem_cache_create("xfs_cud_item", sizeof(struct xfs_cud_log_item), 0, 0, NULL); if (!xfs_cud_cache) goto out_destroy_rui_cache; xfs_cui_cache = kmem_cache_create("xfs_cui_item", xfs_cui_log_item_sizeof(XFS_CUI_MAX_FAST_EXTENTS), 0, 0, NULL); if (!xfs_cui_cache) goto out_destroy_cud_cache; xfs_bud_cache = kmem_cache_create("xfs_bud_item", sizeof(struct xfs_bud_log_item), 0, 0, NULL); if (!xfs_bud_cache) goto out_destroy_cui_cache; xfs_bui_cache = kmem_cache_create("xfs_bui_item", xfs_bui_log_item_sizeof(XFS_BUI_MAX_FAST_EXTENTS), 0, 0, NULL); if (!xfs_bui_cache) goto out_destroy_bud_cache; xfs_attrd_cache = kmem_cache_create("xfs_attrd_item", sizeof(struct xfs_attrd_log_item), 0, 0, NULL); if (!xfs_attrd_cache) goto out_destroy_bui_cache; xfs_attri_cache = kmem_cache_create("xfs_attri_item", sizeof(struct xfs_attri_log_item), 0, 0, NULL); if (!xfs_attri_cache) goto out_destroy_attrd_cache; xfs_iunlink_cache = kmem_cache_create("xfs_iul_item", sizeof(struct xfs_iunlink_item), 0, 0, NULL); if (!xfs_iunlink_cache) goto out_destroy_attri_cache; xfs_xmd_cache = kmem_cache_create("xfs_xmd_item", sizeof(struct xfs_xmd_log_item), 0, 0, NULL); if (!xfs_xmd_cache) goto out_destroy_iul_cache; xfs_xmi_cache = kmem_cache_create("xfs_xmi_item", sizeof(struct xfs_xmi_log_item), 0, 0, NULL); if (!xfs_xmi_cache) goto out_destroy_xmd_cache; xfs_parent_args_cache = kmem_cache_create("xfs_parent_args", sizeof(struct xfs_parent_args), 0, 0, NULL); if (!xfs_parent_args_cache) goto out_destroy_xmi_cache; return 0; out_destroy_xmi_cache: kmem_cache_destroy(xfs_xmi_cache); out_destroy_xmd_cache: kmem_cache_destroy(xfs_xmd_cache); out_destroy_iul_cache: kmem_cache_destroy(xfs_iunlink_cache); out_destroy_attri_cache: kmem_cache_destroy(xfs_attri_cache); out_destroy_attrd_cache: kmem_cache_destroy(xfs_attrd_cache); out_destroy_bui_cache: kmem_cache_destroy(xfs_bui_cache); out_destroy_bud_cache: kmem_cache_destroy(xfs_bud_cache); out_destroy_cui_cache: kmem_cache_destroy(xfs_cui_cache); out_destroy_cud_cache: kmem_cache_destroy(xfs_cud_cache); out_destroy_rui_cache: kmem_cache_destroy(xfs_rui_cache); out_destroy_rud_cache: kmem_cache_destroy(xfs_rud_cache); out_destroy_icreate_cache: kmem_cache_destroy(xfs_icreate_cache); out_destroy_ili_cache: kmem_cache_destroy(xfs_ili_cache); out_destroy_inode_cache: kmem_cache_destroy(xfs_inode_cache); out_destroy_efi_cache: kmem_cache_destroy(xfs_efi_cache); out_destroy_efd_cache: kmem_cache_destroy(xfs_efd_cache); out_destroy_buf_item_cache: kmem_cache_destroy(xfs_buf_item_cache); out_destroy_trans_cache: kmem_cache_destroy(xfs_trans_cache); out_destroy_ifork_cache: kmem_cache_destroy(xfs_ifork_cache); out_destroy_da_state_cache: kmem_cache_destroy(xfs_da_state_cache); out_destroy_defer_item_cache: xfs_defer_destroy_item_caches(); out_destroy_rcbagbt_cur_cache: rcbagbt_destroy_cur_cache(); out_destroy_btree_cur_cache: xfs_btree_destroy_cur_caches(); out_destroy_log_ticket_cache: kmem_cache_destroy(xfs_log_ticket_cache); out_destroy_buf_cache: kmem_cache_destroy(xfs_buf_cache); out: return -ENOMEM; } STATIC void xfs_destroy_caches(void) { /* * Make sure all delayed rcu free are flushed before we * destroy caches. */ rcu_barrier(); kmem_cache_destroy(xfs_parent_args_cache); kmem_cache_destroy(xfs_xmd_cache); kmem_cache_destroy(xfs_xmi_cache); kmem_cache_destroy(xfs_iunlink_cache); kmem_cache_destroy(xfs_attri_cache); kmem_cache_destroy(xfs_attrd_cache); kmem_cache_destroy(xfs_bui_cache); kmem_cache_destroy(xfs_bud_cache); kmem_cache_destroy(xfs_cui_cache); kmem_cache_destroy(xfs_cud_cache); kmem_cache_destroy(xfs_rui_cache); kmem_cache_destroy(xfs_rud_cache); kmem_cache_destroy(xfs_icreate_cache); kmem_cache_destroy(xfs_ili_cache); kmem_cache_destroy(xfs_inode_cache); kmem_cache_destroy(xfs_efi_cache); kmem_cache_destroy(xfs_efd_cache); kmem_cache_destroy(xfs_buf_item_cache); kmem_cache_destroy(xfs_trans_cache); kmem_cache_destroy(xfs_ifork_cache); kmem_cache_destroy(xfs_da_state_cache); xfs_defer_destroy_item_caches(); rcbagbt_destroy_cur_cache(); xfs_btree_destroy_cur_caches(); kmem_cache_destroy(xfs_log_ticket_cache); kmem_cache_destroy(xfs_buf_cache); } STATIC int __init xfs_init_workqueues(void) { /* * The allocation workqueue can be used in memory reclaim situations * (writepage path), and parallelism is only limited by the number of * AGs in all the filesystems mounted. Hence use the default large * max_active value for this workqueue. */ xfs_alloc_wq = alloc_workqueue("xfsalloc", XFS_WQFLAGS(WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_PERCPU), 0); if (!xfs_alloc_wq) return -ENOMEM; xfs_discard_wq = alloc_workqueue("xfsdiscard", XFS_WQFLAGS(WQ_UNBOUND), 0); if (!xfs_discard_wq) goto out_free_alloc_wq; return 0; out_free_alloc_wq: destroy_workqueue(xfs_alloc_wq); return -ENOMEM; } STATIC void xfs_destroy_workqueues(void) { destroy_workqueue(xfs_discard_wq); destroy_workqueue(xfs_alloc_wq); } STATIC int __init init_xfs_fs(void) { int error; xfs_check_ondisk_structs(); error = xfs_dahash_test(); if (error) return error; printk(KERN_INFO XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled\n"); xfs_dir_startup(); error = xfs_init_caches(); if (error) goto out; error = xfs_init_workqueues(); if (error) goto out_destroy_caches; error = xfs_mru_cache_init(); if (error) goto out_destroy_wq; error = xfs_init_procfs(); if (error) goto out_mru_cache_uninit; error = xfs_sysctl_register(); if (error) goto out_cleanup_procfs; xfs_debugfs = xfs_debugfs_mkdir("xfs", NULL); xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj); if (!xfs_kset) { error = -ENOMEM; goto out_debugfs_unregister; } xfsstats.xs_kobj.kobject.kset = xfs_kset; xfsstats.xs_stats = alloc_percpu(struct xfsstats); if (!xfsstats.xs_stats) { error = -ENOMEM; goto out_kset_unregister; } error = xfs_sysfs_init(&xfsstats.xs_kobj, &xfs_stats_ktype, NULL, "stats"); if (error) goto out_free_stats; error = xchk_global_stats_setup(xfs_debugfs); if (error) goto out_remove_stats_kobj; #ifdef DEBUG xfs_dbg_kobj.kobject.kset = xfs_kset; error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug"); if (error) goto out_remove_scrub_stats; #endif error = xfs_qm_init(); if (error) goto out_remove_dbg_kobj; error = register_filesystem(&xfs_fs_type); if (error) goto out_qm_exit; return 0; out_qm_exit: xfs_qm_exit(); out_remove_dbg_kobj: #ifdef DEBUG xfs_sysfs_del(&xfs_dbg_kobj); out_remove_scrub_stats: #endif xchk_global_stats_teardown(); out_remove_stats_kobj: xfs_sysfs_del(&xfsstats.xs_kobj); out_free_stats: free_percpu(xfsstats.xs_stats); out_kset_unregister: kset_unregister(xfs_kset); out_debugfs_unregister: debugfs_remove(xfs_debugfs); xfs_sysctl_unregister(); out_cleanup_procfs: xfs_cleanup_procfs(); out_mru_cache_uninit: xfs_mru_cache_uninit(); out_destroy_wq: xfs_destroy_workqueues(); out_destroy_caches: xfs_destroy_caches(); out: return error; } STATIC void __exit exit_xfs_fs(void) { xfs_qm_exit(); unregister_filesystem(&xfs_fs_type); #ifdef DEBUG xfs_sysfs_del(&xfs_dbg_kobj); #endif xchk_global_stats_teardown(); xfs_sysfs_del(&xfsstats.xs_kobj); free_percpu(xfsstats.xs_stats); kset_unregister(xfs_kset); debugfs_remove(xfs_debugfs); xfs_sysctl_unregister(); xfs_cleanup_procfs(); xfs_mru_cache_uninit(); xfs_destroy_workqueues(); xfs_destroy_caches(); xfs_uuid_table_free(); } module_init(init_xfs_fs); module_exit(exit_xfs_fs); MODULE_AUTHOR("Silicon Graphics, Inc."); MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled"); MODULE_LICENSE("GPL");
7 36 37 36 122 121 121 182 182 182 186 185 186 186 187 187 25 25 25 122 120 122 3 3 36 37 8 8 8 1 1 3 3 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 // SPDX-License-Identifier: GPL-2.0 #include "blk-rq-qos.h" /* * Increment 'v', if 'v' is below 'below'. Returns true if we succeeded, * false if 'v' + 1 would be bigger than 'below'. */ static bool atomic_inc_below(atomic_t *v, unsigned int below) { unsigned int cur = atomic_read(v); do { if (cur >= below) return false; } while (!atomic_try_cmpxchg(v, &cur, cur + 1)); return true; } bool rq_wait_inc_below(struct rq_wait *rq_wait, unsigned int limit) { return atomic_inc_below(&rq_wait->inflight, limit); } void __rq_qos_cleanup(struct rq_qos *rqos, struct bio *bio) { do { if (rqos->ops->cleanup) rqos->ops->cleanup(rqos, bio); rqos = rqos->next; } while (rqos); } void __rq_qos_done(struct rq_qos *rqos, struct request *rq) { do { if (rqos->ops->done) rqos->ops->done(rqos, rq); rqos = rqos->next; } while (rqos); } void __rq_qos_issue(struct rq_qos *rqos, struct request *rq) { do { if (rqos->ops->issue) rqos->ops->issue(rqos, rq); rqos = rqos->next; } while (rqos); } void __rq_qos_requeue(struct rq_qos *rqos, struct request *rq) { do { if (rqos->ops->requeue) rqos->ops->requeue(rqos, rq); rqos = rqos->next; } while (rqos); } void __rq_qos_throttle(struct rq_qos *rqos, struct bio *bio) { do { if (rqos->ops->throttle) rqos->ops->throttle(rqos, bio); rqos = rqos->next; } while (rqos); } void __rq_qos_track(struct rq_qos *rqos, struct request *rq, struct bio *bio) { do { if (rqos->ops->track) rqos->ops->track(rqos, rq, bio); rqos = rqos->next; } while (rqos); } void __rq_qos_merge(struct rq_qos *rqos, struct request *rq, struct bio *bio) { do { if (rqos->ops->merge) rqos->ops->merge(rqos, rq, bio); rqos = rqos->next; } while (rqos); } void __rq_qos_done_bio(struct rq_qos *rqos, struct bio *bio) { do { if (rqos->ops->done_bio) rqos->ops->done_bio(rqos, bio); rqos = rqos->next; } while (rqos); } void __rq_qos_queue_depth_changed(struct rq_qos *rqos) { do { if (rqos->ops->queue_depth_changed) rqos->ops->queue_depth_changed(rqos); rqos = rqos->next; } while (rqos); } /* * Return true, if we can't increase the depth further by scaling */ bool rq_depth_calc_max_depth(struct rq_depth *rqd) { unsigned int depth; bool ret = false; /* * For QD=1 devices, this is a special case. It's important for those * to have one request ready when one completes, so force a depth of * 2 for those devices. On the backend, it'll be a depth of 1 anyway, * since the device can't have more than that in flight. If we're * scaling down, then keep a setting of 1/1/1. */ if (rqd->queue_depth == 1) { if (rqd->scale_step > 0) rqd->max_depth = 1; else { rqd->max_depth = 2; ret = true; } } else { /* * scale_step == 0 is our default state. If we have suffered * latency spikes, step will be > 0, and we shrink the * allowed write depths. If step is < 0, we're only doing * writes, and we allow a temporarily higher depth to * increase performance. */ depth = min_t(unsigned int, rqd->default_depth, rqd->queue_depth); if (rqd->scale_step > 0) depth = 1 + ((depth - 1) >> min(31, rqd->scale_step)); else if (rqd->scale_step < 0) { unsigned int maxd = 3 * rqd->queue_depth / 4; depth = 1 + ((depth - 1) << -rqd->scale_step); if (depth > maxd) { depth = maxd; ret = true; } } rqd->max_depth = depth; } return ret; } /* Returns true on success and false if scaling up wasn't possible */ bool rq_depth_scale_up(struct rq_depth *rqd) { /* * Hit max in previous round, stop here */ if (rqd->scaled_max) return false; rqd->scale_step--; rqd->scaled_max = rq_depth_calc_max_depth(rqd); return true; } /* * Scale rwb down. If 'hard_throttle' is set, do it quicker, since we * had a latency violation. Returns true on success and returns false if * scaling down wasn't possible. */ bool rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle) { /* * Stop scaling down when we've hit the limit. This also prevents * ->scale_step from going to crazy values, if the device can't * keep up. */ if (rqd->max_depth == 1) return false; if (rqd->scale_step < 0 && hard_throttle) rqd->scale_step = 0; else rqd->scale_step++; rqd->scaled_max = false; rq_depth_calc_max_depth(rqd); return true; } struct rq_qos_wait_data { struct wait_queue_entry wq; struct rq_wait *rqw; acquire_inflight_cb_t *cb; void *private_data; bool got_token; }; static int rq_qos_wake_function(struct wait_queue_entry *curr, unsigned int mode, int wake_flags, void *key) { struct rq_qos_wait_data *data = container_of(curr, struct rq_qos_wait_data, wq); /* * If we fail to get a budget, return -1 to interrupt the wake up loop * in __wake_up_common. */ if (!data->cb(data->rqw, data->private_data)) return -1; data->got_token = true; /* * autoremove_wake_function() removes the wait entry only when it * actually changed the task state. We want the wait always removed. * Remove explicitly and use default_wake_function(). */ default_wake_function(curr, mode, wake_flags, key); /* * Note that the order of operations is important as finish_wait() * tests whether @curr is removed without grabbing the lock. This * should be the last thing to do to make sure we will not have a * UAF access to @data. And the semantics of memory barrier in it * also make sure the waiter will see the latest @data->got_token * once list_empty_careful() in finish_wait() returns true. */ list_del_init_careful(&curr->entry); return 1; } /** * rq_qos_wait - throttle on a rqw if we need to * @rqw: rqw to throttle on * @private_data: caller provided specific data * @acquire_inflight_cb: inc the rqw->inflight counter if we can * @cleanup_cb: the callback to cleanup in case we race with a waker * * This provides a uniform place for the rq_qos users to do their throttling. * Since you can end up with a lot of things sleeping at once, this manages the * waking up based on the resources available. The acquire_inflight_cb should * inc the rqw->inflight if we have the ability to do so, or return false if not * and then we will sleep until the room becomes available. * * cleanup_cb is in case that we race with a waker and need to cleanup the * inflight count accordingly. */ void rq_qos_wait(struct rq_wait *rqw, void *private_data, acquire_inflight_cb_t *acquire_inflight_cb, cleanup_cb_t *cleanup_cb) { struct rq_qos_wait_data data = { .rqw = rqw, .cb = acquire_inflight_cb, .private_data = private_data, .got_token = false, }; bool first_waiter; /* * If there are no waiters in the waiting queue, try to increase the * inflight counter if we can. Otherwise, prepare for adding ourselves * to the waiting queue. */ if (!waitqueue_active(&rqw->wait) && acquire_inflight_cb(rqw, private_data)) return; init_wait_func(&data.wq, rq_qos_wake_function); first_waiter = prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE); /* * Make sure there is at least one inflight process; otherwise, waiters * will never be woken up. Since there may be no inflight process before * adding ourselves to the waiting queue above, we need to try to * increase the inflight counter for ourselves. And it is sufficient to * guarantee that at least the first waiter to enter the waiting queue * will re-check the waiting condition before going to sleep, thus * ensuring forward progress. */ if (!data.got_token && first_waiter && acquire_inflight_cb(rqw, private_data)) { finish_wait(&rqw->wait, &data.wq); /* * We raced with rq_qos_wake_function() getting a token, * which means we now have two. Put our local token * and wake anyone else potentially waiting for one. * * Enough memory barrier in list_empty_careful() in * finish_wait() is paired with list_del_init_careful() * in rq_qos_wake_function() to make sure we will see * the latest @data->got_token. */ if (data.got_token) cleanup_cb(rqw, private_data); return; } /* we are now relying on the waker to increase our inflight counter. */ do { if (data.got_token) break; io_schedule(); set_current_state(TASK_UNINTERRUPTIBLE); } while (1); finish_wait(&rqw->wait, &data.wq); } void rq_qos_exit(struct request_queue *q) { mutex_lock(&q->rq_qos_mutex); while (q->rq_qos) { struct rq_qos *rqos = q->rq_qos; q->rq_qos = rqos->next; rqos->ops->exit(rqos); } blk_queue_flag_clear(QUEUE_FLAG_QOS_ENABLED, q); mutex_unlock(&q->rq_qos_mutex); } int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id, const struct rq_qos_ops *ops) { struct request_queue *q = disk->queue; unsigned int memflags; lockdep_assert_held(&q->rq_qos_mutex); rqos->disk = disk; rqos->id = id; rqos->ops = ops; /* * No IO can be in-flight when adding rqos, so freeze queue, which * is fine since we only support rq_qos for blk-mq queue. */ memflags = blk_mq_freeze_queue(q); if (rq_qos_id(q, rqos->id)) goto ebusy; rqos->next = q->rq_qos; q->rq_qos = rqos; blk_queue_flag_set(QUEUE_FLAG_QOS_ENABLED, q); blk_mq_unfreeze_queue(q, memflags); if (rqos->ops->debugfs_attrs) { mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_register_rqos(rqos); mutex_unlock(&q->debugfs_mutex); } return 0; ebusy: blk_mq_unfreeze_queue(q, memflags); return -EBUSY; } void rq_qos_del(struct rq_qos *rqos) { struct request_queue *q = rqos->disk->queue; struct rq_qos **cur; unsigned int memflags; lockdep_assert_held(&q->rq_qos_mutex); memflags = blk_mq_freeze_queue(q); for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) { if (*cur == rqos) { *cur = rqos->next; break; } } if (!q->rq_qos) blk_queue_flag_clear(QUEUE_FLAG_QOS_ENABLED, q); blk_mq_unfreeze_queue(q, memflags); mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_rqos(rqos); mutex_unlock(&q->debugfs_mutex); }
34 102 101 1 4 1 1 1 3 101 42 106 106 105 105 2 106 105 1 1 1 1 1 1 1 105 105 1 2 1 103 104 105 103 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 105 5 3 3 3 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 // SPDX-License-Identifier: GPL-2.0-or-later /* * drivers/net/bond/bond_netlink.c - Netlink interface for bonding * Copyright (c) 2013 Jiri Pirko <jiri@resnulli.us> * Copyright (c) 2013 Scott Feldman <sfeldma@cumulusnetworks.com> */ #include <linux/module.h> #include <linux/errno.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/if_link.h> #include <linux/if_ether.h> #include <net/netlink.h> #include <net/rtnetlink.h> #include <net/bonding.h> #include <net/ipv6.h> static size_t bond_get_slave_size(const struct net_device *bond_dev, const struct net_device *slave_dev) { return nla_total_size(sizeof(u8)) + /* IFLA_BOND_SLAVE_STATE */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_SLAVE_MII_STATUS */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_SLAVE_LINK_FAILURE_COUNT */ nla_total_size(MAX_ADDR_LEN) + /* IFLA_BOND_SLAVE_PERM_HWADDR */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_QUEUE_ID */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_AD_AGGREGATOR_ID */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE */ nla_total_size(sizeof(s32)) + /* IFLA_BOND_SLAVE_PRIO */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_ACTOR_PORT_PRIO */ 0; } static int bond_fill_slave_info(struct sk_buff *skb, const struct net_device *bond_dev, const struct net_device *slave_dev) { struct slave *slave = bond_slave_get_rtnl(slave_dev); if (nla_put_u8(skb, IFLA_BOND_SLAVE_STATE, bond_slave_state(slave))) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_SLAVE_MII_STATUS, slave->link)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BOND_SLAVE_LINK_FAILURE_COUNT, slave->link_failure_count)) goto nla_put_failure; if (nla_put(skb, IFLA_BOND_SLAVE_PERM_HWADDR, slave_dev->addr_len, slave->perm_hwaddr)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_BOND_SLAVE_QUEUE_ID, READ_ONCE(slave->queue_id))) goto nla_put_failure; if (nla_put_s32(skb, IFLA_BOND_SLAVE_PRIO, slave->prio)) goto nla_put_failure; if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) { const struct aggregator *agg; const struct port *ad_port; ad_port = &SLAVE_AD_INFO(slave)->port; agg = SLAVE_AD_INFO(slave)->port.aggregator; if (agg) { if (nla_put_u16(skb, IFLA_BOND_SLAVE_AD_AGGREGATOR_ID, agg->aggregator_identifier)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE, ad_port->actor_oper_port_state)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE, ad_port->partner_oper.port_state)) goto nla_put_failure; } if (nla_put_u16(skb, IFLA_BOND_SLAVE_ACTOR_PORT_PRIO, SLAVE_AD_INFO(slave)->port_priority)) goto nla_put_failure; } return 0; nla_put_failure: return -EMSGSIZE; } /* Limit the max delay range to 300s */ static const struct netlink_range_validation delay_range = { .max = 300000, }; static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = { [IFLA_BOND_MODE] = { .type = NLA_U8 }, [IFLA_BOND_ACTIVE_SLAVE] = { .type = NLA_U32 }, [IFLA_BOND_MIIMON] = { .type = NLA_U32 }, [IFLA_BOND_UPDELAY] = { .type = NLA_U32 }, [IFLA_BOND_DOWNDELAY] = { .type = NLA_U32 }, [IFLA_BOND_USE_CARRIER] = { .type = NLA_U8 }, [IFLA_BOND_ARP_INTERVAL] = { .type = NLA_U32 }, [IFLA_BOND_ARP_IP_TARGET] = { .type = NLA_NESTED }, [IFLA_BOND_ARP_VALIDATE] = { .type = NLA_U32 }, [IFLA_BOND_ARP_ALL_TARGETS] = { .type = NLA_U32 }, [IFLA_BOND_PRIMARY] = { .type = NLA_U32 }, [IFLA_BOND_PRIMARY_RESELECT] = { .type = NLA_U8 }, [IFLA_BOND_FAIL_OVER_MAC] = { .type = NLA_U8 }, [IFLA_BOND_XMIT_HASH_POLICY] = { .type = NLA_U8 }, [IFLA_BOND_RESEND_IGMP] = { .type = NLA_U32 }, [IFLA_BOND_NUM_PEER_NOTIF] = { .type = NLA_U8 }, [IFLA_BOND_ALL_SLAVES_ACTIVE] = { .type = NLA_U8 }, [IFLA_BOND_MIN_LINKS] = { .type = NLA_U32 }, [IFLA_BOND_LP_INTERVAL] = { .type = NLA_U32 }, [IFLA_BOND_PACKETS_PER_SLAVE] = { .type = NLA_U32 }, [IFLA_BOND_AD_LACP_ACTIVE] = { .type = NLA_U8 }, [IFLA_BOND_AD_LACP_RATE] = { .type = NLA_U8 }, [IFLA_BOND_AD_SELECT] = { .type = NLA_U8 }, [IFLA_BOND_AD_INFO] = { .type = NLA_NESTED }, [IFLA_BOND_AD_ACTOR_SYS_PRIO] = { .type = NLA_U16 }, [IFLA_BOND_AD_USER_PORT_KEY] = { .type = NLA_U16 }, [IFLA_BOND_AD_ACTOR_SYSTEM] = { .type = NLA_BINARY, .len = ETH_ALEN }, [IFLA_BOND_TLB_DYNAMIC_LB] = { .type = NLA_U8 }, [IFLA_BOND_PEER_NOTIF_DELAY] = NLA_POLICY_FULL_RANGE(NLA_U32, &delay_range), [IFLA_BOND_MISSED_MAX] = { .type = NLA_U8 }, [IFLA_BOND_NS_IP6_TARGET] = { .type = NLA_NESTED }, [IFLA_BOND_COUPLED_CONTROL] = { .type = NLA_U8 }, [IFLA_BOND_BROADCAST_NEIGH] = { .type = NLA_U8 }, }; static const struct nla_policy bond_slave_policy[IFLA_BOND_SLAVE_MAX + 1] = { [IFLA_BOND_SLAVE_QUEUE_ID] = { .type = NLA_U16 }, [IFLA_BOND_SLAVE_PRIO] = { .type = NLA_S32 }, [IFLA_BOND_SLAVE_ACTOR_PORT_PRIO] = { .type = NLA_U16 }, }; static int bond_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { if (tb[IFLA_ADDRESS]) { if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) return -EINVAL; if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) return -EADDRNOTAVAIL; } return 0; } static int bond_slave_changelink(struct net_device *bond_dev, struct net_device *slave_dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct bonding *bond = netdev_priv(bond_dev); struct bond_opt_value newval; int err; if (!data) return 0; if (data[IFLA_BOND_SLAVE_QUEUE_ID]) { u16 queue_id = nla_get_u16(data[IFLA_BOND_SLAVE_QUEUE_ID]); char queue_id_str[IFNAMSIZ + 7]; /* queue_id option setting expects slave_name:queue_id */ snprintf(queue_id_str, sizeof(queue_id_str), "%s:%u\n", slave_dev->name, queue_id); bond_opt_initstr(&newval, queue_id_str); err = __bond_opt_set(bond, BOND_OPT_QUEUE_ID, &newval, data[IFLA_BOND_SLAVE_QUEUE_ID], extack); if (err) return err; } if (data[IFLA_BOND_SLAVE_PRIO]) { int prio = nla_get_s32(data[IFLA_BOND_SLAVE_PRIO]); bond_opt_slave_initval(&newval, &slave_dev, prio); err = __bond_opt_set(bond, BOND_OPT_PRIO, &newval, data[IFLA_BOND_SLAVE_PRIO], extack); if (err) return err; } if (data[IFLA_BOND_SLAVE_ACTOR_PORT_PRIO]) { u16 ad_prio = nla_get_u16(data[IFLA_BOND_SLAVE_ACTOR_PORT_PRIO]); bond_opt_slave_initval(&newval, &slave_dev, ad_prio); err = __bond_opt_set(bond, BOND_OPT_ACTOR_PORT_PRIO, &newval, data[IFLA_BOND_SLAVE_ACTOR_PORT_PRIO], extack); if (err) return err; } return 0; } static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct bonding *bond = netdev_priv(bond_dev); struct bond_opt_value newval; int miimon = 0; int err; if (!data) return 0; if (data[IFLA_BOND_MODE]) { int mode = nla_get_u8(data[IFLA_BOND_MODE]); bond_opt_initval(&newval, mode); err = __bond_opt_set(bond, BOND_OPT_MODE, &newval, data[IFLA_BOND_MODE], extack); if (err) return err; } if (data[IFLA_BOND_ACTIVE_SLAVE]) { int ifindex = nla_get_u32(data[IFLA_BOND_ACTIVE_SLAVE]); struct net_device *slave_dev; char *active_slave = ""; if (ifindex != 0) { slave_dev = __dev_get_by_index(dev_net(bond_dev), ifindex); if (!slave_dev) return -ENODEV; active_slave = slave_dev->name; } bond_opt_initstr(&newval, active_slave); err = __bond_opt_set(bond, BOND_OPT_ACTIVE_SLAVE, &newval, data[IFLA_BOND_ACTIVE_SLAVE], extack); if (err) return err; } if (data[IFLA_BOND_MIIMON]) { miimon = nla_get_u32(data[IFLA_BOND_MIIMON]); bond_opt_initval(&newval, miimon); err = __bond_opt_set(bond, BOND_OPT_MIIMON, &newval, data[IFLA_BOND_MIIMON], extack); if (err) return err; } if (data[IFLA_BOND_UPDELAY]) { int updelay = nla_get_u32(data[IFLA_BOND_UPDELAY]); bond_opt_initval(&newval, updelay); err = __bond_opt_set(bond, BOND_OPT_UPDELAY, &newval, data[IFLA_BOND_UPDELAY], extack); if (err) return err; } if (data[IFLA_BOND_DOWNDELAY]) { int downdelay = nla_get_u32(data[IFLA_BOND_DOWNDELAY]); bond_opt_initval(&newval, downdelay); err = __bond_opt_set(bond, BOND_OPT_DOWNDELAY, &newval, data[IFLA_BOND_DOWNDELAY], extack); if (err) return err; } if (data[IFLA_BOND_PEER_NOTIF_DELAY]) { int delay = nla_get_u32(data[IFLA_BOND_PEER_NOTIF_DELAY]); bond_opt_initval(&newval, delay); err = __bond_opt_set(bond, BOND_OPT_PEER_NOTIF_DELAY, &newval, data[IFLA_BOND_PEER_NOTIF_DELAY], extack); if (err) return err; } if (data[IFLA_BOND_USE_CARRIER]) { if (nla_get_u8(data[IFLA_BOND_USE_CARRIER]) != 1) { NL_SET_ERR_MSG_ATTR(extack, data[IFLA_BOND_USE_CARRIER], "option obsolete, use_carrier cannot be disabled"); return -EINVAL; } } if (data[IFLA_BOND_ARP_INTERVAL]) { int arp_interval = nla_get_u32(data[IFLA_BOND_ARP_INTERVAL]); if (arp_interval && miimon) { NL_SET_ERR_MSG_ATTR(extack, data[IFLA_BOND_ARP_INTERVAL], "ARP monitoring cannot be used with MII monitoring"); return -EINVAL; } bond_opt_initval(&newval, arp_interval); err = __bond_opt_set(bond, BOND_OPT_ARP_INTERVAL, &newval, data[IFLA_BOND_ARP_INTERVAL], extack); if (err) return err; } if (data[IFLA_BOND_ARP_IP_TARGET]) { struct nlattr *attr; int i = 0, rem; bond_option_arp_ip_targets_clear(bond); nla_for_each_nested(attr, data[IFLA_BOND_ARP_IP_TARGET], rem) { __be32 target; if (nla_len(attr) < sizeof(target)) return -EINVAL; target = nla_get_be32(attr); bond_opt_initval(&newval, (__force u64)target); err = __bond_opt_set(bond, BOND_OPT_ARP_TARGETS, &newval, data[IFLA_BOND_ARP_IP_TARGET], extack); if (err) break; i++; } if (i == 0 && bond->params.arp_interval) netdev_warn(bond->dev, "Removing last arp target with arp_interval on\n"); if (err) return err; } #if IS_ENABLED(CONFIG_IPV6) if (data[IFLA_BOND_NS_IP6_TARGET]) { struct nlattr *attr; int i = 0, rem; bond_option_ns_ip6_targets_clear(bond); nla_for_each_nested(attr, data[IFLA_BOND_NS_IP6_TARGET], rem) { struct in6_addr addr6; if (nla_len(attr) < sizeof(addr6)) { NL_SET_ERR_MSG(extack, "Invalid IPv6 address"); return -EINVAL; } addr6 = nla_get_in6_addr(attr); bond_opt_initextra(&newval, &addr6, sizeof(addr6)); err = __bond_opt_set(bond, BOND_OPT_NS_TARGETS, &newval, data[IFLA_BOND_NS_IP6_TARGET], extack); if (err) break; i++; } if (i == 0 && bond->params.arp_interval) netdev_warn(bond->dev, "Removing last ns target with arp_interval on\n"); if (err) return err; } #endif if (data[IFLA_BOND_ARP_VALIDATE]) { int arp_validate = nla_get_u32(data[IFLA_BOND_ARP_VALIDATE]); if (arp_validate && miimon) { NL_SET_ERR_MSG_ATTR(extack, data[IFLA_BOND_ARP_INTERVAL], "ARP validating cannot be used with MII monitoring"); return -EINVAL; } bond_opt_initval(&newval, arp_validate); err = __bond_opt_set(bond, BOND_OPT_ARP_VALIDATE, &newval, data[IFLA_BOND_ARP_VALIDATE], extack); if (err) return err; } if (data[IFLA_BOND_ARP_ALL_TARGETS]) { int arp_all_targets = nla_get_u32(data[IFLA_BOND_ARP_ALL_TARGETS]); bond_opt_initval(&newval, arp_all_targets); err = __bond_opt_set(bond, BOND_OPT_ARP_ALL_TARGETS, &newval, data[IFLA_BOND_ARP_ALL_TARGETS], extack); if (err) return err; } if (data[IFLA_BOND_PRIMARY]) { int ifindex = nla_get_u32(data[IFLA_BOND_PRIMARY]); struct net_device *dev; char *primary = ""; dev = __dev_get_by_index(dev_net(bond_dev), ifindex); if (dev) primary = dev->name; bond_opt_initstr(&newval, primary); err = __bond_opt_set(bond, BOND_OPT_PRIMARY, &newval, data[IFLA_BOND_PRIMARY], extack); if (err) return err; } if (data[IFLA_BOND_PRIMARY_RESELECT]) { int primary_reselect = nla_get_u8(data[IFLA_BOND_PRIMARY_RESELECT]); bond_opt_initval(&newval, primary_reselect); err = __bond_opt_set(bond, BOND_OPT_PRIMARY_RESELECT, &newval, data[IFLA_BOND_PRIMARY_RESELECT], extack); if (err) return err; } if (data[IFLA_BOND_FAIL_OVER_MAC]) { int fail_over_mac = nla_get_u8(data[IFLA_BOND_FAIL_OVER_MAC]); bond_opt_initval(&newval, fail_over_mac); err = __bond_opt_set(bond, BOND_OPT_FAIL_OVER_MAC, &newval, data[IFLA_BOND_FAIL_OVER_MAC], extack); if (err) return err; } if (data[IFLA_BOND_XMIT_HASH_POLICY]) { int xmit_hash_policy = nla_get_u8(data[IFLA_BOND_XMIT_HASH_POLICY]); bond_opt_initval(&newval, xmit_hash_policy); err = __bond_opt_set(bond, BOND_OPT_XMIT_HASH, &newval, data[IFLA_BOND_XMIT_HASH_POLICY], extack); if (err) return err; } if (data[IFLA_BOND_RESEND_IGMP]) { int resend_igmp = nla_get_u32(data[IFLA_BOND_RESEND_IGMP]); bond_opt_initval(&newval, resend_igmp); err = __bond_opt_set(bond, BOND_OPT_RESEND_IGMP, &newval, data[IFLA_BOND_RESEND_IGMP], extack); if (err) return err; } if (data[IFLA_BOND_NUM_PEER_NOTIF]) { int num_peer_notif = nla_get_u8(data[IFLA_BOND_NUM_PEER_NOTIF]); bond_opt_initval(&newval, num_peer_notif); err = __bond_opt_set(bond, BOND_OPT_NUM_PEER_NOTIF, &newval, data[IFLA_BOND_NUM_PEER_NOTIF], extack); if (err) return err; } if (data[IFLA_BOND_ALL_SLAVES_ACTIVE]) { int all_slaves_active = nla_get_u8(data[IFLA_BOND_ALL_SLAVES_ACTIVE]); bond_opt_initval(&newval, all_slaves_active); err = __bond_opt_set(bond, BOND_OPT_ALL_SLAVES_ACTIVE, &newval, data[IFLA_BOND_ALL_SLAVES_ACTIVE], extack); if (err) return err; } if (data[IFLA_BOND_MIN_LINKS]) { int min_links = nla_get_u32(data[IFLA_BOND_MIN_LINKS]); bond_opt_initval(&newval, min_links); err = __bond_opt_set(bond, BOND_OPT_MINLINKS, &newval, data[IFLA_BOND_MIN_LINKS], extack); if (err) return err; } if (data[IFLA_BOND_LP_INTERVAL]) { int lp_interval = nla_get_u32(data[IFLA_BOND_LP_INTERVAL]); bond_opt_initval(&newval, lp_interval); err = __bond_opt_set(bond, BOND_OPT_LP_INTERVAL, &newval, data[IFLA_BOND_LP_INTERVAL], extack); if (err) return err; } if (data[IFLA_BOND_PACKETS_PER_SLAVE]) { int packets_per_slave = nla_get_u32(data[IFLA_BOND_PACKETS_PER_SLAVE]); bond_opt_initval(&newval, packets_per_slave); err = __bond_opt_set(bond, BOND_OPT_PACKETS_PER_SLAVE, &newval, data[IFLA_BOND_PACKETS_PER_SLAVE], extack); if (err) return err; } if (data[IFLA_BOND_AD_LACP_ACTIVE]) { int lacp_active = nla_get_u8(data[IFLA_BOND_AD_LACP_ACTIVE]); bond_opt_initval(&newval, lacp_active); err = __bond_opt_set(bond, BOND_OPT_LACP_ACTIVE, &newval, data[IFLA_BOND_AD_LACP_ACTIVE], extack); if (err) return err; } if (data[IFLA_BOND_AD_LACP_RATE]) { int lacp_rate = nla_get_u8(data[IFLA_BOND_AD_LACP_RATE]); bond_opt_initval(&newval, lacp_rate); err = __bond_opt_set(bond, BOND_OPT_LACP_RATE, &newval, data[IFLA_BOND_AD_LACP_RATE], extack); if (err) return err; } if (data[IFLA_BOND_AD_SELECT]) { int ad_select = nla_get_u8(data[IFLA_BOND_AD_SELECT]); bond_opt_initval(&newval, ad_select); err = __bond_opt_set(bond, BOND_OPT_AD_SELECT, &newval, data[IFLA_BOND_AD_SELECT], extack); if (err) return err; } if (data[IFLA_BOND_AD_ACTOR_SYS_PRIO]) { int actor_sys_prio = nla_get_u16(data[IFLA_BOND_AD_ACTOR_SYS_PRIO]); bond_opt_initval(&newval, actor_sys_prio); err = __bond_opt_set(bond, BOND_OPT_AD_ACTOR_SYS_PRIO, &newval, data[IFLA_BOND_AD_ACTOR_SYS_PRIO], extack); if (err) return err; } if (data[IFLA_BOND_AD_USER_PORT_KEY]) { int port_key = nla_get_u16(data[IFLA_BOND_AD_USER_PORT_KEY]); bond_opt_initval(&newval, port_key); err = __bond_opt_set(bond, BOND_OPT_AD_USER_PORT_KEY, &newval, data[IFLA_BOND_AD_USER_PORT_KEY], extack); if (err) return err; } if (data[IFLA_BOND_AD_ACTOR_SYSTEM]) { if (nla_len(data[IFLA_BOND_AD_ACTOR_SYSTEM]) != ETH_ALEN) return -EINVAL; bond_opt_initval(&newval, nla_get_u64(data[IFLA_BOND_AD_ACTOR_SYSTEM])); err = __bond_opt_set(bond, BOND_OPT_AD_ACTOR_SYSTEM, &newval, data[IFLA_BOND_AD_ACTOR_SYSTEM], extack); if (err) return err; } if (data[IFLA_BOND_TLB_DYNAMIC_LB]) { int dynamic_lb = nla_get_u8(data[IFLA_BOND_TLB_DYNAMIC_LB]); bond_opt_initval(&newval, dynamic_lb); err = __bond_opt_set(bond, BOND_OPT_TLB_DYNAMIC_LB, &newval, data[IFLA_BOND_TLB_DYNAMIC_LB], extack); if (err) return err; } if (data[IFLA_BOND_MISSED_MAX]) { int missed_max = nla_get_u8(data[IFLA_BOND_MISSED_MAX]); bond_opt_initval(&newval, missed_max); err = __bond_opt_set(bond, BOND_OPT_MISSED_MAX, &newval, data[IFLA_BOND_MISSED_MAX], extack); if (err) return err; } if (data[IFLA_BOND_COUPLED_CONTROL]) { int coupled_control = nla_get_u8(data[IFLA_BOND_COUPLED_CONTROL]); bond_opt_initval(&newval, coupled_control); err = __bond_opt_set(bond, BOND_OPT_COUPLED_CONTROL, &newval, data[IFLA_BOND_COUPLED_CONTROL], extack); if (err) return err; } if (data[IFLA_BOND_BROADCAST_NEIGH]) { int broadcast_neigh = nla_get_u8(data[IFLA_BOND_BROADCAST_NEIGH]); bond_opt_initval(&newval, broadcast_neigh); err = __bond_opt_set(bond, BOND_OPT_BROADCAST_NEIGH, &newval, data[IFLA_BOND_BROADCAST_NEIGH], extack); if (err) return err; } return 0; } static int bond_newlink(struct net_device *bond_dev, struct rtnl_newlink_params *params, struct netlink_ext_ack *extack) { struct bonding *bond = netdev_priv(bond_dev); struct nlattr **data = params->data; struct nlattr **tb = params->tb; int err; err = register_netdevice(bond_dev); if (err) return err; netif_carrier_off(bond_dev); bond_work_init_all(bond); err = bond_changelink(bond_dev, tb, data, extack); if (err) { bond_work_cancel_all(bond); unregister_netdevice(bond_dev); } return err; } static size_t bond_get_size(const struct net_device *bond_dev) { return nla_total_size(sizeof(u8)) + /* IFLA_BOND_MODE */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_ACTIVE_SLAVE */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_MIIMON */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_UPDELAY */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_DOWNDELAY */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_USE_CARRIER */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_ARP_INTERVAL */ /* IFLA_BOND_ARP_IP_TARGET */ nla_total_size(sizeof(struct nlattr)) + nla_total_size(sizeof(u32)) * BOND_MAX_ARP_TARGETS + nla_total_size(sizeof(u32)) + /* IFLA_BOND_ARP_VALIDATE */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_ARP_ALL_TARGETS */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_PRIMARY */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_PRIMARY_RESELECT */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_FAIL_OVER_MAC */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_XMIT_HASH_POLICY */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_RESEND_IGMP */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_NUM_PEER_NOTIF */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_ALL_SLAVES_ACTIVE */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_MIN_LINKS */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_LP_INTERVAL */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_PACKETS_PER_SLAVE */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_AD_LACP_ACTIVE */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_AD_LACP_RATE */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_AD_SELECT */ nla_total_size(sizeof(struct nlattr)) + /* IFLA_BOND_AD_INFO */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_INFO_AGGREGATOR */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_INFO_NUM_PORTS */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_INFO_ACTOR_KEY */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_INFO_PARTNER_KEY*/ nla_total_size(ETH_ALEN) + /* IFLA_BOND_AD_INFO_PARTNER_MAC*/ nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_ACTOR_SYS_PRIO */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_USER_PORT_KEY */ nla_total_size(ETH_ALEN) + /* IFLA_BOND_AD_ACTOR_SYSTEM */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_TLB_DYNAMIC_LB */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_PEER_NOTIF_DELAY */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_MISSED_MAX */ /* IFLA_BOND_NS_IP6_TARGET */ nla_total_size(sizeof(struct nlattr)) + nla_total_size(sizeof(struct in6_addr)) * BOND_MAX_NS_TARGETS + nla_total_size(sizeof(u8)) + /* IFLA_BOND_COUPLED_CONTROL */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_BROADCAST_NEIGH */ 0; } static int bond_option_active_slave_get_ifindex(struct bonding *bond) { const struct net_device *slave; int ifindex; rcu_read_lock(); slave = bond_option_active_slave_get_rcu(bond); ifindex = slave ? slave->ifindex : 0; rcu_read_unlock(); return ifindex; } static int bond_fill_info(struct sk_buff *skb, const struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); unsigned int packets_per_slave; int ifindex, i, targets_added; struct nlattr *targets; struct slave *primary; if (nla_put_u8(skb, IFLA_BOND_MODE, BOND_MODE(bond))) goto nla_put_failure; ifindex = bond_option_active_slave_get_ifindex(bond); if (ifindex && nla_put_u32(skb, IFLA_BOND_ACTIVE_SLAVE, ifindex)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BOND_MIIMON, bond->params.miimon)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BOND_UPDELAY, bond->params.updelay * bond->params.miimon)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BOND_DOWNDELAY, bond->params.downdelay * bond->params.miimon)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BOND_PEER_NOTIF_DELAY, bond->params.peer_notif_delay * bond->params.miimon)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_USE_CARRIER, 1)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BOND_ARP_INTERVAL, bond->params.arp_interval)) goto nla_put_failure; targets = nla_nest_start_noflag(skb, IFLA_BOND_ARP_IP_TARGET); if (!targets) goto nla_put_failure; targets_added = 0; for (i = 0; i < BOND_MAX_ARP_TARGETS; i++) { if (bond->params.arp_targets[i]) { if (nla_put_be32(skb, i, bond->params.arp_targets[i])) goto nla_put_failure; targets_added = 1; } } if (targets_added) nla_nest_end(skb, targets); else nla_nest_cancel(skb, targets); if (nla_put_u32(skb, IFLA_BOND_ARP_VALIDATE, bond->params.arp_validate)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BOND_ARP_ALL_TARGETS, bond->params.arp_all_targets)) goto nla_put_failure; #if IS_ENABLED(CONFIG_IPV6) targets = nla_nest_start(skb, IFLA_BOND_NS_IP6_TARGET); if (!targets) goto nla_put_failure; targets_added = 0; for (i = 0; i < BOND_MAX_NS_TARGETS; i++) { if (!ipv6_addr_any(&bond->params.ns_targets[i])) { if (nla_put_in6_addr(skb, i, &bond->params.ns_targets[i])) goto nla_put_failure; targets_added = 1; } } if (targets_added) nla_nest_end(skb, targets); else nla_nest_cancel(skb, targets); #endif primary = rtnl_dereference(bond->primary_slave); if (primary && nla_put_u32(skb, IFLA_BOND_PRIMARY, primary->dev->ifindex)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_PRIMARY_RESELECT, bond->params.primary_reselect)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_FAIL_OVER_MAC, bond->params.fail_over_mac)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_XMIT_HASH_POLICY, bond->params.xmit_policy)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BOND_RESEND_IGMP, bond->params.resend_igmp)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_NUM_PEER_NOTIF, bond->params.num_peer_notif)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_ALL_SLAVES_ACTIVE, bond->params.all_slaves_active)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BOND_MIN_LINKS, bond->params.min_links)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BOND_LP_INTERVAL, bond->params.lp_interval)) goto nla_put_failure; packets_per_slave = bond->params.packets_per_slave; if (nla_put_u32(skb, IFLA_BOND_PACKETS_PER_SLAVE, packets_per_slave)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_AD_LACP_ACTIVE, bond->params.lacp_active)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_AD_LACP_RATE, bond->params.lacp_fast)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_AD_SELECT, bond->params.ad_select)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_TLB_DYNAMIC_LB, bond->params.tlb_dynamic_lb)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_MISSED_MAX, bond->params.missed_max)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_COUPLED_CONTROL, bond->params.coupled_control)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_BROADCAST_NEIGH, bond->params.broadcast_neighbor)) goto nla_put_failure; if (BOND_MODE(bond) == BOND_MODE_8023AD) { struct ad_info info; if (capable(CAP_NET_ADMIN)) { if (nla_put_u16(skb, IFLA_BOND_AD_ACTOR_SYS_PRIO, bond->params.ad_actor_sys_prio)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_BOND_AD_USER_PORT_KEY, bond->params.ad_user_port_key)) goto nla_put_failure; if (nla_put(skb, IFLA_BOND_AD_ACTOR_SYSTEM, ETH_ALEN, &bond->params.ad_actor_system)) goto nla_put_failure; } if (!bond_3ad_get_active_agg_info(bond, &info)) { struct nlattr *nest; nest = nla_nest_start_noflag(skb, IFLA_BOND_AD_INFO); if (!nest) goto nla_put_failure; if (nla_put_u16(skb, IFLA_BOND_AD_INFO_AGGREGATOR, info.aggregator_id)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_BOND_AD_INFO_NUM_PORTS, info.ports)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_BOND_AD_INFO_ACTOR_KEY, info.actor_key)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_BOND_AD_INFO_PARTNER_KEY, info.partner_key)) goto nla_put_failure; if (nla_put(skb, IFLA_BOND_AD_INFO_PARTNER_MAC, sizeof(info.partner_system), &info.partner_system)) goto nla_put_failure; nla_nest_end(skb, nest); } } return 0; nla_put_failure: return -EMSGSIZE; } static size_t bond_get_linkxstats_size(const struct net_device *dev, int attr) { switch (attr) { case IFLA_STATS_LINK_XSTATS: case IFLA_STATS_LINK_XSTATS_SLAVE: break; default: return 0; } return bond_3ad_stats_size() + nla_total_size(0); } static int bond_fill_linkxstats(struct sk_buff *skb, const struct net_device *dev, int *prividx, int attr) { struct nlattr *nla __maybe_unused; struct slave *slave = NULL; struct nlattr *nest, *nest2; struct bonding *bond; switch (attr) { case IFLA_STATS_LINK_XSTATS: bond = netdev_priv(dev); break; case IFLA_STATS_LINK_XSTATS_SLAVE: slave = bond_slave_get_rtnl(dev); if (!slave) return 0; bond = slave->bond; break; default: return -EINVAL; } nest = nla_nest_start_noflag(skb, LINK_XSTATS_TYPE_BOND); if (!nest) return -EMSGSIZE; if (BOND_MODE(bond) == BOND_MODE_8023AD) { struct bond_3ad_stats *stats; if (slave) stats = &SLAVE_AD_INFO(slave)->stats; else stats = &BOND_AD_INFO(bond).stats; nest2 = nla_nest_start_noflag(skb, BOND_XSTATS_3AD); if (!nest2) { nla_nest_end(skb, nest); return -EMSGSIZE; } if (bond_3ad_stats_fill(skb, stats)) { nla_nest_cancel(skb, nest2); nla_nest_end(skb, nest); return -EMSGSIZE; } nla_nest_end(skb, nest2); } nla_nest_end(skb, nest); return 0; } struct rtnl_link_ops bond_link_ops __read_mostly = { .kind = "bond", .priv_size = sizeof(struct bonding), .setup = bond_setup, .maxtype = IFLA_BOND_MAX, .policy = bond_policy, .validate = bond_validate, .newlink = bond_newlink, .changelink = bond_changelink, .get_size = bond_get_size, .fill_info = bond_fill_info, .get_num_tx_queues = bond_get_num_tx_queues, .get_num_rx_queues = bond_get_num_tx_queues, /* Use the same number as for TX queues */ .fill_linkxstats = bond_fill_linkxstats, .get_linkxstats_size = bond_get_linkxstats_size, .slave_maxtype = IFLA_BOND_SLAVE_MAX, .slave_policy = bond_slave_policy, .slave_changelink = bond_slave_changelink, .get_slave_size = bond_get_slave_size, .fill_slave_info = bond_fill_slave_info, }; int __init bond_netlink_init(void) { return rtnl_link_register(&bond_link_ops); } void bond_netlink_fini(void) { rtnl_link_unregister(&bond_link_ops); } MODULE_ALIAS_RTNL_LINK("bond");
16 16 16 16 12 12 16 16 12 15 16 16 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 // SPDX-License-Identifier: GPL-2.0-or-later /* * Poly1305 authenticator algorithm, RFC7539 * * Copyright (C) 2015 Martin Willi * * Based on public domain code by Andrew Moon and Daniel J. Bernstein. */ #include <crypto/internal/poly1305.h> #include <linux/export.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/string.h> #include <linux/unaligned.h> #ifdef CONFIG_CRYPTO_LIB_POLY1305_ARCH #include "poly1305.h" /* $(SRCARCH)/poly1305.h */ #else #define poly1305_block_init poly1305_block_init_generic #define poly1305_blocks poly1305_blocks_generic #define poly1305_emit poly1305_emit_generic #endif void poly1305_init(struct poly1305_desc_ctx *desc, const u8 key[POLY1305_KEY_SIZE]) { desc->s[0] = get_unaligned_le32(key + 16); desc->s[1] = get_unaligned_le32(key + 20); desc->s[2] = get_unaligned_le32(key + 24); desc->s[3] = get_unaligned_le32(key + 28); desc->buflen = 0; poly1305_block_init(&desc->state, key); } EXPORT_SYMBOL(poly1305_init); void poly1305_update(struct poly1305_desc_ctx *desc, const u8 *src, unsigned int nbytes) { if (desc->buflen + nbytes >= POLY1305_BLOCK_SIZE) { unsigned int bulk_len; if (desc->buflen) { unsigned int l = POLY1305_BLOCK_SIZE - desc->buflen; memcpy(&desc->buf[desc->buflen], src, l); src += l; nbytes -= l; poly1305_blocks(&desc->state, desc->buf, POLY1305_BLOCK_SIZE, 1); desc->buflen = 0; } bulk_len = round_down(nbytes, POLY1305_BLOCK_SIZE); nbytes %= POLY1305_BLOCK_SIZE; if (bulk_len) { poly1305_blocks(&desc->state, src, bulk_len, 1); src += bulk_len; } } if (nbytes) { memcpy(&desc->buf[desc->buflen], src, nbytes); desc->buflen += nbytes; } } EXPORT_SYMBOL(poly1305_update); void poly1305_final(struct poly1305_desc_ctx *desc, u8 *dst) { if (unlikely(desc->buflen)) { desc->buf[desc->buflen++] = 1; memset(desc->buf + desc->buflen, 0, POLY1305_BLOCK_SIZE - desc->buflen); poly1305_blocks(&desc->state, desc->buf, POLY1305_BLOCK_SIZE, 0); } poly1305_emit(&desc->state.h, dst, desc->s); *desc = (struct poly1305_desc_ctx){}; } EXPORT_SYMBOL(poly1305_final); #ifdef poly1305_mod_init_arch static int __init poly1305_mod_init(void) { poly1305_mod_init_arch(); return 0; } subsys_initcall(poly1305_mod_init); static void __exit poly1305_mod_exit(void) { } module_exit(poly1305_mod_exit); #endif MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Poly1305 authenticator algorithm, RFC7539");
21 1 21 19 9 5 5 1 1 5 19 19 9 1 5 1 1 7 13 13 13 14 14 14 13 13 14 13 5 5 5 5 5 5 5 5 5 5 5 5 11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 // SPDX-License-Identifier: GPL-2.0-or-later /* Decoder for ASN.1 BER/DER/CER encoded bytestream * * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/export.h> #include <linux/kernel.h> #include <linux/errno.h> #include <linux/module.h> #include <linux/asn1_decoder.h> #include <linux/asn1_ber_bytecode.h> static const unsigned char asn1_op_lengths[ASN1_OP__NR] = { /* OPC TAG JMP ACT */ [ASN1_OP_MATCH] = 1 + 1, [ASN1_OP_MATCH_OR_SKIP] = 1 + 1, [ASN1_OP_MATCH_ACT] = 1 + 1 + 1, [ASN1_OP_MATCH_ACT_OR_SKIP] = 1 + 1 + 1, [ASN1_OP_MATCH_JUMP] = 1 + 1 + 1, [ASN1_OP_MATCH_JUMP_OR_SKIP] = 1 + 1 + 1, [ASN1_OP_MATCH_ANY] = 1, [ASN1_OP_MATCH_ANY_OR_SKIP] = 1, [ASN1_OP_MATCH_ANY_ACT] = 1 + 1, [ASN1_OP_MATCH_ANY_ACT_OR_SKIP] = 1 + 1, [ASN1_OP_COND_MATCH_OR_SKIP] = 1 + 1, [ASN1_OP_COND_MATCH_ACT_OR_SKIP] = 1 + 1 + 1, [ASN1_OP_COND_MATCH_JUMP_OR_SKIP] = 1 + 1 + 1, [ASN1_OP_COND_MATCH_ANY] = 1, [ASN1_OP_COND_MATCH_ANY_OR_SKIP] = 1, [ASN1_OP_COND_MATCH_ANY_ACT] = 1 + 1, [ASN1_OP_COND_MATCH_ANY_ACT_OR_SKIP] = 1 + 1, [ASN1_OP_COND_FAIL] = 1, [ASN1_OP_COMPLETE] = 1, [ASN1_OP_ACT] = 1 + 1, [ASN1_OP_MAYBE_ACT] = 1 + 1, [ASN1_OP_RETURN] = 1, [ASN1_OP_END_SEQ] = 1, [ASN1_OP_END_SEQ_OF] = 1 + 1, [ASN1_OP_END_SET] = 1, [ASN1_OP_END_SET_OF] = 1 + 1, [ASN1_OP_END_SEQ_ACT] = 1 + 1, [ASN1_OP_END_SEQ_OF_ACT] = 1 + 1 + 1, [ASN1_OP_END_SET_ACT] = 1 + 1, [ASN1_OP_END_SET_OF_ACT] = 1 + 1 + 1, }; /* * Find the length of an indefinite length object * @data: The data buffer * @datalen: The end of the innermost containing element in the buffer * @_dp: The data parse cursor (updated before returning) * @_len: Where to return the size of the element. * @_errmsg: Where to return a pointer to an error message on error */ static int asn1_find_indefinite_length(const unsigned char *data, size_t datalen, size_t *_dp, size_t *_len, const char **_errmsg) { unsigned char tag, tmp; size_t dp = *_dp, len, n; int indef_level = 1; next_tag: if (unlikely(datalen - dp < 2)) { if (datalen == dp) goto missing_eoc; goto data_overrun_error; } /* Extract a tag from the data */ tag = data[dp++]; if (tag == ASN1_EOC) { /* It appears to be an EOC. */ if (data[dp++] != 0) goto invalid_eoc; if (--indef_level <= 0) { *_len = dp - *_dp; *_dp = dp; return 0; } goto next_tag; } if (unlikely((tag & 0x1f) == ASN1_LONG_TAG)) { do { if (unlikely(datalen - dp < 2)) goto data_overrun_error; tmp = data[dp++]; } while (tmp & 0x80); } /* Extract the length */ len = data[dp++]; if (len <= 0x7f) goto check_length; if (unlikely(len == ASN1_INDEFINITE_LENGTH)) { /* Indefinite length */ if (unlikely((tag & ASN1_CONS_BIT) == ASN1_PRIM << 5)) goto indefinite_len_primitive; indef_level++; goto next_tag; } n = len - 0x80; if (unlikely(n > sizeof(len) - 1)) goto length_too_long; if (unlikely(n > datalen - dp)) goto data_overrun_error; len = 0; for (; n > 0; n--) { len <<= 8; len |= data[dp++]; } check_length: if (len > datalen - dp) goto data_overrun_error; dp += len; goto next_tag; length_too_long: *_errmsg = "Unsupported length"; goto error; indefinite_len_primitive: *_errmsg = "Indefinite len primitive not permitted"; goto error; invalid_eoc: *_errmsg = "Invalid length EOC"; goto error; data_overrun_error: *_errmsg = "Data overrun error"; goto error; missing_eoc: *_errmsg = "Missing EOC in indefinite len cons"; error: *_dp = dp; return -1; } /** * asn1_ber_decoder - Decoder BER/DER/CER ASN.1 according to pattern * @decoder: The decoder definition (produced by asn1_compiler) * @context: The caller's context (to be passed to the action functions) * @data: The encoded data * @datalen: The size of the encoded data * * Decode BER/DER/CER encoded ASN.1 data according to a bytecode pattern * produced by asn1_compiler. Action functions are called on marked tags to * allow the caller to retrieve significant data. * * LIMITATIONS: * * To keep down the amount of stack used by this function, the following limits * have been imposed: * * (1) This won't handle datalen > 65535 without increasing the size of the * cons stack elements and length_too_long checking. * * (2) The stack of constructed types is 10 deep. If the depth of non-leaf * constructed types exceeds this, the decode will fail. * * (3) The SET type (not the SET OF type) isn't really supported as tracking * what members of the set have been seen is a pain. */ int asn1_ber_decoder(const struct asn1_decoder *decoder, void *context, const unsigned char *data, size_t datalen) { const unsigned char *machine = decoder->machine; const asn1_action_t *actions = decoder->actions; size_t machlen = decoder->machlen; enum asn1_opcode op; unsigned char tag = 0, csp = 0, jsp = 0, optag = 0, hdr = 0; const char *errmsg; size_t pc = 0, dp = 0, tdp = 0, len = 0; int ret; unsigned char flags = 0; #define FLAG_INDEFINITE_LENGTH 0x01 #define FLAG_MATCHED 0x02 #define FLAG_LAST_MATCHED 0x04 /* Last tag matched */ #define FLAG_CONS 0x20 /* Corresponds to CONS bit in the opcode tag * - ie. whether or not we are going to parse * a compound type. */ #define NR_CONS_STACK 10 unsigned short cons_dp_stack[NR_CONS_STACK]; unsigned short cons_datalen_stack[NR_CONS_STACK]; unsigned char cons_hdrlen_stack[NR_CONS_STACK]; #define NR_JUMP_STACK 10 unsigned char jump_stack[NR_JUMP_STACK]; if (datalen > 65535) return -EMSGSIZE; next_op: pr_debug("next_op: pc=\e[32m%zu\e[m/%zu dp=\e[33m%zu\e[m/%zu C=%d J=%d\n", pc, machlen, dp, datalen, csp, jsp); if (unlikely(pc >= machlen)) goto machine_overrun_error; op = machine[pc]; if (unlikely(pc + asn1_op_lengths[op] > machlen)) goto machine_overrun_error; /* If this command is meant to match a tag, then do that before * evaluating the command. */ if (op <= ASN1_OP__MATCHES_TAG) { unsigned char tmp; /* Skip conditional matches if possible */ if ((op & ASN1_OP_MATCH__COND && flags & FLAG_MATCHED) || (op & ASN1_OP_MATCH__SKIP && dp == datalen)) { flags &= ~FLAG_LAST_MATCHED; pc += asn1_op_lengths[op]; goto next_op; } flags = 0; hdr = 2; /* Extract a tag from the data */ if (unlikely(datalen - dp < 2)) goto data_overrun_error; tag = data[dp++]; if (unlikely((tag & 0x1f) == ASN1_LONG_TAG)) goto long_tag_not_supported; if (op & ASN1_OP_MATCH__ANY) { pr_debug("- any %02x\n", tag); } else { /* Extract the tag from the machine * - Either CONS or PRIM are permitted in the data if * CONS is not set in the op stream, otherwise CONS * is mandatory. */ optag = machine[pc + 1]; flags |= optag & FLAG_CONS; /* Determine whether the tag matched */ tmp = optag ^ tag; tmp &= ~(optag & ASN1_CONS_BIT); pr_debug("- match? %02x %02x %02x\n", tag, optag, tmp); if (tmp != 0) { /* All odd-numbered tags are MATCH_OR_SKIP. */ if (op & ASN1_OP_MATCH__SKIP) { pc += asn1_op_lengths[op]; dp--; goto next_op; } goto tag_mismatch; } } flags |= FLAG_MATCHED; len = data[dp++]; if (len > 0x7f) { if (unlikely(len == ASN1_INDEFINITE_LENGTH)) { /* Indefinite length */ if (unlikely(!(tag & ASN1_CONS_BIT))) goto indefinite_len_primitive; flags |= FLAG_INDEFINITE_LENGTH; if (unlikely(2 > datalen - dp)) goto data_overrun_error; } else { int n = len - 0x80; if (unlikely(n > 2)) goto length_too_long; if (unlikely(n > datalen - dp)) goto data_overrun_error; hdr += n; for (len = 0; n > 0; n--) { len <<= 8; len |= data[dp++]; } if (unlikely(len > datalen - dp)) goto data_overrun_error; } } else { if (unlikely(len > datalen - dp)) goto data_overrun_error; } if (flags & FLAG_CONS) { /* For expected compound forms, we stack the positions * of the start and end of the data. */ if (unlikely(csp >= NR_CONS_STACK)) goto cons_stack_overflow; cons_dp_stack[csp] = dp; cons_hdrlen_stack[csp] = hdr; if (!(flags & FLAG_INDEFINITE_LENGTH)) { cons_datalen_stack[csp] = datalen; datalen = dp + len; } else { cons_datalen_stack[csp] = 0; } csp++; } pr_debug("- TAG: %02x %zu%s\n", tag, len, flags & FLAG_CONS ? " CONS" : ""); tdp = dp; } /* Decide how to handle the operation */ switch (op) { case ASN1_OP_MATCH: case ASN1_OP_MATCH_OR_SKIP: case ASN1_OP_MATCH_ACT: case ASN1_OP_MATCH_ACT_OR_SKIP: case ASN1_OP_MATCH_ANY: case ASN1_OP_MATCH_ANY_OR_SKIP: case ASN1_OP_MATCH_ANY_ACT: case ASN1_OP_MATCH_ANY_ACT_OR_SKIP: case ASN1_OP_COND_MATCH_OR_SKIP: case ASN1_OP_COND_MATCH_ACT_OR_SKIP: case ASN1_OP_COND_MATCH_ANY: case ASN1_OP_COND_MATCH_ANY_OR_SKIP: case ASN1_OP_COND_MATCH_ANY_ACT: case ASN1_OP_COND_MATCH_ANY_ACT_OR_SKIP: if (!(flags & FLAG_CONS)) { if (flags & FLAG_INDEFINITE_LENGTH) { size_t tmp = dp; ret = asn1_find_indefinite_length( data, datalen, &tmp, &len, &errmsg); if (ret < 0) goto error; } pr_debug("- LEAF: %zu\n", len); } if (op & ASN1_OP_MATCH__ACT) { unsigned char act; if (op & ASN1_OP_MATCH__ANY) act = machine[pc + 1]; else act = machine[pc + 2]; ret = actions[act](context, hdr, tag, data + dp, len); if (ret < 0) return ret; } if (!(flags & FLAG_CONS)) dp += len; pc += asn1_op_lengths[op]; goto next_op; case ASN1_OP_MATCH_JUMP: case ASN1_OP_MATCH_JUMP_OR_SKIP: case ASN1_OP_COND_MATCH_JUMP_OR_SKIP: pr_debug("- MATCH_JUMP\n"); if (unlikely(jsp == NR_JUMP_STACK)) goto jump_stack_overflow; jump_stack[jsp++] = pc + asn1_op_lengths[op]; pc = machine[pc + 2]; goto next_op; case ASN1_OP_COND_FAIL: if (unlikely(!(flags & FLAG_MATCHED))) goto tag_mismatch; pc += asn1_op_lengths[op]; goto next_op; case ASN1_OP_COMPLETE: if (unlikely(jsp != 0 || csp != 0)) { pr_err("ASN.1 decoder error: Stacks not empty at completion (%u, %u)\n", jsp, csp); return -EBADMSG; } return 0; case ASN1_OP_END_SET: case ASN1_OP_END_SET_ACT: if (unlikely(!(flags & FLAG_MATCHED))) goto tag_mismatch; fallthrough; case ASN1_OP_END_SEQ: case ASN1_OP_END_SET_OF: case ASN1_OP_END_SEQ_OF: case ASN1_OP_END_SEQ_ACT: case ASN1_OP_END_SET_OF_ACT: case ASN1_OP_END_SEQ_OF_ACT: if (unlikely(csp <= 0)) goto cons_stack_underflow; csp--; tdp = cons_dp_stack[csp]; hdr = cons_hdrlen_stack[csp]; len = datalen; datalen = cons_datalen_stack[csp]; pr_debug("- end cons t=%zu dp=%zu l=%zu/%zu\n", tdp, dp, len, datalen); if (datalen == 0) { /* Indefinite length - check for the EOC. */ datalen = len; if (unlikely(datalen - dp < 2)) goto data_overrun_error; if (data[dp++] != 0) { if (op & ASN1_OP_END__OF) { dp--; csp++; pc = machine[pc + 1]; pr_debug("- continue\n"); goto next_op; } goto missing_eoc; } if (data[dp++] != 0) goto invalid_eoc; len = dp - tdp - 2; } else { if (dp < len && (op & ASN1_OP_END__OF)) { datalen = len; csp++; pc = machine[pc + 1]; pr_debug("- continue\n"); goto next_op; } if (dp != len) goto cons_length_error; len -= tdp; pr_debug("- cons len l=%zu d=%zu\n", len, dp - tdp); } if (op & ASN1_OP_END__ACT) { unsigned char act; if (op & ASN1_OP_END__OF) act = machine[pc + 2]; else act = machine[pc + 1]; ret = actions[act](context, hdr, 0, data + tdp, len); if (ret < 0) return ret; } pc += asn1_op_lengths[op]; goto next_op; case ASN1_OP_MAYBE_ACT: if (!(flags & FLAG_LAST_MATCHED)) { pc += asn1_op_lengths[op]; goto next_op; } fallthrough; case ASN1_OP_ACT: ret = actions[machine[pc + 1]](context, hdr, tag, data + tdp, len); if (ret < 0) return ret; pc += asn1_op_lengths[op]; goto next_op; case ASN1_OP_RETURN: if (unlikely(jsp <= 0)) goto jump_stack_underflow; pc = jump_stack[--jsp]; flags |= FLAG_MATCHED | FLAG_LAST_MATCHED; goto next_op; default: break; } /* Shouldn't reach here */ pr_err("ASN.1 decoder error: Found reserved opcode (%u) pc=%zu\n", op, pc); return -EBADMSG; data_overrun_error: errmsg = "Data overrun error"; goto error; machine_overrun_error: errmsg = "Machine overrun error"; goto error; jump_stack_underflow: errmsg = "Jump stack underflow"; goto error; jump_stack_overflow: errmsg = "Jump stack overflow"; goto error; cons_stack_underflow: errmsg = "Cons stack underflow"; goto error; cons_stack_overflow: errmsg = "Cons stack overflow"; goto error; cons_length_error: errmsg = "Cons length error"; goto error; missing_eoc: errmsg = "Missing EOC in indefinite len cons"; goto error; invalid_eoc: errmsg = "Invalid length EOC"; goto error; length_too_long: errmsg = "Unsupported length"; goto error; indefinite_len_primitive: errmsg = "Indefinite len primitive not permitted"; goto error; tag_mismatch: errmsg = "Unexpected tag"; goto error; long_tag_not_supported: errmsg = "Long tag not supported"; error: pr_debug("\nASN1: %s [m=%zu d=%zu ot=%02x t=%02x l=%zu]\n", errmsg, pc, dp, optag, tag, len); return -EBADMSG; } EXPORT_SYMBOL_GPL(asn1_ber_decoder); MODULE_DESCRIPTION("Decoder for ASN.1 BER/DER/CER encoded bytestream"); MODULE_LICENSE("GPL");
3242 437 5 69 681 789 2 3705 2779 128 3518 753 3696 7 6 6 611 611 284 23 39 21 15 14 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* Red Black Trees (C) 1999 Andrea Arcangeli <andrea@suse.de> linux/include/linux/rbtree.h To use rbtrees you'll have to implement your own insert and search cores. This will avoid us to use callbacks and to drop drammatically performances. I know it's not the cleaner way, but in C (not in C++) to get performances and genericity... See Documentation/core-api/rbtree.rst for documentation and samples. */ #ifndef _LINUX_RBTREE_H #define _LINUX_RBTREE_H #include <linux/container_of.h> #include <linux/rbtree_types.h> #include <linux/stddef.h> #include <linux/rcupdate.h> #define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3)) #define rb_entry(ptr, type, member) container_of(ptr, type, member) #define RB_EMPTY_ROOT(root) (READ_ONCE((root)->rb_node) == NULL) /* 'empty' nodes are nodes that are known not to be inserted in an rbtree */ #define RB_EMPTY_NODE(node) \ ((node)->__rb_parent_color == (unsigned long)(node)) #define RB_CLEAR_NODE(node) \ ((node)->__rb_parent_color = (unsigned long)(node)) extern void rb_insert_color(struct rb_node *, struct rb_root *); extern void rb_erase(struct rb_node *, struct rb_root *); /* Find logical next and previous nodes in a tree */ extern struct rb_node *rb_next(const struct rb_node *); extern struct rb_node *rb_prev(const struct rb_node *); /* * This function returns the first node (in sort order) of the tree. */ static inline struct rb_node *rb_first(const struct rb_root *root) { struct rb_node *n; n = root->rb_node; if (!n) return NULL; while (n->rb_left) n = n->rb_left; return n; } /* * This function returns the last node (in sort order) of the tree. */ static inline struct rb_node *rb_last(const struct rb_root *root) { struct rb_node *n; n = root->rb_node; if (!n) return NULL; while (n->rb_right) n = n->rb_right; return n; } /* Postorder iteration - always visit the parent after its children */ extern struct rb_node *rb_first_postorder(const struct rb_root *); extern struct rb_node *rb_next_postorder(const struct rb_node *); /* Fast replacement of a single node without remove/rebalance/add/rebalance */ extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, struct rb_root *root); extern void rb_replace_node_rcu(struct rb_node *victim, struct rb_node *new, struct rb_root *root); static inline void rb_link_node(struct rb_node *node, struct rb_node *parent, struct rb_node **rb_link) { node->__rb_parent_color = (unsigned long)parent; node->rb_left = node->rb_right = NULL; *rb_link = node; } static inline void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent, struct rb_node **rb_link) { node->__rb_parent_color = (unsigned long)parent; node->rb_left = node->rb_right = NULL; rcu_assign_pointer(*rb_link, node); } #define rb_entry_safe(ptr, type, member) \ ({ typeof(ptr) ____ptr = (ptr); \ ____ptr ? rb_entry(____ptr, type, member) : NULL; \ }) /** * rbtree_postorder_for_each_entry_safe - iterate in post-order over rb_root of * given type allowing the backing memory of @pos to be invalidated * * @pos: the 'type *' to use as a loop cursor. * @n: another 'type *' to use as temporary storage * @root: 'rb_root *' of the rbtree. * @field: the name of the rb_node field within 'type'. * * rbtree_postorder_for_each_entry_safe() provides a similar guarantee as * list_for_each_entry_safe() and allows the iteration to continue independent * of changes to @pos by the body of the loop. * * Note, however, that it cannot handle other modifications that re-order the * rbtree it is iterating over. This includes calling rb_erase() on @pos, as * rb_erase() may rebalance the tree, causing us to miss some nodes. */ #define rbtree_postorder_for_each_entry_safe(pos, n, root, field) \ for (pos = rb_entry_safe(rb_first_postorder(root), typeof(*pos), field); \ pos && ({ n = rb_entry_safe(rb_next_postorder(&pos->field), \ typeof(*pos), field); 1; }); \ pos = n) /* Same as rb_first(), but O(1) */ #define rb_first_cached(root) (root)->rb_leftmost static inline void rb_insert_color_cached(struct rb_node *node, struct rb_root_cached *root, bool leftmost) { if (leftmost) root->rb_leftmost = node; rb_insert_color(node, &root->rb_root); } static inline struct rb_node * rb_erase_cached(struct rb_node *node, struct rb_root_cached *root) { struct rb_node *leftmost = NULL; if (root->rb_leftmost == node) leftmost = root->rb_leftmost = rb_next(node); rb_erase(node, &root->rb_root); return leftmost; } static inline void rb_replace_node_cached(struct rb_node *victim, struct rb_node *new, struct rb_root_cached *root) { if (root->rb_leftmost == victim) root->rb_leftmost = new; rb_replace_node(victim, new, &root->rb_root); } /* * The below helper functions use 2 operators with 3 different * calling conventions. The operators are related like: * * comp(a->key,b) < 0 := less(a,b) * comp(a->key,b) > 0 := less(b,a) * comp(a->key,b) == 0 := !less(a,b) && !less(b,a) * * If these operators define a partial order on the elements we make no * guarantee on which of the elements matching the key is found. See * rb_find(). * * The reason for this is to allow the find() interface without requiring an * on-stack dummy object, which might not be feasible due to object size. */ /** * rb_add_cached() - insert @node into the leftmost cached tree @tree * @node: node to insert * @tree: leftmost cached tree to insert @node into * @less: operator defining the (partial) node order * * Returns @node when it is the new leftmost, or NULL. */ static __always_inline struct rb_node * rb_add_cached(struct rb_node *node, struct rb_root_cached *tree, bool (*less)(struct rb_node *, const struct rb_node *)) { struct rb_node **link = &tree->rb_root.rb_node; struct rb_node *parent = NULL; bool leftmost = true; while (*link) { parent = *link; if (less(node, parent)) { link = &parent->rb_left; } else { link = &parent->rb_right; leftmost = false; } } rb_link_node(node, parent, link); rb_insert_color_cached(node, tree, leftmost); return leftmost ? node : NULL; } /** * rb_add() - insert @node into @tree * @node: node to insert * @tree: tree to insert @node into * @less: operator defining the (partial) node order */ static __always_inline void rb_add(struct rb_node *node, struct rb_root *tree, bool (*less)(struct rb_node *, const struct rb_node *)) { struct rb_node **link = &tree->rb_node; struct rb_node *parent = NULL; while (*link) { parent = *link; if (less(node, parent)) link = &parent->rb_left; else link = &parent->rb_right; } rb_link_node(node, parent, link); rb_insert_color(node, tree); } /** * rb_find_add_cached() - find equivalent @node in @tree, or add @node * @node: node to look-for / insert * @tree: tree to search / modify * @cmp: operator defining the node order * * Returns the rb_node matching @node, or NULL when no match is found and @node * is inserted. */ static __always_inline struct rb_node * rb_find_add_cached(struct rb_node *node, struct rb_root_cached *tree, int (*cmp)(const struct rb_node *new, const struct rb_node *exist)) { bool leftmost = true; struct rb_node **link = &tree->rb_root.rb_node; struct rb_node *parent = NULL; int c; while (*link) { parent = *link; c = cmp(node, parent); if (c < 0) { link = &parent->rb_left; } else if (c > 0) { link = &parent->rb_right; leftmost = false; } else { return parent; } } rb_link_node(node, parent, link); rb_insert_color_cached(node, tree, leftmost); return NULL; } /** * rb_find_add() - find equivalent @node in @tree, or add @node * @node: node to look-for / insert * @tree: tree to search / modify * @cmp: operator defining the node order * * Returns the rb_node matching @node, or NULL when no match is found and @node * is inserted. */ static __always_inline struct rb_node * rb_find_add(struct rb_node *node, struct rb_root *tree, int (*cmp)(struct rb_node *, const struct rb_node *)) { struct rb_node **link = &tree->rb_node; struct rb_node *parent = NULL; int c; while (*link) { parent = *link; c = cmp(node, parent); if (c < 0) link = &parent->rb_left; else if (c > 0) link = &parent->rb_right; else return parent; } rb_link_node(node, parent, link); rb_insert_color(node, tree); return NULL; } /** * rb_find_add_rcu() - find equivalent @node in @tree, or add @node * @node: node to look-for / insert * @tree: tree to search / modify * @cmp: operator defining the node order * * Adds a Store-Release for link_node. * * Returns the rb_node matching @node, or NULL when no match is found and @node * is inserted. */ static __always_inline struct rb_node * rb_find_add_rcu(struct rb_node *node, struct rb_root *tree, int (*cmp)(struct rb_node *, const struct rb_node *)) { struct rb_node **link = &tree->rb_node; struct rb_node *parent = NULL; int c; while (*link) { parent = *link; c = cmp(node, parent); if (c < 0) link = &parent->rb_left; else if (c > 0) link = &parent->rb_right; else return parent; } rb_link_node_rcu(node, parent, link); rb_insert_color(node, tree); return NULL; } /** * rb_find() - find @key in tree @tree * @key: key to match * @tree: tree to search * @cmp: operator defining the node order * * Returns the rb_node matching @key or NULL. */ static __always_inline struct rb_node * rb_find(const void *key, const struct rb_root *tree, int (*cmp)(const void *key, const struct rb_node *)) { struct rb_node *node = tree->rb_node; while (node) { int c = cmp(key, node); if (c < 0) node = node->rb_left; else if (c > 0) node = node->rb_right; else return node; } return NULL; } /** * rb_find_rcu() - find @key in tree @tree * @key: key to match * @tree: tree to search * @cmp: operator defining the node order * * Notably, tree descent vs concurrent tree rotations is unsound and can result * in false-negatives. * * Returns the rb_node matching @key or NULL. */ static __always_inline struct rb_node * rb_find_rcu(const void *key, const struct rb_root *tree, int (*cmp)(const void *key, const struct rb_node *)) { struct rb_node *node = tree->rb_node; while (node) { int c = cmp(key, node); if (c < 0) node = rcu_dereference_raw(node->rb_left); else if (c > 0) node = rcu_dereference_raw(node->rb_right); else return node; } return NULL; } /** * rb_find_first() - find the first @key in @tree * @key: key to match * @tree: tree to search * @cmp: operator defining node order * * Returns the leftmost node matching @key, or NULL. */ static __always_inline struct rb_node * rb_find_first(const void *key, const struct rb_root *tree, int (*cmp)(const void *key, const struct rb_node *)) { struct rb_node *node = tree->rb_node; struct rb_node *match = NULL; while (node) { int c = cmp(key, node); if (c <= 0) { if (!c) match = node; node = node->rb_left; } else if (c > 0) { node = node->rb_right; } } return match; } /** * rb_next_match() - find the next @key in @tree * @key: key to match * @tree: tree to search * @cmp: operator defining node order * * Returns the next node matching @key, or NULL. */ static __always_inline struct rb_node * rb_next_match(const void *key, struct rb_node *node, int (*cmp)(const void *key, const struct rb_node *)) { node = rb_next(node); if (node && cmp(key, node)) node = NULL; return node; } /** * rb_for_each() - iterates a subtree matching @key * @node: iterator * @key: key to match * @tree: tree to search * @cmp: operator defining node order */ #define rb_for_each(node, key, tree, cmp) \ for ((node) = rb_find_first((key), (tree), (cmp)); \ (node); (node) = rb_next_match((key), (node), (cmp))) #endif /* _LINUX_RBTREE_H */
5 2 4 4 3 3 3 5 5 5 5 5 5 5 5 5 5 5 2 3 3 3 3 2 1 3 4 4 4 2 4 4 4 4 4 2 4 1 1 1 1 1 4 4 2 2 4 4 4 4 4 4 4 4 4 1 5 5 5 5 5 5 1 1 1 1 19 19 18 19 18 19 19 19 19 19 19 19 19 18 18 19 14 5 3 18 3 5 5 5 18 13 5 19 5 2 15 4 18 18 3 3 17 18 20 20 20 18 20 3 15 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2010 Red Hat, Inc. * Copyright (C) 2016-2023 Christoph Hellwig. */ #include <linux/iomap.h> #include <linux/buffer_head.h> #include <linux/writeback.h> #include <linux/swap.h> #include <linux/migrate.h> #include <linux/fserror.h> #include "internal.h" #include "trace.h" #include "../internal.h" /* * Structure allocated for each folio to track per-block uptodate, dirty state * and I/O completions. */ struct iomap_folio_state { spinlock_t state_lock; unsigned int read_bytes_pending; atomic_t write_bytes_pending; /* * Each block has two bits in this bitmap: * Bits [0..blocks_per_folio) has the uptodate status. * Bits [b_p_f...(2*b_p_f)) has the dirty status. */ unsigned long state[]; }; static inline bool ifs_is_fully_uptodate(struct folio *folio, struct iomap_folio_state *ifs) { struct inode *inode = folio->mapping->host; return bitmap_full(ifs->state, i_blocks_per_folio(inode, folio)); } /* * Find the next uptodate block in the folio. end_blk is inclusive. * If no uptodate block is found, this will return end_blk + 1. */ static unsigned ifs_next_uptodate_block(struct folio *folio, unsigned start_blk, unsigned end_blk) { struct iomap_folio_state *ifs = folio->private; return find_next_bit(ifs->state, end_blk + 1, start_blk); } /* * Find the next non-uptodate block in the folio. end_blk is inclusive. * If no non-uptodate block is found, this will return end_blk + 1. */ static unsigned ifs_next_nonuptodate_block(struct folio *folio, unsigned start_blk, unsigned end_blk) { struct iomap_folio_state *ifs = folio->private; return find_next_zero_bit(ifs->state, end_blk + 1, start_blk); } static bool ifs_set_range_uptodate(struct folio *folio, struct iomap_folio_state *ifs, size_t off, size_t len) { struct inode *inode = folio->mapping->host; unsigned int first_blk = off >> inode->i_blkbits; unsigned int last_blk = (off + len - 1) >> inode->i_blkbits; unsigned int nr_blks = last_blk - first_blk + 1; bitmap_set(ifs->state, first_blk, nr_blks); return ifs_is_fully_uptodate(folio, ifs); } static void iomap_set_range_uptodate(struct folio *folio, size_t off, size_t len) { struct iomap_folio_state *ifs = folio->private; unsigned long flags; bool uptodate = true; if (folio_test_uptodate(folio)) return; if (ifs) { spin_lock_irqsave(&ifs->state_lock, flags); uptodate = ifs_set_range_uptodate(folio, ifs, off, len); spin_unlock_irqrestore(&ifs->state_lock, flags); } if (uptodate) folio_mark_uptodate(folio); } /* * Find the next dirty block in the folio. end_blk is inclusive. * If no dirty block is found, this will return end_blk + 1. */ static unsigned ifs_next_dirty_block(struct folio *folio, unsigned start_blk, unsigned end_blk) { struct iomap_folio_state *ifs = folio->private; struct inode *inode = folio->mapping->host; unsigned int blks = i_blocks_per_folio(inode, folio); return find_next_bit(ifs->state, blks + end_blk + 1, blks + start_blk) - blks; } /* * Find the next clean block in the folio. end_blk is inclusive. * If no clean block is found, this will return end_blk + 1. */ static unsigned ifs_next_clean_block(struct folio *folio, unsigned start_blk, unsigned end_blk) { struct iomap_folio_state *ifs = folio->private; struct inode *inode = folio->mapping->host; unsigned int blks = i_blocks_per_folio(inode, folio); return find_next_zero_bit(ifs->state, blks + end_blk + 1, blks + start_blk) - blks; } static unsigned ifs_find_dirty_range(struct folio *folio, struct iomap_folio_state *ifs, u64 *range_start, u64 range_end) { struct inode *inode = folio->mapping->host; unsigned start_blk = offset_in_folio(folio, *range_start) >> inode->i_blkbits; unsigned end_blk = min_not_zero( offset_in_folio(folio, range_end) >> inode->i_blkbits, i_blocks_per_folio(inode, folio)) - 1; unsigned nblks; start_blk = ifs_next_dirty_block(folio, start_blk, end_blk); if (start_blk > end_blk) return 0; if (start_blk == end_blk) nblks = 1; else nblks = ifs_next_clean_block(folio, start_blk + 1, end_blk) - start_blk; *range_start = folio_pos(folio) + (start_blk << inode->i_blkbits); return nblks << inode->i_blkbits; } static unsigned iomap_find_dirty_range(struct folio *folio, u64 *range_start, u64 range_end) { struct iomap_folio_state *ifs = folio->private; if (*range_start >= range_end) return 0; if (ifs) return ifs_find_dirty_range(folio, ifs, range_start, range_end); return range_end - *range_start; } static void ifs_clear_range_dirty(struct folio *folio, struct iomap_folio_state *ifs, size_t off, size_t len) { struct inode *inode = folio->mapping->host; unsigned int blks_per_folio = i_blocks_per_folio(inode, folio); unsigned int first_blk = (off >> inode->i_blkbits); unsigned int last_blk = (off + len - 1) >> inode->i_blkbits; unsigned int nr_blks = last_blk - first_blk + 1; unsigned long flags; spin_lock_irqsave(&ifs->state_lock, flags); bitmap_clear(ifs->state, first_blk + blks_per_folio, nr_blks); spin_unlock_irqrestore(&ifs->state_lock, flags); } static void iomap_clear_range_dirty(struct folio *folio, size_t off, size_t len) { struct iomap_folio_state *ifs = folio->private; if (ifs) ifs_clear_range_dirty(folio, ifs, off, len); } static void ifs_set_range_dirty(struct folio *folio, struct iomap_folio_state *ifs, size_t off, size_t len) { struct inode *inode = folio->mapping->host; unsigned int blks_per_folio = i_blocks_per_folio(inode, folio); unsigned int first_blk = (off >> inode->i_blkbits); unsigned int last_blk = (off + len - 1) >> inode->i_blkbits; unsigned int nr_blks = last_blk - first_blk + 1; unsigned long flags; spin_lock_irqsave(&ifs->state_lock, flags); bitmap_set(ifs->state, first_blk + blks_per_folio, nr_blks); spin_unlock_irqrestore(&ifs->state_lock, flags); } static void iomap_set_range_dirty(struct folio *folio, size_t off, size_t len) { struct iomap_folio_state *ifs = folio->private; if (ifs) ifs_set_range_dirty(folio, ifs, off, len); } static struct iomap_folio_state *ifs_alloc(struct inode *inode, struct folio *folio, unsigned int flags) { struct iomap_folio_state *ifs = folio->private; unsigned int nr_blocks = i_blocks_per_folio(inode, folio); gfp_t gfp; if (ifs || nr_blocks <= 1) return ifs; if (flags & IOMAP_NOWAIT) gfp = GFP_NOWAIT; else gfp = GFP_NOFS | __GFP_NOFAIL; /* * ifs->state tracks two sets of state flags when the * filesystem block size is smaller than the folio size. * The first state tracks per-block uptodate and the * second tracks per-block dirty state. */ ifs = kzalloc(struct_size(ifs, state, BITS_TO_LONGS(2 * nr_blocks)), gfp); if (!ifs) return ifs; spin_lock_init(&ifs->state_lock); if (folio_test_uptodate(folio)) bitmap_set(ifs->state, 0, nr_blocks); if (folio_test_dirty(folio)) bitmap_set(ifs->state, nr_blocks, nr_blocks); folio_attach_private(folio, ifs); return ifs; } static void ifs_free(struct folio *folio) { struct iomap_folio_state *ifs = folio_detach_private(folio); if (!ifs) return; WARN_ON_ONCE(ifs->read_bytes_pending != 0); WARN_ON_ONCE(atomic_read(&ifs->write_bytes_pending)); WARN_ON_ONCE(ifs_is_fully_uptodate(folio, ifs) != folio_test_uptodate(folio)); kfree(ifs); } /* * Calculate how many bytes to truncate based off the number of blocks to * truncate and the end position to start truncating from. */ static size_t iomap_bytes_to_truncate(loff_t end_pos, unsigned block_bits, unsigned blocks_truncated) { unsigned block_size = 1 << block_bits; unsigned block_offset = end_pos & (block_size - 1); if (!block_offset) return blocks_truncated << block_bits; return ((blocks_truncated - 1) << block_bits) + block_offset; } /* * Calculate the range inside the folio that we actually need to read. */ static void iomap_adjust_read_range(struct inode *inode, struct folio *folio, loff_t *pos, loff_t length, size_t *offp, size_t *lenp) { struct iomap_folio_state *ifs = folio->private; loff_t orig_pos = *pos; loff_t isize = i_size_read(inode); unsigned block_bits = inode->i_blkbits; unsigned block_size = (1 << block_bits); size_t poff = offset_in_folio(folio, *pos); size_t plen = min_t(loff_t, folio_size(folio) - poff, length); size_t orig_plen = plen; unsigned first = poff >> block_bits; unsigned last = (poff + plen - 1) >> block_bits; /* * If the block size is smaller than the page size, we need to check the * per-block uptodate status and adjust the offset and length if needed * to avoid reading in already uptodate ranges. */ if (ifs) { unsigned int next, blocks_skipped; next = ifs_next_nonuptodate_block(folio, first, last); blocks_skipped = next - first; if (blocks_skipped) { unsigned long block_offset = *pos & (block_size - 1); unsigned bytes_skipped = (blocks_skipped << block_bits) - block_offset; *pos += bytes_skipped; poff += bytes_skipped; plen -= bytes_skipped; } first = next; /* truncate len if we find any trailing uptodate block(s) */ if (++next <= last) { next = ifs_next_uptodate_block(folio, next, last); if (next <= last) { plen -= iomap_bytes_to_truncate(*pos + plen, block_bits, last - next + 1); last = next - 1; } } } /* * If the extent spans the block that contains the i_size, we need to * handle both halves separately so that we properly zero data in the * page cache for blocks that are entirely outside of i_size. */ if (orig_pos <= isize && orig_pos + orig_plen > isize) { unsigned end = offset_in_folio(folio, isize - 1) >> block_bits; if (first <= end && last > end) plen -= iomap_bytes_to_truncate(*pos + plen, block_bits, last - end); } *offp = poff; *lenp = plen; } static inline bool iomap_block_needs_zeroing(const struct iomap_iter *iter, loff_t pos) { const struct iomap *srcmap = iomap_iter_srcmap(iter); return srcmap->type != IOMAP_MAPPED || (srcmap->flags & IOMAP_F_NEW) || pos >= i_size_read(iter->inode); } /** * iomap_read_inline_data - copy inline data into the page cache * @iter: iteration structure * @folio: folio to copy to * * Copy the inline data in @iter into @folio and zero out the rest of the folio. * Only a single IOMAP_INLINE extent is allowed at the end of each file. * Returns zero for success to complete the read, or the usual negative errno. */ static int iomap_read_inline_data(const struct iomap_iter *iter, struct folio *folio) { const struct iomap *iomap = iomap_iter_srcmap(iter); size_t size = i_size_read(iter->inode) - iomap->offset; size_t offset = offset_in_folio(folio, iomap->offset); if (WARN_ON_ONCE(!iomap->inline_data)) return -EIO; if (folio_test_uptodate(folio)) return 0; if (WARN_ON_ONCE(size > iomap->length)) { fserror_report_io(iter->inode, FSERR_BUFFERED_READ, iomap->offset, size, -EIO, GFP_NOFS); return -EIO; } if (offset > 0) ifs_alloc(iter->inode, folio, iter->flags); folio_fill_tail(folio, offset, iomap->inline_data, size); iomap_set_range_uptodate(folio, offset, folio_size(folio) - offset); return 0; } void iomap_finish_folio_read(struct folio *folio, size_t off, size_t len, int error) { struct iomap_folio_state *ifs = folio->private; bool uptodate = !error; bool finished = true; if (ifs) { unsigned long flags; spin_lock_irqsave(&ifs->state_lock, flags); if (!error) uptodate = ifs_set_range_uptodate(folio, ifs, off, len); ifs->read_bytes_pending -= len; finished = !ifs->read_bytes_pending; spin_unlock_irqrestore(&ifs->state_lock, flags); } if (error) fserror_report_io(folio->mapping->host, FSERR_BUFFERED_READ, folio_pos(folio) + off, len, error, GFP_ATOMIC); if (finished) folio_end_read(folio, uptodate); } EXPORT_SYMBOL_GPL(iomap_finish_folio_read); static void iomap_read_init(struct folio *folio) { struct iomap_folio_state *ifs = folio->private; if (ifs) { size_t len = folio_size(folio); /* * ifs->read_bytes_pending is used to track how many bytes are * read in asynchronously by the IO helper. We need to track * this so that we can know when the IO helper has finished * reading in all the necessary ranges of the folio and can end * the read. * * Increase ->read_bytes_pending by the folio size to start, and * add a +1 bias. We'll subtract the bias and any uptodate / * zeroed ranges that did not require IO in iomap_read_end() * after we're done processing the folio. * * We do this because otherwise, we would have to increment * ifs->read_bytes_pending every time a range in the folio needs * to be read in, which can get expensive since the spinlock * needs to be held whenever modifying ifs->read_bytes_pending. * * We add the bias to ensure the read has not been ended on the * folio when iomap_read_end() is called, even if the IO helper * has already finished reading in the entire folio. */ spin_lock_irq(&ifs->state_lock); WARN_ON_ONCE(ifs->read_bytes_pending != 0); ifs->read_bytes_pending = len + 1; spin_unlock_irq(&ifs->state_lock); } } /* * This ends IO if no bytes were submitted to an IO helper. * * Otherwise, this calibrates ifs->read_bytes_pending to represent only the * submitted bytes (see comment in iomap_read_init()). If all bytes submitted * have already been completed by the IO helper, then this will end the read. * Else the IO helper will end the read after all submitted ranges have been * read. */ static void iomap_read_end(struct folio *folio, size_t bytes_submitted) { struct iomap_folio_state *ifs = folio->private; if (ifs) { bool end_read, uptodate; spin_lock_irq(&ifs->state_lock); if (!ifs->read_bytes_pending) { WARN_ON_ONCE(bytes_submitted); spin_unlock_irq(&ifs->state_lock); folio_unlock(folio); return; } /* * Subtract any bytes that were initially accounted to * read_bytes_pending but skipped for IO. The +1 accounts for * the bias we added in iomap_read_init(). */ ifs->read_bytes_pending -= (folio_size(folio) + 1 - bytes_submitted); /* * If !ifs->read_bytes_pending, this means all pending reads by * the IO helper have already completed, which means we need to * end the folio read here. If ifs->read_bytes_pending != 0, * the IO helper will end the folio read. */ end_read = !ifs->read_bytes_pending; if (end_read) uptodate = ifs_is_fully_uptodate(folio, ifs); spin_unlock_irq(&ifs->state_lock); if (end_read) folio_end_read(folio, uptodate); } else if (!bytes_submitted) { /* * If there were no bytes submitted, this means we are * responsible for unlocking the folio here, since no IO helper * has taken ownership of it. If there were bytes submitted, * then the IO helper will end the read via * iomap_finish_folio_read(). */ folio_unlock(folio); } } static int iomap_read_folio_iter(struct iomap_iter *iter, struct iomap_read_folio_ctx *ctx, size_t *bytes_submitted) { const struct iomap *iomap = &iter->iomap; loff_t pos = iter->pos; loff_t length = iomap_length(iter); struct folio *folio = ctx->cur_folio; size_t poff, plen; loff_t pos_diff; int ret; if (iomap->type == IOMAP_INLINE) { ret = iomap_read_inline_data(iter, folio); if (ret) return ret; return iomap_iter_advance(iter, length); } ifs_alloc(iter->inode, folio, iter->flags); length = min_t(loff_t, length, folio_size(folio) - offset_in_folio(folio, pos)); while (length) { iomap_adjust_read_range(iter->inode, folio, &pos, length, &poff, &plen); pos_diff = pos - iter->pos; if (WARN_ON_ONCE(pos_diff + plen > length)) return -EIO; ret = iomap_iter_advance(iter, pos_diff); if (ret) return ret; if (plen == 0) return 0; /* zero post-eof blocks as the page may be mapped */ if (iomap_block_needs_zeroing(iter, pos)) { folio_zero_range(folio, poff, plen); iomap_set_range_uptodate(folio, poff, plen); } else { if (!*bytes_submitted) iomap_read_init(folio); ret = ctx->ops->read_folio_range(iter, ctx, plen); if (ret < 0) fserror_report_io(iter->inode, FSERR_BUFFERED_READ, pos, plen, ret, GFP_NOFS); if (ret) return ret; *bytes_submitted += plen; } ret = iomap_iter_advance(iter, plen); if (ret) return ret; length -= pos_diff + plen; pos = iter->pos; } return 0; } void iomap_read_folio(const struct iomap_ops *ops, struct iomap_read_folio_ctx *ctx, void *private) { struct folio *folio = ctx->cur_folio; struct iomap_iter iter = { .inode = folio->mapping->host, .pos = folio_pos(folio), .len = folio_size(folio), .private = private, }; size_t bytes_submitted = 0; int ret; trace_iomap_readpage(iter.inode, 1); while ((ret = iomap_iter(&iter, ops)) > 0) iter.status = iomap_read_folio_iter(&iter, ctx, &bytes_submitted); if (ctx->ops->submit_read) ctx->ops->submit_read(ctx); iomap_read_end(folio, bytes_submitted); } EXPORT_SYMBOL_GPL(iomap_read_folio); static int iomap_readahead_iter(struct iomap_iter *iter, struct iomap_read_folio_ctx *ctx, size_t *cur_bytes_submitted) { int ret; while (iomap_length(iter)) { if (ctx->cur_folio && offset_in_folio(ctx->cur_folio, iter->pos) == 0) { iomap_read_end(ctx->cur_folio, *cur_bytes_submitted); ctx->cur_folio = NULL; } if (!ctx->cur_folio) { ctx->cur_folio = readahead_folio(ctx->rac); if (WARN_ON_ONCE(!ctx->cur_folio)) return -EINVAL; *cur_bytes_submitted = 0; } ret = iomap_read_folio_iter(iter, ctx, cur_bytes_submitted); if (ret) return ret; } return 0; } /** * iomap_readahead - Attempt to read pages from a file. * @ops: The operations vector for the filesystem. * @ctx: The ctx used for issuing readahead. * * This function is for filesystems to call to implement their readahead * address_space operation. * * Context: The @ops callbacks may submit I/O (eg to read the addresses of * blocks from disc), and may wait for it. The caller may be trying to * access a different page, and so sleeping excessively should be avoided. * It may allocate memory, but should avoid costly allocations. This * function is called with memalloc_nofs set, so allocations will not cause * the filesystem to be reentered. */ void iomap_readahead(const struct iomap_ops *ops, struct iomap_read_folio_ctx *ctx, void *private) { struct readahead_control *rac = ctx->rac; struct iomap_iter iter = { .inode = rac->mapping->host, .pos = readahead_pos(rac), .len = readahead_length(rac), .private = private, }; size_t cur_bytes_submitted; trace_iomap_readahead(rac->mapping->host, readahead_count(rac)); while (iomap_iter(&iter, ops) > 0) iter.status = iomap_readahead_iter(&iter, ctx, &cur_bytes_submitted); if (ctx->ops->submit_read) ctx->ops->submit_read(ctx); if (ctx->cur_folio) iomap_read_end(ctx->cur_folio, cur_bytes_submitted); } EXPORT_SYMBOL_GPL(iomap_readahead); /* * iomap_is_partially_uptodate checks whether blocks within a folio are * uptodate or not. * * Returns true if all blocks which correspond to the specified part * of the folio are uptodate. */ bool iomap_is_partially_uptodate(struct folio *folio, size_t from, size_t count) { struct iomap_folio_state *ifs = folio->private; struct inode *inode = folio->mapping->host; unsigned first, last; if (!ifs) return false; /* Caller's range may extend past the end of this folio */ count = min(folio_size(folio) - from, count); /* First and last blocks in range within folio */ first = from >> inode->i_blkbits; last = (from + count - 1) >> inode->i_blkbits; return ifs_next_nonuptodate_block(folio, first, last) > last; } EXPORT_SYMBOL_GPL(iomap_is_partially_uptodate); /** * iomap_get_folio - get a folio reference for writing * @iter: iteration structure * @pos: start offset of write * @len: Suggested size of folio to create. * * Returns a locked reference to the folio at @pos, or an error pointer if the * folio could not be obtained. */ struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos, size_t len) { fgf_t fgp = FGP_WRITEBEGIN | FGP_NOFS; if (iter->flags & IOMAP_NOWAIT) fgp |= FGP_NOWAIT; if (iter->flags & IOMAP_DONTCACHE) fgp |= FGP_DONTCACHE; fgp |= fgf_set_order(len); return __filemap_get_folio(iter->inode->i_mapping, pos >> PAGE_SHIFT, fgp, mapping_gfp_mask(iter->inode->i_mapping)); } EXPORT_SYMBOL_GPL(iomap_get_folio); bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags) { trace_iomap_release_folio(folio->mapping->host, folio_pos(folio), folio_size(folio)); /* * If the folio is dirty, we refuse to release our metadata because * it may be partially dirty. Once we track per-block dirty state, * we can release the metadata if every block is dirty. */ if (folio_test_dirty(folio)) return false; ifs_free(folio); return true; } EXPORT_SYMBOL_GPL(iomap_release_folio); void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len) { trace_iomap_invalidate_folio(folio->mapping->host, folio_pos(folio) + offset, len); /* * If we're invalidating the entire folio, clear the dirty state * from it and release it to avoid unnecessary buildup of the LRU. */ if (offset == 0 && len == folio_size(folio)) { WARN_ON_ONCE(folio_test_writeback(folio)); folio_cancel_dirty(folio); ifs_free(folio); } } EXPORT_SYMBOL_GPL(iomap_invalidate_folio); bool iomap_dirty_folio(struct address_space *mapping, struct folio *folio) { struct inode *inode = mapping->host; size_t len = folio_size(folio); ifs_alloc(inode, folio, 0); iomap_set_range_dirty(folio, 0, len); return filemap_dirty_folio(mapping, folio); } EXPORT_SYMBOL_GPL(iomap_dirty_folio); static void iomap_write_failed(struct inode *inode, loff_t pos, unsigned len) { loff_t i_size = i_size_read(inode); /* * Only truncate newly allocated pages beyoned EOF, even if the * write started inside the existing inode size. */ if (pos + len > i_size) truncate_pagecache_range(inode, max(pos, i_size), pos + len - 1); } static int __iomap_write_begin(const struct iomap_iter *iter, const struct iomap_write_ops *write_ops, size_t len, struct folio *folio) { struct iomap_folio_state *ifs; loff_t pos = iter->pos; loff_t block_size = i_blocksize(iter->inode); loff_t block_start = round_down(pos, block_size); loff_t block_end = round_up(pos + len, block_size); unsigned int nr_blocks = i_blocks_per_folio(iter->inode, folio); size_t from = offset_in_folio(folio, pos), to = from + len; size_t poff, plen; /* * If the write or zeroing completely overlaps the current folio, then * entire folio will be dirtied so there is no need for * per-block state tracking structures to be attached to this folio. * For the unshare case, we must read in the ondisk contents because we * are not changing pagecache contents. */ if (!(iter->flags & IOMAP_UNSHARE) && pos <= folio_pos(folio) && pos + len >= folio_next_pos(folio)) return 0; ifs = ifs_alloc(iter->inode, folio, iter->flags); if ((iter->flags & IOMAP_NOWAIT) && !ifs && nr_blocks > 1) return -EAGAIN; if (folio_test_uptodate(folio)) return 0; do { iomap_adjust_read_range(iter->inode, folio, &block_start, block_end - block_start, &poff, &plen); if (plen == 0) break; /* * If the read range will be entirely overwritten by the write, * we can skip having to zero/read it in. */ if (!(iter->flags & IOMAP_UNSHARE) && from <= poff && to >= poff + plen) continue; if (iomap_block_needs_zeroing(iter, block_start)) { if (WARN_ON_ONCE(iter->flags & IOMAP_UNSHARE)) return -EIO; folio_zero_segments(folio, poff, from, to, poff + plen); } else { int status; if (iter->flags & IOMAP_NOWAIT) return -EAGAIN; if (write_ops && write_ops->read_folio_range) status = write_ops->read_folio_range(iter, folio, block_start, plen); else status = iomap_bio_read_folio_range_sync(iter, folio, block_start, plen); if (status < 0) fserror_report_io(iter->inode, FSERR_BUFFERED_READ, pos, len, status, GFP_NOFS); if (status) return status; } iomap_set_range_uptodate(folio, poff, plen); } while ((block_start += plen) < block_end); return 0; } static struct folio *__iomap_get_folio(struct iomap_iter *iter, const struct iomap_write_ops *write_ops, size_t len) { loff_t pos = iter->pos; if (!mapping_large_folio_support(iter->inode->i_mapping)) len = min_t(size_t, len, PAGE_SIZE - offset_in_page(pos)); if (iter->iomap.flags & IOMAP_F_FOLIO_BATCH) { struct folio *folio = folio_batch_next(iter->fbatch); if (!folio) return NULL; /* * The folio mapping generally shouldn't have changed based on * fs locks, but be consistent with filemap lookup and retry * the iter if it does. */ folio_lock(folio); if (unlikely(folio->mapping != iter->inode->i_mapping)) { iter->iomap.flags |= IOMAP_F_STALE; folio_unlock(folio); return NULL; } folio_get(folio); folio_wait_stable(folio); return folio; } if (write_ops && write_ops->get_folio) return write_ops->get_folio(iter, pos, len); return iomap_get_folio(iter, pos, len); } static void __iomap_put_folio(struct iomap_iter *iter, const struct iomap_write_ops *write_ops, size_t ret, struct folio *folio) { loff_t pos = iter->pos; if (write_ops && write_ops->put_folio) { write_ops->put_folio(iter->inode, pos, ret, folio); } else { folio_unlock(folio); folio_put(folio); } } /* trim pos and bytes to within a given folio */ static loff_t iomap_trim_folio_range(struct iomap_iter *iter, struct folio *folio, size_t *offset, u64 *bytes) { loff_t pos = iter->pos; size_t fsize = folio_size(folio); WARN_ON_ONCE(pos < folio_pos(folio)); WARN_ON_ONCE(pos >= folio_pos(folio) + fsize); *offset = offset_in_folio(folio, pos); *bytes = min(*bytes, fsize - *offset); return pos; } static int iomap_write_begin_inline(const struct iomap_iter *iter, struct folio *folio) { /* needs more work for the tailpacking case; disable for now */ if (WARN_ON_ONCE(iomap_iter_srcmap(iter)->offset != 0)) return -EIO; return iomap_read_inline_data(iter, folio); } /* * Grab and prepare a folio for write based on iter state. Returns the folio, * offset, and length. Callers can optionally pass a max length *plen, * otherwise init to zero. */ static int iomap_write_begin(struct iomap_iter *iter, const struct iomap_write_ops *write_ops, struct folio **foliop, size_t *poffset, u64 *plen) { const struct iomap *srcmap = iomap_iter_srcmap(iter); loff_t pos; u64 len = min_t(u64, SIZE_MAX, iomap_length(iter)); struct folio *folio; int status = 0; len = min_not_zero(len, *plen); *foliop = NULL; *plen = 0; if (fatal_signal_pending(current)) return -EINTR; folio = __iomap_get_folio(iter, write_ops, len); if (IS_ERR(folio)) return PTR_ERR(folio); /* * No folio means we're done with a batch. We still have range to * process so return and let the caller iterate and refill the batch. */ if (!folio) { WARN_ON_ONCE(!(iter->iomap.flags & IOMAP_F_FOLIO_BATCH)); return 0; } /* * Now we have a locked folio, before we do anything with it we need to * check that the iomap we have cached is not stale. The inode extent * mapping can change due to concurrent IO in flight (e.g. * IOMAP_UNWRITTEN state can change and memory reclaim could have * reclaimed a previously partially written page at this index after IO * completion before this write reaches this file offset) and hence we * could do the wrong thing here (zero a page range incorrectly or fail * to zero) and corrupt data. */ if (write_ops && write_ops->iomap_valid) { bool iomap_valid = write_ops->iomap_valid(iter->inode, &iter->iomap); if (!iomap_valid) { iter->iomap.flags |= IOMAP_F_STALE; status = 0; goto out_unlock; } } /* * The folios in a batch may not be contiguous. If we've skipped * forward, advance the iter to the pos of the current folio. If the * folio starts beyond the end of the mapping, it may have been trimmed * since the lookup for whatever reason. Return a NULL folio to * terminate the op. */ if (folio_pos(folio) > iter->pos) { len = min_t(u64, folio_pos(folio) - iter->pos, iomap_length(iter)); status = iomap_iter_advance(iter, len); len = iomap_length(iter); if (status || !len) goto out_unlock; } pos = iomap_trim_folio_range(iter, folio, poffset, &len); if (srcmap->type == IOMAP_INLINE) status = iomap_write_begin_inline(iter, folio); else if (srcmap->flags & IOMAP_F_BUFFER_HEAD) status = __block_write_begin_int(folio, pos, len, NULL, srcmap); else status = __iomap_write_begin(iter, write_ops, len, folio); if (unlikely(status)) goto out_unlock; *foliop = folio; *plen = len; return 0; out_unlock: __iomap_put_folio(iter, write_ops, 0, folio); return status; } static bool __iomap_write_end(struct inode *inode, loff_t pos, size_t len, size_t copied, struct folio *folio) { flush_dcache_folio(folio); /* * The blocks that were entirely written will now be uptodate, so we * don't have to worry about a read_folio reading them and overwriting a * partial write. However, if we've encountered a short write and only * partially written into a block, it will not be marked uptodate, so a * read_folio might come in and destroy our partial write. * * Do the simplest thing and just treat any short write to a * non-uptodate page as a zero-length write, and force the caller to * redo the whole thing. */ if (unlikely(copied < len && !folio_test_uptodate(folio))) return false; iomap_set_range_uptodate(folio, offset_in_folio(folio, pos), len); iomap_set_range_dirty(folio, offset_in_folio(folio, pos), copied); filemap_dirty_folio(inode->i_mapping, folio); return true; } static bool iomap_write_end_inline(const struct iomap_iter *iter, struct folio *folio, loff_t pos, size_t copied) { const struct iomap *iomap = &iter->iomap; void *addr; WARN_ON_ONCE(!folio_test_uptodate(folio)); BUG_ON(!iomap_inline_data_valid(iomap)); if (WARN_ON_ONCE(!iomap->inline_data)) return false; flush_dcache_folio(folio); addr = kmap_local_folio(folio, pos); memcpy(iomap_inline_data(iomap, pos), addr, copied); kunmap_local(addr); mark_inode_dirty(iter->inode); return true; } /* * Returns true if all copied bytes have been written to the pagecache, * otherwise return false. */ static bool iomap_write_end(struct iomap_iter *iter, size_t len, size_t copied, struct folio *folio) { const struct iomap *srcmap = iomap_iter_srcmap(iter); loff_t pos = iter->pos; if (srcmap->type == IOMAP_INLINE) return iomap_write_end_inline(iter, folio, pos, copied); if (srcmap->flags & IOMAP_F_BUFFER_HEAD) { size_t bh_written; bh_written = block_write_end(pos, len, copied, folio); WARN_ON_ONCE(bh_written != copied && bh_written != 0); return bh_written == copied; } return __iomap_write_end(iter->inode, pos, len, copied, folio); } static int iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i, const struct iomap_write_ops *write_ops) { ssize_t total_written = 0; int status = 0; struct address_space *mapping = iter->inode->i_mapping; size_t chunk = mapping_max_folio_size(mapping); unsigned int bdp_flags = (iter->flags & IOMAP_NOWAIT) ? BDP_ASYNC : 0; do { struct folio *folio; loff_t old_size; size_t offset; /* Offset into folio */ u64 bytes; /* Bytes to write to folio */ size_t copied; /* Bytes copied from user */ u64 written; /* Bytes have been written */ loff_t pos; bytes = iov_iter_count(i); retry: offset = iter->pos & (chunk - 1); bytes = min(chunk - offset, bytes); status = balance_dirty_pages_ratelimited_flags(mapping, bdp_flags); if (unlikely(status)) break; if (bytes > iomap_length(iter)) bytes = iomap_length(iter); /* * Bring in the user page that we'll copy from _first_. * Otherwise there's a nasty deadlock on copying from the * same page as we're writing to, without it being marked * up-to-date. * * For async buffered writes the assumption is that the user * page has already been faulted in. This can be optimized by * faulting the user page. */ if (unlikely(fault_in_iov_iter_readable(i, bytes) == bytes)) { status = -EFAULT; break; } status = iomap_write_begin(iter, write_ops, &folio, &offset, &bytes); if (unlikely(status)) { iomap_write_failed(iter->inode, iter->pos, bytes); break; } if (iter->iomap.flags & IOMAP_F_STALE) break; pos = iter->pos; if (mapping_writably_mapped(mapping)) flush_dcache_folio(folio); copied = copy_folio_from_iter_atomic(folio, offset, bytes, i); written = iomap_write_end(iter, bytes, copied, folio) ? copied : 0; /* * Update the in-memory inode size after copying the data into * the page cache. It's up to the file system to write the * updated size to disk, preferably after I/O completion so that * no stale data is exposed. Only once that's done can we * unlock and release the folio. */ old_size = iter->inode->i_size; if (pos + written > old_size) { i_size_write(iter->inode, pos + written); iter->iomap.flags |= IOMAP_F_SIZE_CHANGED; } __iomap_put_folio(iter, write_ops, written, folio); if (old_size < pos) pagecache_isize_extended(iter->inode, old_size, pos); cond_resched(); if (unlikely(written == 0)) { /* * A short copy made iomap_write_end() reject the * thing entirely. Might be memory poisoning * halfway through, might be a race with munmap, * might be severe memory pressure. */ iomap_write_failed(iter->inode, pos, bytes); iov_iter_revert(i, copied); if (chunk > PAGE_SIZE) chunk /= 2; if (copied) { bytes = copied; goto retry; } } else { total_written += written; iomap_iter_advance(iter, written); } } while (iov_iter_count(i) && iomap_length(iter)); return total_written ? 0 : status; } ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i, const struct iomap_ops *ops, const struct iomap_write_ops *write_ops, void *private) { struct iomap_iter iter = { .inode = iocb->ki_filp->f_mapping->host, .pos = iocb->ki_pos, .len = iov_iter_count(i), .flags = IOMAP_WRITE, .private = private, }; ssize_t ret; if (iocb->ki_flags & IOCB_NOWAIT) iter.flags |= IOMAP_NOWAIT; if (iocb->ki_flags & IOCB_DONTCACHE) iter.flags |= IOMAP_DONTCACHE; while ((ret = iomap_iter(&iter, ops)) > 0) iter.status = iomap_write_iter(&iter, i, write_ops); if (unlikely(iter.pos == iocb->ki_pos)) return ret; ret = iter.pos - iocb->ki_pos; iocb->ki_pos = iter.pos; return ret; } EXPORT_SYMBOL_GPL(iomap_file_buffered_write); static void iomap_write_delalloc_ifs_punch(struct inode *inode, struct folio *folio, loff_t start_byte, loff_t end_byte, struct iomap *iomap, iomap_punch_t punch) { unsigned int first_blk, last_blk; loff_t last_byte; u8 blkbits = inode->i_blkbits; struct iomap_folio_state *ifs; /* * When we have per-block dirty tracking, there can be * blocks within a folio which are marked uptodate * but not dirty. In that case it is necessary to punch * out such blocks to avoid leaking any delalloc blocks. */ ifs = folio->private; if (!ifs) return; last_byte = min_t(loff_t, end_byte - 1, folio_next_pos(folio) - 1); first_blk = offset_in_folio(folio, start_byte) >> blkbits; last_blk = offset_in_folio(folio, last_byte) >> blkbits; while ((first_blk = ifs_next_clean_block(folio, first_blk, last_blk)) <= last_blk) { punch(inode, folio_pos(folio) + (first_blk << blkbits), 1 << blkbits, iomap); first_blk++; } } static void iomap_write_delalloc_punch(struct inode *inode, struct folio *folio, loff_t *punch_start_byte, loff_t start_byte, loff_t end_byte, struct iomap *iomap, iomap_punch_t punch) { if (!folio_test_dirty(folio)) return; /* if dirty, punch up to offset */ if (start_byte > *punch_start_byte) { punch(inode, *punch_start_byte, start_byte - *punch_start_byte, iomap); } /* Punch non-dirty blocks within folio */ iomap_write_delalloc_ifs_punch(inode, folio, start_byte, end_byte, iomap, punch); /* * Make sure the next punch start is correctly bound to * the end of this data range, not the end of the folio. */ *punch_start_byte = min_t(loff_t, end_byte, folio_next_pos(folio)); } /* * Scan the data range passed to us for dirty page cache folios. If we find a * dirty folio, punch out the preceding range and update the offset from which * the next punch will start from. * * We can punch out storage reservations under clean pages because they either * contain data that has been written back - in which case the delalloc punch * over that range is a no-op - or they have been read faults in which case they * contain zeroes and we can remove the delalloc backing range and any new * writes to those pages will do the normal hole filling operation... * * This makes the logic simple: we only need to keep the delalloc extents only * over the dirty ranges of the page cache. * * This function uses [start_byte, end_byte) intervals (i.e. open ended) to * simplify range iterations. */ static void iomap_write_delalloc_scan(struct inode *inode, loff_t *punch_start_byte, loff_t start_byte, loff_t end_byte, struct iomap *iomap, iomap_punch_t punch) { while (start_byte < end_byte) { struct folio *folio; /* grab locked page */ folio = filemap_lock_folio(inode->i_mapping, start_byte >> PAGE_SHIFT); if (IS_ERR(folio)) { start_byte = ALIGN_DOWN(start_byte, PAGE_SIZE) + PAGE_SIZE; continue; } iomap_write_delalloc_punch(inode, folio, punch_start_byte, start_byte, end_byte, iomap, punch); /* move offset to start of next folio in range */ start_byte = folio_next_pos(folio); folio_unlock(folio); folio_put(folio); } } /* * When a short write occurs, the filesystem might need to use ->iomap_end * to remove space reservations created in ->iomap_begin. * * For filesystems that use delayed allocation, there can be dirty pages over * the delalloc extent outside the range of a short write but still within the * delalloc extent allocated for this iomap if the write raced with page * faults. * * Punch out all the delalloc blocks in the range given except for those that * have dirty data still pending in the page cache - those are going to be * written and so must still retain the delalloc backing for writeback. * * The punch() callback *must* only punch delalloc extents in the range passed * to it. It must skip over all other types of extents in the range and leave * them completely unchanged. It must do this punch atomically with respect to * other extent modifications. * * The punch() callback may be called with a folio locked to prevent writeback * extent allocation racing at the edge of the range we are currently punching. * The locked folio may or may not cover the range being punched, so it is not * safe for the punch() callback to lock folios itself. * * Lock order is: * * inode->i_rwsem (shared or exclusive) * inode->i_mapping->invalidate_lock (exclusive) * folio_lock() * ->punch * internal filesystem allocation lock * * As we are scanning the page cache for data, we don't need to reimplement the * wheel - mapping_seek_hole_data() does exactly what we need to identify the * start and end of data ranges correctly even for sub-folio block sizes. This * byte range based iteration is especially convenient because it means we * don't have to care about variable size folios, nor where the start or end of * the data range lies within a folio, if they lie within the same folio or even * if there are multiple discontiguous data ranges within the folio. * * It should be noted that mapping_seek_hole_data() is not aware of EOF, and so * can return data ranges that exist in the cache beyond EOF. e.g. a page fault * spanning EOF will initialise the post-EOF data to zeroes and mark it up to * date. A write page fault can then mark it dirty. If we then fail a write() * beyond EOF into that up to date cached range, we allocate a delalloc block * beyond EOF and then have to punch it out. Because the range is up to date, * mapping_seek_hole_data() will return it, and we will skip the punch because * the folio is dirty. THis is incorrect - we always need to punch out delalloc * beyond EOF in this case as writeback will never write back and covert that * delalloc block beyond EOF. Hence we limit the cached data scan range to EOF, * resulting in always punching out the range from the EOF to the end of the * range the iomap spans. * * Intervals are of the form [start_byte, end_byte) (i.e. open ended) because it * matches the intervals returned by mapping_seek_hole_data(). i.e. SEEK_DATA * returns the start of a data range (start_byte), and SEEK_HOLE(start_byte) * returns the end of the data range (data_end). Using closed intervals would * require sprinkling this code with magic "+ 1" and "- 1" arithmetic and expose * the code to subtle off-by-one bugs.... */ void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte, loff_t end_byte, unsigned flags, struct iomap *iomap, iomap_punch_t punch) { loff_t punch_start_byte = start_byte; loff_t scan_end_byte = min(i_size_read(inode), end_byte); /* * The caller must hold invalidate_lock to avoid races with page faults * re-instantiating folios and dirtying them via ->page_mkwrite whilst * we walk the cache and perform delalloc extent removal. Failing to do * this can leave dirty pages with no space reservation in the cache. */ lockdep_assert_held_write(&inode->i_mapping->invalidate_lock); while (start_byte < scan_end_byte) { loff_t data_end; start_byte = mapping_seek_hole_data(inode->i_mapping, start_byte, scan_end_byte, SEEK_DATA); /* * If there is no more data to scan, all that is left is to * punch out the remaining range. * * Note that mapping_seek_hole_data is only supposed to return * either an offset or -ENXIO, so WARN on any other error as * that would be an API change without updating the callers. */ if (start_byte == -ENXIO || start_byte == scan_end_byte) break; if (WARN_ON_ONCE(start_byte < 0)) return; WARN_ON_ONCE(start_byte < punch_start_byte); WARN_ON_ONCE(start_byte > scan_end_byte); /* * We find the end of this contiguous cached data range by * seeking from start_byte to the beginning of the next hole. */ data_end = mapping_seek_hole_data(inode->i_mapping, start_byte, scan_end_byte, SEEK_HOLE); if (WARN_ON_ONCE(data_end < 0)) return; /* * If we race with post-direct I/O invalidation of the page cache, * there might be no data left at start_byte. */ if (data_end == start_byte) continue; WARN_ON_ONCE(data_end < start_byte); WARN_ON_ONCE(data_end > scan_end_byte); iomap_write_delalloc_scan(inode, &punch_start_byte, start_byte, data_end, iomap, punch); /* The next data search starts at the end of this one. */ start_byte = data_end; } if (punch_start_byte < end_byte) punch(inode, punch_start_byte, end_byte - punch_start_byte, iomap); } EXPORT_SYMBOL_GPL(iomap_write_delalloc_release); static int iomap_unshare_iter(struct iomap_iter *iter, const struct iomap_write_ops *write_ops) { struct iomap *iomap = &iter->iomap; u64 bytes = iomap_length(iter); int status; if (!iomap_want_unshare_iter(iter)) return iomap_iter_advance(iter, bytes); do { struct folio *folio; size_t offset; bool ret; bytes = min_t(u64, SIZE_MAX, bytes); status = iomap_write_begin(iter, write_ops, &folio, &offset, &bytes); if (unlikely(status)) return status; if (iomap->flags & IOMAP_F_STALE) break; ret = iomap_write_end(iter, bytes, bytes, folio); __iomap_put_folio(iter, write_ops, bytes, folio); if (WARN_ON_ONCE(!ret)) return -EIO; cond_resched(); balance_dirty_pages_ratelimited(iter->inode->i_mapping); status = iomap_iter_advance(iter, bytes); if (status) break; } while ((bytes = iomap_length(iter)) > 0); return status; } int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len, const struct iomap_ops *ops, const struct iomap_write_ops *write_ops) { struct iomap_iter iter = { .inode = inode, .pos = pos, .flags = IOMAP_WRITE | IOMAP_UNSHARE, }; loff_t size = i_size_read(inode); int ret; if (pos < 0 || pos >= size) return 0; iter.len = min(len, size - pos); while ((ret = iomap_iter(&iter, ops)) > 0) iter.status = iomap_unshare_iter(&iter, write_ops); return ret; } EXPORT_SYMBOL_GPL(iomap_file_unshare); /* * Flush the remaining range of the iter and mark the current mapping stale. * This is used when zero range sees an unwritten mapping that may have had * dirty pagecache over it. */ static inline int iomap_zero_iter_flush_and_stale(struct iomap_iter *i) { struct address_space *mapping = i->inode->i_mapping; loff_t end = i->pos + i->len - 1; i->iomap.flags |= IOMAP_F_STALE; return filemap_write_and_wait_range(mapping, i->pos, end); } static int iomap_zero_iter(struct iomap_iter *iter, bool *did_zero, const struct iomap_write_ops *write_ops) { u64 bytes = iomap_length(iter); int status; do { struct folio *folio; size_t offset; bool ret; bytes = min_t(u64, SIZE_MAX, bytes); status = iomap_write_begin(iter, write_ops, &folio, &offset, &bytes); if (status) return status; if (iter->iomap.flags & IOMAP_F_STALE) break; /* a NULL folio means we're done with a folio batch */ if (!folio) { status = iomap_iter_advance_full(iter); break; } /* warn about zeroing folios beyond eof that won't write back */ WARN_ON_ONCE(folio_pos(folio) > iter->inode->i_size); trace_iomap_zero_iter(iter->inode, folio_pos(folio) + offset, bytes); folio_zero_range(folio, offset, bytes); folio_mark_accessed(folio); ret = iomap_write_end(iter, bytes, bytes, folio); __iomap_put_folio(iter, write_ops, bytes, folio); if (WARN_ON_ONCE(!ret)) return -EIO; status = iomap_iter_advance(iter, bytes); if (status) break; } while ((bytes = iomap_length(iter)) > 0); if (did_zero) *did_zero = true; return status; } /** * iomap_fill_dirty_folios - fill a folio batch with dirty folios * @iter: Iteration structure * @start: Start offset of range. Updated based on lookup progress. * @end: End offset of range * @iomap_flags: Flags to set on the associated iomap to track the batch. * * Returns the folio count directly. Also returns the associated control flag if * the the batch lookup is performed and the expected offset of a subsequent * lookup via out params. The caller is responsible to set the flag on the * associated iomap. */ unsigned int iomap_fill_dirty_folios( struct iomap_iter *iter, loff_t *start, loff_t end, unsigned int *iomap_flags) { struct address_space *mapping = iter->inode->i_mapping; pgoff_t pstart = *start >> PAGE_SHIFT; pgoff_t pend = (end - 1) >> PAGE_SHIFT; unsigned int count; if (!iter->fbatch) { *start = end; return 0; } count = filemap_get_folios_dirty(mapping, &pstart, pend, iter->fbatch); *start = (pstart << PAGE_SHIFT); *iomap_flags |= IOMAP_F_FOLIO_BATCH; return count; } EXPORT_SYMBOL_GPL(iomap_fill_dirty_folios); int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, const struct iomap_ops *ops, const struct iomap_write_ops *write_ops, void *private) { struct folio_batch fbatch; struct iomap_iter iter = { .inode = inode, .pos = pos, .len = len, .flags = IOMAP_ZERO, .private = private, .fbatch = &fbatch, }; struct address_space *mapping = inode->i_mapping; int ret; bool range_dirty; folio_batch_init(&fbatch); /* * To avoid an unconditional flush, check pagecache state and only flush * if dirty and the fs returns a mapping that might convert on * writeback. */ range_dirty = filemap_range_needs_writeback(mapping, iter.pos, iter.pos + iter.len - 1); while ((ret = iomap_iter(&iter, ops)) > 0) { const struct iomap *srcmap = iomap_iter_srcmap(&iter); if (WARN_ON_ONCE((iter.iomap.flags & IOMAP_F_FOLIO_BATCH) && srcmap->type != IOMAP_UNWRITTEN)) return -EIO; if (!(iter.iomap.flags & IOMAP_F_FOLIO_BATCH) && (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)) { s64 status; if (range_dirty) { range_dirty = false; status = iomap_zero_iter_flush_and_stale(&iter); } else { status = iomap_iter_advance_full(&iter); } iter.status = status; continue; } iter.status = iomap_zero_iter(&iter, did_zero, write_ops); } return ret; } EXPORT_SYMBOL_GPL(iomap_zero_range); int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, const struct iomap_ops *ops, const struct iomap_write_ops *write_ops, void *private) { unsigned int blocksize = i_blocksize(inode); unsigned int off = pos & (blocksize - 1); /* Block boundary? Nothing to do */ if (!off) return 0; return iomap_zero_range(inode, pos, blocksize - off, did_zero, ops, write_ops, private); } EXPORT_SYMBOL_GPL(iomap_truncate_page); static int iomap_folio_mkwrite_iter(struct iomap_iter *iter, struct folio *folio) { loff_t length = iomap_length(iter); int ret; if (iter->iomap.flags & IOMAP_F_BUFFER_HEAD) { ret = __block_write_begin_int(folio, iter->pos, length, NULL, &iter->iomap); if (ret) return ret; block_commit_write(folio, 0, length); } else { WARN_ON_ONCE(!folio_test_uptodate(folio)); folio_mark_dirty(folio); } return iomap_iter_advance(iter, length); } vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops, void *private) { struct iomap_iter iter = { .inode = file_inode(vmf->vma->vm_file), .flags = IOMAP_WRITE | IOMAP_FAULT, .private = private, }; struct folio *folio = page_folio(vmf->page); ssize_t ret; folio_lock(folio); ret = folio_mkwrite_check_truncate(folio, iter.inode); if (ret < 0) goto out_unlock; iter.pos = folio_pos(folio); iter.len = ret; while ((ret = iomap_iter(&iter, ops)) > 0) iter.status = iomap_folio_mkwrite_iter(&iter, folio); if (ret < 0) goto out_unlock; folio_wait_stable(folio); return VM_FAULT_LOCKED; out_unlock: folio_unlock(folio); return vmf_fs_error(ret); } EXPORT_SYMBOL_GPL(iomap_page_mkwrite); static void iomap_writeback_init(struct inode *inode, struct folio *folio) { struct iomap_folio_state *ifs = folio->private; WARN_ON_ONCE(i_blocks_per_folio(inode, folio) > 1 && !ifs); if (ifs) { WARN_ON_ONCE(atomic_read(&ifs->write_bytes_pending) != 0); /* * Set this to the folio size. After processing the folio for * writeback in iomap_writeback_folio(), we'll subtract any * ranges not written back. * * We do this because otherwise, we would have to atomically * increment ifs->write_bytes_pending every time a range in the * folio needs to be written back. */ atomic_set(&ifs->write_bytes_pending, folio_size(folio)); } } void iomap_finish_folio_write(struct inode *inode, struct folio *folio, size_t len) { struct iomap_folio_state *ifs = folio->private; WARN_ON_ONCE(i_blocks_per_folio(inode, folio) > 1 && !ifs); WARN_ON_ONCE(ifs && atomic_read(&ifs->write_bytes_pending) <= 0); if (!ifs || atomic_sub_and_test(len, &ifs->write_bytes_pending)) folio_end_writeback(folio); } EXPORT_SYMBOL_GPL(iomap_finish_folio_write); static int iomap_writeback_range(struct iomap_writepage_ctx *wpc, struct folio *folio, u64 pos, u32 rlen, u64 end_pos, size_t *bytes_submitted) { do { ssize_t ret; ret = wpc->ops->writeback_range(wpc, folio, pos, rlen, end_pos); if (WARN_ON_ONCE(ret == 0 || ret > rlen)) return -EIO; if (ret < 0) return ret; rlen -= ret; pos += ret; /* * Holes are not written back by ->writeback_range, so track * if we did handle anything that is not a hole here. */ if (wpc->iomap.type != IOMAP_HOLE) *bytes_submitted += ret; } while (rlen); return 0; } /* * Check interaction of the folio with the file end. * * If the folio is entirely beyond i_size, return false. If it straddles * i_size, adjust end_pos and zero all data beyond i_size. */ static bool iomap_writeback_handle_eof(struct folio *folio, struct inode *inode, u64 *end_pos) { u64 isize = i_size_read(inode); if (*end_pos > isize) { size_t poff = offset_in_folio(folio, isize); pgoff_t end_index = isize >> PAGE_SHIFT; /* * If the folio is entirely ouside of i_size, skip it. * * This can happen due to a truncate operation that is in * progress and in that case truncate will finish it off once * we've dropped the folio lock. * * Note that the pgoff_t used for end_index is an unsigned long. * If the given offset is greater than 16TB on a 32-bit system, * then if we checked if the folio is fully outside i_size with * "if (folio->index >= end_index + 1)", "end_index + 1" would * overflow and evaluate to 0. Hence this folio would be * redirtied and written out repeatedly, which would result in * an infinite loop; the user program performing this operation * would hang. Instead, we can detect this situation by * checking if the folio is totally beyond i_size or if its * offset is just equal to the EOF. */ if (folio->index > end_index || (folio->index == end_index && poff == 0)) return false; /* * The folio straddles i_size. * * It must be zeroed out on each and every writepage invocation * because it may be mmapped: * * A file is mapped in multiples of the page size. For a * file that is not a multiple of the page size, the * remaining memory is zeroed when mapped, and writes to that * region are not written out to the file. * * Also adjust the end_pos to the end of file and skip writeback * for all blocks entirely beyond i_size. */ folio_zero_segment(folio, poff, folio_size(folio)); *end_pos = isize; } return true; } int iomap_writeback_folio(struct iomap_writepage_ctx *wpc, struct folio *folio) { struct iomap_folio_state *ifs = folio->private; struct inode *inode = wpc->inode; u64 pos = folio_pos(folio); u64 end_pos = pos + folio_size(folio); u64 end_aligned = 0; loff_t orig_pos = pos; size_t bytes_submitted = 0; int error = 0; u32 rlen; WARN_ON_ONCE(!folio_test_locked(folio)); WARN_ON_ONCE(folio_test_dirty(folio)); WARN_ON_ONCE(folio_test_writeback(folio)); trace_iomap_writeback_folio(inode, pos, folio_size(folio)); if (!iomap_writeback_handle_eof(folio, inode, &end_pos)) return 0; WARN_ON_ONCE(end_pos <= pos); if (i_blocks_per_folio(inode, folio) > 1) { if (!ifs) { ifs = ifs_alloc(inode, folio, 0); iomap_set_range_dirty(folio, 0, end_pos - pos); } iomap_writeback_init(inode, folio); } /* * Set the writeback bit ASAP, as the I/O completion for the single * block per folio case happen hit as soon as we're submitting the bio. */ folio_start_writeback(folio); /* * Walk through the folio to find dirty areas to write back. */ end_aligned = round_up(end_pos, i_blocksize(inode)); while ((rlen = iomap_find_dirty_range(folio, &pos, end_aligned))) { error = iomap_writeback_range(wpc, folio, pos, rlen, end_pos, &bytes_submitted); if (error) break; pos += rlen; } if (bytes_submitted) wpc->nr_folios++; if (error && pos > orig_pos) fserror_report_io(inode, FSERR_BUFFERED_WRITE, orig_pos, 0, error, GFP_NOFS); /* * We can have dirty bits set past end of file in page_mkwrite path * while mapping the last partial folio. Hence it's better to clear * all the dirty bits in the folio here. */ iomap_clear_range_dirty(folio, 0, folio_size(folio)); /* * Usually the writeback bit is cleared by the I/O completion handler. * But we may end up either not actually writing any blocks, or (when * there are multiple blocks in a folio) all I/O might have finished * already at this point. In that case we need to clear the writeback * bit ourselves right after unlocking the page. */ if (ifs) { /* * Subtract any bytes that were initially accounted to * write_bytes_pending but skipped for writeback. */ size_t bytes_not_submitted = folio_size(folio) - bytes_submitted; if (bytes_not_submitted) iomap_finish_folio_write(inode, folio, bytes_not_submitted); } else if (!bytes_submitted) { folio_end_writeback(folio); } mapping_set_error(inode->i_mapping, error); return error; } EXPORT_SYMBOL_GPL(iomap_writeback_folio); int iomap_writepages(struct iomap_writepage_ctx *wpc) { struct address_space *mapping = wpc->inode->i_mapping; struct folio *folio = NULL; int error; /* * Writeback from reclaim context should never happen except in the case * of a VM regression so warn about it and refuse to write the data. */ if (WARN_ON_ONCE((current->flags & (PF_MEMALLOC | PF_KSWAPD)) == PF_MEMALLOC)) return -EIO; while ((folio = writeback_iter(mapping, wpc->wbc, folio, &error))) { error = iomap_writeback_folio(wpc, folio); folio_unlock(folio); } /* * If @error is non-zero, it means that we have a situation where some * part of the submission process has failed after we've marked pages * for writeback. * * We cannot cancel the writeback directly in that case, so always call * ->writeback_submit to run the I/O completion handler to clear the * writeback bit and let the file system proess the errors. */ if (wpc->wb_ctx) return wpc->ops->writeback_submit(wpc, error); return error; } EXPORT_SYMBOL_GPL(iomap_writepages);
1 1 6 6 3 2 1 4 3 5 5 4 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 // SPDX-License-Identifier: GPL-2.0-only /* * Sync File validation framework and debug information * * Copyright (C) 2012 Google, Inc. */ #include <linux/debugfs.h> #include "sync_debug.h" static struct dentry *dbgfs; static LIST_HEAD(sync_timeline_list_head); static DEFINE_SPINLOCK(sync_timeline_list_lock); void sync_timeline_debug_add(struct sync_timeline *obj) { unsigned long flags; spin_lock_irqsave(&sync_timeline_list_lock, flags); list_add_tail(&obj->sync_timeline_list, &sync_timeline_list_head); spin_unlock_irqrestore(&sync_timeline_list_lock, flags); } void sync_timeline_debug_remove(struct sync_timeline *obj) { unsigned long flags; spin_lock_irqsave(&sync_timeline_list_lock, flags); list_del(&obj->sync_timeline_list); spin_unlock_irqrestore(&sync_timeline_list_lock, flags); } static const char *sync_status_str(int status) { if (status < 0) return "error"; if (status > 0) return "signaled"; return "active"; } static void sync_print_fence(struct seq_file *s, struct dma_fence *fence, bool show) { struct sync_timeline *parent = dma_fence_parent(fence); int status; status = dma_fence_get_status_locked(fence); seq_printf(s, " %s%sfence %s", show ? parent->name : "", show ? "_" : "", sync_status_str(status)); if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags)) { struct timespec64 ts64 = ktime_to_timespec64(fence->timestamp); seq_printf(s, "@%ptSp", &ts64); } seq_printf(s, ": %lld", fence->seqno); seq_printf(s, " / %d", parent->value); seq_putc(s, '\n'); } static void sync_print_obj(struct seq_file *s, struct sync_timeline *obj) { struct list_head *pos; seq_printf(s, "%s: %d\n", obj->name, obj->value); spin_lock(&obj->lock); /* Caller already disabled IRQ. */ list_for_each(pos, &obj->pt_list) { struct sync_pt *pt = container_of(pos, struct sync_pt, link); sync_print_fence(s, &pt->base, false); } spin_unlock(&obj->lock); } static int sync_info_debugfs_show(struct seq_file *s, void *unused) { struct list_head *pos; seq_puts(s, "objs:\n--------------\n"); spin_lock_irq(&sync_timeline_list_lock); list_for_each(pos, &sync_timeline_list_head) { struct sync_timeline *obj = container_of(pos, struct sync_timeline, sync_timeline_list); sync_print_obj(s, obj); seq_putc(s, '\n'); } spin_unlock_irq(&sync_timeline_list_lock); seq_puts(s, "fences:\n--------------\n"); return 0; } DEFINE_SHOW_ATTRIBUTE(sync_info_debugfs); static __init int sync_debugfs_init(void) { dbgfs = debugfs_create_dir("sync", NULL); /* * The debugfs files won't ever get removed and thus, there is * no need to protect it against removal races. The use of * debugfs_create_file_unsafe() is actually safe here. */ debugfs_create_file_unsafe("info", 0444, dbgfs, NULL, &sync_info_debugfs_fops); debugfs_create_file_unsafe("sw_sync", 0644, dbgfs, NULL, &sw_sync_debugfs_fops); return 0; } late_initcall(sync_debugfs_init);
1 2 1 4 4 1 1 2 2 2 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 // SPDX-License-Identifier: GPL-2.0-or-later /* * SQ905C subdriver * * Copyright (C) 2009 Theodore Kilgore */ /* * * This driver uses work done in * libgphoto2/camlibs/digigr8, Copyright (C) Theodore Kilgore. * * This driver has also used as a base the sq905c driver * and may contain code fragments from it. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define MODULE_NAME "sq905c" #include <linux/workqueue.h> #include <linux/slab.h> #include "gspca.h" MODULE_AUTHOR("Theodore Kilgore <kilgota@auburn.edu>"); MODULE_DESCRIPTION("GSPCA/SQ905C USB Camera Driver"); MODULE_LICENSE("GPL"); /* Default timeouts, in ms */ #define SQ905C_CMD_TIMEOUT 500 #define SQ905C_DATA_TIMEOUT 1000 /* Maximum transfer size to use. */ #define SQ905C_MAX_TRANSFER 0x8000 #define FRAME_HEADER_LEN 0x50 /* Commands. These go in the "value" slot. */ #define SQ905C_CLEAR 0xa0 /* clear everything */ #define SQ905C_GET_ID 0x14f4 /* Read version number */ #define SQ905C_CAPTURE_LOW 0xa040 /* Starts capture at 160x120 */ #define SQ905C_CAPTURE_MED 0x1440 /* Starts capture at 320x240 */ #define SQ905C_CAPTURE_HI 0x2840 /* Starts capture at 320x240 */ /* For capture, this must go in the "index" slot. */ #define SQ905C_CAPTURE_INDEX 0x110f /* Structure to hold all of our device specific stuff */ struct sd { struct gspca_dev gspca_dev; /* !! must be the first item */ const struct v4l2_pix_format *cap_mode; /* Driver stuff */ struct work_struct work_struct; struct workqueue_struct *work_thread; }; /* * Most of these cameras will do 640x480 and 320x240. 160x120 works * in theory but gives very poor output. Therefore, not supported. * The 0x2770:0x9050 cameras have max resolution of 320x240. */ static struct v4l2_pix_format sq905c_mode[] = { { 320, 240, V4L2_PIX_FMT_SQ905C, V4L2_FIELD_NONE, .bytesperline = 320, .sizeimage = 320 * 240, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 0}, { 640, 480, V4L2_PIX_FMT_SQ905C, V4L2_FIELD_NONE, .bytesperline = 640, .sizeimage = 640 * 480, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 0} }; /* Send a command to the camera. */ static int sq905c_command(struct gspca_dev *gspca_dev, u16 command, u16 index) { int ret; ret = usb_control_msg(gspca_dev->dev, usb_sndctrlpipe(gspca_dev->dev, 0), USB_REQ_SYNCH_FRAME, /* request */ USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, command, index, NULL, 0, SQ905C_CMD_TIMEOUT); if (ret < 0) { pr_err("%s: usb_control_msg failed (%d)\n", __func__, ret); return ret; } return 0; } static int sq905c_read(struct gspca_dev *gspca_dev, u16 command, u16 index, int size) { int ret; ret = usb_control_msg(gspca_dev->dev, usb_rcvctrlpipe(gspca_dev->dev, 0), USB_REQ_SYNCH_FRAME, /* request */ USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, command, index, gspca_dev->usb_buf, size, SQ905C_CMD_TIMEOUT); if (ret < 0) { pr_err("%s: usb_control_msg failed (%d)\n", __func__, ret); return ret; } return 0; } /* * This function is called as a workqueue function and runs whenever the camera * is streaming data. Because it is a workqueue function it is allowed to sleep * so we can use synchronous USB calls. To avoid possible collisions with other * threads attempting to use gspca_dev->usb_buf we take the usb_lock when * performing USB operations using it. In practice we don't really need this * as the camera doesn't provide any controls. */ static void sq905c_dostream(struct work_struct *work) { struct sd *dev = container_of(work, struct sd, work_struct); struct gspca_dev *gspca_dev = &dev->gspca_dev; int bytes_left; /* bytes remaining in current frame. */ int data_len; /* size to use for the next read. */ int act_len; int packet_type; int ret; u8 *buffer; buffer = kmalloc(SQ905C_MAX_TRANSFER, GFP_KERNEL); if (!buffer) { pr_err("Couldn't allocate USB buffer\n"); goto quit_stream; } while (gspca_dev->present && gspca_dev->streaming) { #ifdef CONFIG_PM if (gspca_dev->frozen) break; #endif /* Request the header, which tells the size to download */ ret = usb_bulk_msg(gspca_dev->dev, usb_rcvbulkpipe(gspca_dev->dev, 0x81), buffer, FRAME_HEADER_LEN, &act_len, SQ905C_DATA_TIMEOUT); gspca_dbg(gspca_dev, D_STREAM, "Got %d bytes out of %d for header\n", act_len, FRAME_HEADER_LEN); if (ret < 0 || act_len < FRAME_HEADER_LEN) goto quit_stream; /* size is read from 4 bytes starting 0x40, little endian */ bytes_left = buffer[0x40]|(buffer[0x41]<<8)|(buffer[0x42]<<16) |(buffer[0x43]<<24); gspca_dbg(gspca_dev, D_STREAM, "bytes_left = 0x%x\n", bytes_left); /* We keep the header. It has other information, too. */ packet_type = FIRST_PACKET; gspca_frame_add(gspca_dev, packet_type, buffer, FRAME_HEADER_LEN); while (bytes_left > 0 && gspca_dev->present) { data_len = bytes_left > SQ905C_MAX_TRANSFER ? SQ905C_MAX_TRANSFER : bytes_left; ret = usb_bulk_msg(gspca_dev->dev, usb_rcvbulkpipe(gspca_dev->dev, 0x81), buffer, data_len, &act_len, SQ905C_DATA_TIMEOUT); if (ret < 0 || act_len < data_len) goto quit_stream; gspca_dbg(gspca_dev, D_STREAM, "Got %d bytes out of %d for frame\n", data_len, bytes_left); bytes_left -= data_len; if (bytes_left == 0) packet_type = LAST_PACKET; else packet_type = INTER_PACKET; gspca_frame_add(gspca_dev, packet_type, buffer, data_len); } } quit_stream: if (gspca_dev->present) { mutex_lock(&gspca_dev->usb_lock); sq905c_command(gspca_dev, SQ905C_CLEAR, 0); mutex_unlock(&gspca_dev->usb_lock); } kfree(buffer); } /* This function is called at probe time just before sd_init */ static int sd_config(struct gspca_dev *gspca_dev, const struct usb_device_id *id) { struct cam *cam = &gspca_dev->cam; struct sd *dev = (struct sd *) gspca_dev; int ret; gspca_dbg(gspca_dev, D_PROBE, "SQ9050 camera detected (vid/pid 0x%04X:0x%04X)\n", id->idVendor, id->idProduct); ret = sq905c_command(gspca_dev, SQ905C_GET_ID, 0); if (ret < 0) { gspca_err(gspca_dev, "Get version command failed\n"); return ret; } ret = sq905c_read(gspca_dev, 0xf5, 0, 20); if (ret < 0) { gspca_err(gspca_dev, "Reading version command failed\n"); return ret; } /* Note we leave out the usb id and the manufacturing date */ gspca_dbg(gspca_dev, D_PROBE, "SQ9050 ID string: %02x - %*ph\n", gspca_dev->usb_buf[3], 6, gspca_dev->usb_buf + 14); cam->cam_mode = sq905c_mode; cam->nmodes = 2; if (gspca_dev->usb_buf[15] == 0) cam->nmodes = 1; /* We don't use the buffer gspca allocates so make it small. */ cam->bulk_size = 32; cam->bulk = 1; INIT_WORK(&dev->work_struct, sq905c_dostream); return 0; } /* called on streamoff with alt==0 and on disconnect */ /* the usb_lock is held at entry - restore on exit */ static void sd_stop0(struct gspca_dev *gspca_dev) { struct sd *dev = (struct sd *) gspca_dev; /* wait for the work queue to terminate */ mutex_unlock(&gspca_dev->usb_lock); /* This waits for sq905c_dostream to finish */ destroy_workqueue(dev->work_thread); dev->work_thread = NULL; mutex_lock(&gspca_dev->usb_lock); } /* this function is called at probe and resume time */ static int sd_init(struct gspca_dev *gspca_dev) { /* connect to the camera and reset it. */ return sq905c_command(gspca_dev, SQ905C_CLEAR, 0); } /* Set up for getting frames. */ static int sd_start(struct gspca_dev *gspca_dev) { struct sd *dev = (struct sd *) gspca_dev; int ret; dev->cap_mode = gspca_dev->cam.cam_mode; /* "Open the shutter" and set size, to start capture */ switch (gspca_dev->pixfmt.width) { case 640: gspca_dbg(gspca_dev, D_STREAM, "Start streaming at high resolution\n"); dev->cap_mode++; ret = sq905c_command(gspca_dev, SQ905C_CAPTURE_HI, SQ905C_CAPTURE_INDEX); break; default: /* 320 */ gspca_dbg(gspca_dev, D_STREAM, "Start streaming at medium resolution\n"); ret = sq905c_command(gspca_dev, SQ905C_CAPTURE_MED, SQ905C_CAPTURE_INDEX); } if (ret < 0) { gspca_err(gspca_dev, "Start streaming command failed\n"); return ret; } /* Start the workqueue function to do the streaming */ dev->work_thread = create_singlethread_workqueue(MODULE_NAME); if (!dev->work_thread) return -ENOMEM; queue_work(dev->work_thread, &dev->work_struct); return 0; } /* Table of supported USB devices */ static const struct usb_device_id device_table[] = { {USB_DEVICE(0x2770, 0x905c)}, {USB_DEVICE(0x2770, 0x9050)}, {USB_DEVICE(0x2770, 0x9051)}, {USB_DEVICE(0x2770, 0x9052)}, {USB_DEVICE(0x2770, 0x913d)}, {} }; MODULE_DEVICE_TABLE(usb, device_table); /* sub-driver description */ static const struct sd_desc sd_desc = { .name = MODULE_NAME, .config = sd_config, .init = sd_init, .start = sd_start, .stop0 = sd_stop0, }; /* -- device connect -- */ static int sd_probe(struct usb_interface *intf, const struct usb_device_id *id) { return gspca_dev_probe(intf, id, &sd_desc, sizeof(struct sd), THIS_MODULE); } static struct usb_driver sd_driver = { .name = MODULE_NAME, .id_table = device_table, .probe = sd_probe, .disconnect = gspca_disconnect, #ifdef CONFIG_PM .suspend = gspca_suspend, .resume = gspca_resume, .reset_resume = gspca_resume, #endif }; module_usb_driver(sd_driver);
7 4 2 11 11 11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 // SPDX-License-Identifier: GPL-2.0-or-later /* * Driver for Zarlink DVB-T ZL10353 demodulator * * Copyright (C) 2006, 2007 Christopher Pascoe <c.pascoe@itee.uq.edu.au> */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> #include <linux/delay.h> #include <linux/string.h> #include <linux/slab.h> #include <asm/div64.h> #include <media/dvb_frontend.h> #include "zl10353_priv.h" #include "zl10353.h" struct zl10353_state { struct i2c_adapter *i2c; struct dvb_frontend frontend; struct zl10353_config config; u32 bandwidth; u32 ucblocks; u32 frequency; }; static int debug; #define dprintk(args...) \ do { \ if (debug) printk(KERN_DEBUG "zl10353: " args); \ } while (0) static int debug_regs; static int zl10353_single_write(struct dvb_frontend *fe, u8 reg, u8 val) { struct zl10353_state *state = fe->demodulator_priv; u8 buf[2] = { reg, val }; struct i2c_msg msg = { .addr = state->config.demod_address, .flags = 0, .buf = buf, .len = 2 }; int err = i2c_transfer(state->i2c, &msg, 1); if (err != 1) { printk("zl10353: write to reg %x failed (err = %d)!\n", reg, er