// SPDX-License-Identifier: GPL-2.0-only /* * BPF JIT compiler * * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com) * Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com */ #include <linux/netdevice.h> #include <linux/filter.h> #include <linux/if_vlan.h> #include <linux/bpf.h> #include <linux/memory.h> #include <linux/sort.h> #include <asm/extable.h> #include <asm/ftrace.h> #include <asm/set_memory.h> #include <asm/nospec-branch.h> #include <asm/text-patching.h> #include <asm/unwind.h> #include <asm/cfi.h> static bool all_callee_regs_used[4] = {true, true, true, true}; static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) { if (len == 1) *ptr = bytes; else if (len == 2) *(u16 *)ptr = bytes; else { *(u32 *)ptr = bytes; barrier(); } return ptr + len; } #define EMIT(bytes, len) \ do { prog = emit_code(prog, bytes, len); } while (0) #define EMIT1(b1) EMIT(b1, 1) #define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2) #define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3) #define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4) #define EMIT5(b1, b2, b3, b4, b5) \ do { EMIT1(b1); EMIT4(b2, b3, b4, b5); } while (0) #define EMIT1_off32(b1,
off) \ do { EMIT1(b1); EMIT(off, 4); } while (0) #define EMIT2_off32(b1, b2, off) \ do { EMIT2(b1, b2); EMIT(off, 4); } while (0) #define EMIT3_off32(b1, b2, b3, off) \ do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0) #define EMIT4_off32(b1, b2, b3, b4, off) \ do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0) #ifdef CONFIG_X86_KERNEL_IBT #define EMIT_ENDBR() EMIT(gen_endbr(), 4) #define EMIT_ENDBR_POISON() EMIT(gen_endbr_poison(), 4) #else #define EMIT_ENDBR() #define EMIT_ENDBR_POISON() #endif static bool is_imm8(int value) { return value <= 127 && value >= -128; } /* * Let us limit the positive offset to be <= 123. * This is to ensure eventual jit convergence For the following patterns: * ... * pass4, final_proglen=4391: * ... * 20e: 48 85 ff test rdi,rdi * 211: 74 7d je 0x290 * 213: 48 8b 77 00 mov rsi,QWORD PTR [rdi+0x0] * ... * 289: 48 85 ff test rdi,rdi * 28c: 74 17 je 0x2a5 * 28e: e9 7f ff ff ff jmp 0x212 * 293: bf 03 00 00 00 mov edi,0x3 * Note that insn at 0x211 is 2-byte cond jump insn for offset 0x7d (-125) * and insn at 0x28e is 5-byte jmp insn with offset -129. * * pass5, final_proglen=4392: * ... * 20e: 48 85 ff test rdi,rdi * 211: 0f 84 80 00 00 00 je 0x297 * 217: 48 8b 77 00 mov rsi,QWORD PTR [rdi+0x0] * ... * 28d: 48 85 ff test rdi,rdi * 290: 74 1a je 0x2ac * 292: eb 84 jmp 0x218 * 294: bf 03 00 00 00 mov edi,0x3 * Note that insn at 0x211 is 6-byte cond jump insn now since its offset * becomes 0x80 based on previous round (0x293 - 0x213 = 0x80). * At the same time, insn at 0x292 is a 2-byte insn since its offset is * -124. * * pass6 will repeat the same code as in pass4 and this will prevent * eventual convergence. * * To fix this issue, we need to break je (2->6 bytes) <-> jmp (5->2 bytes) * cycle in the above. In the above example je offset <= 0x7c should work. * * For other cases, je <-> je needs offset <= 0x7b to avoid no convergence * issue. For jmp <-> je and jmp <-> jmp cases, jmp offset <= 0x7c should * avoid no convergence issue. * * Overall, let us limit the positive offset for 8bit cond/uncond jmp insn * to maximum 123 (0x7b). This way, the jit pass can eventually converge. */ static bool is_imm8_jmp_offset(int value) { return value <= 123 && value >= -128; } static bool is_simm32(s64 value) { return value == (s64)(s32)value; } static bool is_uimm32(u64 value) { return value == (u64)(u32)value; } /* mov dst, src */ #define EMIT_mov(DST, SRC) \ do { \ if (DST != SRC) \ EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \ } while (0) static int bpf_size_to_x86_bytes(int bpf_size) { if (bpf_size == BPF_W) return 4; else if (bpf_size == BPF_H) return 2; else if (bpf_size == BPF_B) return 1; else if (bpf_size == BPF_DW) return 4; /* imm32 */ else return 0; } /* * List of x86 cond jumps opcodes (. + s8) * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32) */ #define X86_JB 0x72 #define X86_JAE 0x73 #define X86_JE 0x74 #define X86_JNE 0x75 #define X86_JBE 0x76 #define X86_JA 0x77 #define X86_JL 0x7C #define X86_JGE 0x7D #define X86_JLE 0x7E #define X86_JG 0x7F /* Pick a register outside of BPF range for JIT internal work */ #define AUX_REG (MAX_BPF_JIT_REG + 1) #define X86_REG_R9 (MAX_BPF_JIT_REG + 2) #define X86_REG_R12 (MAX_BPF_JIT_REG + 3) /* * The following table maps BPF registers to x86-64 registers. * * x86-64 register R12 is unused, since if used as base address * register in load/store instructions, it always needs an * extra byte of encoding and is callee saved. 
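 * (Illustrative example, not part of the original comment: per the
 *  reg2hex[] table below, BPF_REG_1 maps to RDI and BPF_REG_6 to RBX,
 *  so EMIT_mov(BPF_REG_6, BPF_REG_1) emits 48 89 fb, i.e. "mov rbx, rdi".
 *  Registers that map to r8..r15 additionally need a REX bit, which is
 *  what is_ereg()/add_2mod() provide.)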
* * x86-64 register R9 is not used by BPF programs, but can be used by BPF * trampoline. x86-64 register R10 is used for blinding (if enabled). */ static const int reg2hex[] = { [BPF_REG_0] = 0, /* RAX */ [BPF_REG_1] = 7, /* RDI */ [BPF_REG_2] = 6, /* RSI */ [BPF_REG_3] = 2, /* RDX */ [BPF_REG_4] = 1, /* RCX */ [BPF_REG_5] = 0, /* R8 */ [BPF_REG_6] = 3, /* RBX callee saved */ [BPF_REG_7] = 5, /* R13 callee saved */ [BPF_REG_8] = 6, /* R14 callee saved */ [BPF_REG_9] = 7, /* R15 callee saved */ [BPF_REG_FP] = 5, /* RBP readonly */ [BPF_REG_AX] = 2, /* R10 temp register */ [AUX_REG] = 3, /* R11 temp register */ [X86_REG_R9] = 1, /* R9 register, 6th function argument */ [X86_REG_R12] = 4, /* R12 callee saved */ }; static const int reg2pt_regs[] = { [BPF_REG_0] = offsetof(struct pt_regs, ax), [BPF_REG_1] = offsetof(struct pt_regs, di), [BPF_REG_2] = offsetof(struct pt_regs, si), [BPF_REG_3] = offsetof(struct pt_regs, dx), [BPF_REG_4] = offsetof(struct pt_regs, cx), [BPF_REG_5] = offsetof(struct pt_regs, r8), [BPF_REG_6] = offsetof(struct pt_regs, bx), [BPF_REG_7] = offsetof(struct pt_regs, r13), [BPF_REG_8] = offsetof(struct pt_regs, r14), [BPF_REG_9] = offsetof(struct pt_regs, r15), }; /* * is_ereg() == true if BPF register 'reg' maps to x86-64 r8..r15 * which need extra byte of encoding. * rax,rcx,...,rbp have simpler encoding */ static bool is_ereg(u32 reg) { return (1 << reg) & (BIT(BPF_REG_5) | BIT(AUX_REG) | BIT(BPF_REG_7) | BIT(BPF_REG_8) | BIT(BPF_REG_9) | BIT(X86_REG_R9) | BIT(X86_REG_R12) | BIT(BPF_REG_AX)); } /* * is_ereg_8l() == true if BPF register 'reg' is mapped to access x86-64 * lower 8-bit registers dil,sil,bpl,spl,r8b..r15b, which need extra byte * of encoding. al,cl,dl,bl have simpler encoding. */ static bool is_ereg_8l(u32 reg) { return is_ereg(reg) || (1 << reg) & (BIT(BPF_REG_1) | BIT(BPF_REG_2) | BIT(BPF_REG_FP)); } static bool is_axreg(u32 reg) { return reg == BPF_REG_0; } /* Add modifiers if 'reg' maps to x86-64 registers R8..R15 */ static u8 add_1mod(u8 byte, u32 reg) { if (is_ereg(reg)) byte |= 1; return byte; } static u8 add_2mod(u8 byte, u32 r1, u32 r2) { if (is_ereg(r1)) byte |= 1; if (is_ereg(r2)) byte |= 4; return byte; } static u8 add_3mod(u8 byte, u32 r1, u32 r2, u32 index) { if (is_ereg(r1)) byte |= 1; if (is_ereg(index)) byte |= 2; if (is_ereg(r2)) byte |= 4; return byte; } /* Encode 'dst_reg' register into x86-64 opcode 'byte' */ static u8 add_1reg(u8 byte, u32 dst_reg) { return byte + reg2hex[dst_reg]; } /* Encode 'dst_reg' and 'src_reg' registers into x86-64 opcode 'byte' */ static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg) { return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3); } /* Some 1-byte opcodes for binary ALU operations */ static u8 simple_alu_opcodes[] = { [BPF_ADD] = 0x01, [BPF_SUB] = 0x29, [BPF_AND] = 0x21, [BPF_OR] = 0x09, [BPF_XOR] = 0x31, [BPF_LSH] = 0xE0, [BPF_RSH] = 0xE8, [BPF_ARSH] = 0xF8, }; static void jit_fill_hole(void *area, unsigned int size) { /* Fill whole space with INT3 instructions */ memset(area, 0xcc, size); } int bpf_arch_text_invalidate(void *dst, size_t len) { return IS_ERR_OR_NULL(text_poke_set(dst, 0xcc, len)); } struct jit_context { int cleanup_addr; /* Epilogue code offset */ /* * Program specific offsets of labels in the code; these rely on the * JIT doing at least 2 passes, recording the position on the first * pass, only to generate the correct offset on the second pass. 
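 * (Editorial illustration, not in the original comment: on the first
 *  pass these fields still hold zero, so the conditional jumps to the
 *  tail-call "out:" label are emitted with a bogus displacement; from
 *  the second pass on, the position recorded by the previous pass
 *  yields the correct offset.)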
*/ int tail_call_direct_label; int tail_call_indirect_label; }; /* Maximum number of bytes emitted while JITing one eBPF insn */ #define BPF_MAX_INSN_SIZE 128 #define BPF_INSN_SAFETY 64 /* Number of bytes emit_patch() needs to generate instructions */ #define X86_PATCH_SIZE 5 /* Number of bytes that will be skipped on tailcall */ #define X86_TAIL_CALL_OFFSET (12 + ENDBR_INSN_SIZE) static void push_r9(u8 **pprog) { u8 *prog = *pprog; EMIT2(0x41, 0x51); /* push r9 */ *pprog = prog; } static void pop_r9(u8 **pprog) { u8 *prog = *pprog; EMIT2(0x41, 0x59); /* pop r9 */ *pprog = prog; } static void push_r12(u8 **pprog) { u8 *prog = *pprog; EMIT2(0x41, 0x54); /* push r12 */ *pprog = prog; } static void push_callee_regs(u8 **pprog, bool *callee_regs_used) { u8 *prog = *pprog; if (callee_regs_used[0]) EMIT1(0x53); /* push rbx */ if (callee_regs_used[1]) EMIT2(0x41, 0x55); /* push r13 */ if (callee_regs_used[2]) EMIT2(0x41, 0x56); /* push r14 */ if (callee_regs_used[3]) EMIT2(0x41, 0x57); /* push r15 */ *pprog = prog; } static void pop_r12(u8 **pprog) { u8 *prog = *pprog; EMIT2(0x41, 0x5C); /* pop r12 */ *pprog = prog; } static void pop_callee_regs(u8 **pprog, bool *callee_regs_used) { u8 *prog = *pprog; if (callee_regs_used[3]) EMIT2(0x41, 0x5F); /* pop r15 */ if (callee_regs_used[2]) EMIT2(0x41, 0x5E); /* pop r14 */ if (callee_regs_used[1]) EMIT2(0x41, 0x5D); /* pop r13 */ if (callee_regs_used[0]) EMIT1(0x5B); /* pop rbx */ *pprog = prog; } static void emit_nops(u8 **pprog, int len) { u8 *prog = *pprog; int i, noplen; while (len > 0) { noplen = len; if (noplen > ASM_NOP_MAX) noplen = ASM_NOP_MAX; for (i = 0; i < noplen; i++) EMIT1(x86_nops[noplen][i]); len -= noplen; } *pprog = prog; } /* * Emit the various CFI preambles, see asm/cfi.h and the comments about FineIBT * in arch/x86/kernel/alternative.c */ static int emit_call(u8 **prog, void *func, void *ip); static void emit_fineibt(u8 **pprog, u8 *ip, u32 hash, int arity) { u8 *prog = *pprog; EMIT_ENDBR(); EMIT3_off32(0x41, 0x81, 0xea, hash); /* subl $hash, %r10d */ if (cfi_bhi) { emit_call(&prog, __bhi_args[arity], ip + 11); } else { EMIT2(0x75, 0xf9); /* jne.d8 .-7 */ EMIT3(0x0f, 0x1f, 0x00); /* nop3 */ } EMIT_ENDBR_POISON(); *pprog = prog; } static void emit_kcfi(u8 **pprog, u32 hash) { u8 *prog = *pprog; EMIT1_off32(0xb8, hash); /* movl $hash, %eax */ #ifdef CONFIG_CALL_PADDING EMIT1(0x90); EMIT1(0x90); EMIT1(0x90); EMIT1(0x90); EMIT1(0x90); EMIT1(0x90); EMIT1(0x90); EMIT1(0x90); EMIT1(0x90); EMIT1(0x90); EMIT1(0x90); #endif EMIT_ENDBR(); *pprog = prog; } static void emit_cfi(u8 **pprog, u8 *ip, u32 hash, int arity) { u8 *prog = *pprog; switch (cfi_mode) { case CFI_FINEIBT: emit_fineibt(&prog, ip, hash, arity); break; case CFI_KCFI: emit_kcfi(&prog, hash); break; default: EMIT_ENDBR(); break; } *pprog = prog; } static void emit_prologue_tail_call(u8 **pprog, bool is_subprog) { u8 *prog = *pprog; if (!is_subprog) { /* cmp rax, MAX_TAIL_CALL_CNT */ EMIT4(0x48, 0x83, 0xF8, MAX_TAIL_CALL_CNT); EMIT2(X86_JA, 6); /* ja 6 */ /* rax is tail_call_cnt if <= MAX_TAIL_CALL_CNT. * case1: entry of main prog. * case2: tail callee of main prog. */ EMIT1(0x50); /* push rax */ /* Make rax as tail_call_cnt_ptr. */ EMIT3(0x48, 0x89, 0xE0); /* mov rax, rsp */ EMIT2(0xEB, 1); /* jmp 1 */ /* rax is tail_call_cnt_ptr if > MAX_TAIL_CALL_CNT. * case: tail callee of subprog. */ EMIT1(0x50); /* push rax */ /* push tail_call_cnt_ptr */ EMIT1(0x50); /* push rax */ } else { /* is_subprog */ /* rax is tail_call_cnt_ptr. 
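 * (Editorial note, not in the original: a subprog is always entered
 *  with rax already carrying tail_call_cnt_ptr, so it simply pushes the
 *  same pointer twice to reproduce the two-slot layout the main prog
 *  case sets up above, keeping the tcc_ptr stack slot at the same
 *  offset in both cases.)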
*/ EMIT1(0x50); /* push rax */ EMIT1(0x50); /* push rax */ } *pprog = prog; } /* * Emit x86-64 prologue code for BPF program. * bpf_tail_call helper will skip the first X86_TAIL_CALL_OFFSET bytes * while jumping to another program */ static void emit_prologue(u8 **pprog, u8 *ip, u32 stack_depth, bool ebpf_from_cbpf, bool tail_call_reachable, bool is_subprog, bool is_exception_cb) { u8 *prog = *pprog; if (is_subprog) { emit_cfi(&prog, ip, cfi_bpf_subprog_hash, 5); } else { emit_cfi(&prog, ip, cfi_bpf_hash, 1); } /* BPF trampoline can be made to work without these nops, * but let's waste 5 bytes for now and optimize later */ emit_nops(&prog, X86_PATCH_SIZE); if (!ebpf_from_cbpf) { if (tail_call_reachable && !is_subprog) /* When it's the entry of the whole tailcall context, * zeroing rax means initialising tail_call_cnt. */ EMIT3(0x48, 0x31, 0xC0); /* xor rax, rax */ else /* Keep the same instruction layout. */ emit_nops(&prog, 3); /* nop3 */ } /* Exception callback receives FP as third parameter */ if (is_exception_cb) { EMIT3(0x48, 0x89, 0xF4); /* mov rsp, rsi */ EMIT3(0x48, 0x89, 0xD5); /* mov rbp, rdx */ /* The main frame must have exception_boundary as true, so we * first restore those callee-saved regs from stack, before * reusing the stack frame. */ pop_callee_regs(&prog, all_callee_regs_used); pop_r12(&prog); /* Reset the stack frame. */ EMIT3(0x48, 0x89, 0xEC); /* mov rsp, rbp */ } else { EMIT1(0x55); /* push rbp */ EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ } /* X86_TAIL_CALL_OFFSET is here */ EMIT_ENDBR(); /* sub rsp, rounded_stack_depth */ if (stack_depth) EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8)); if (tail_call_reachable) emit_prologue_tail_call(&prog, is_subprog); *pprog = prog; } static int emit_patch(u8 **pprog, void *func, void *ip, u8 opcode) { u8 *prog = *pprog; s64 offset; offset = func - (ip + X86_PATCH_SIZE); if (!is_simm32(offset)) { pr_err("Target call %p is out of range\n", func); return -ERANGE; } EMIT1_off32(opcode, offset); *pprog = prog; return 0; } static int emit_call(u8 **pprog, void *func, void *ip) { return emit_patch(pprog, func, ip, 0xE8); } static int emit_rsb_call(u8 **pprog, void *func, void *ip) { OPTIMIZER_HIDE_VAR(func); ip += x86_call_depth_emit_accounting(pprog, func, ip); return emit_patch(pprog, func, ip, 0xE8); } static int emit_jump(u8 **pprog, void *func, void *ip) { return emit_patch(pprog, func, ip, 0xE9); } static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *old_addr, void *new_addr) { const u8 *nop_insn = x86_nops[5]; u8 old_insn[X86_PATCH_SIZE]; u8 new_insn[X86_PATCH_SIZE]; u8 *prog; int ret; memcpy(old_insn, nop_insn, X86_PATCH_SIZE); if (old_addr) { prog = old_insn; ret = t == BPF_MOD_CALL ? emit_call(&prog, old_addr, ip) : emit_jump(&prog, old_addr, ip); if (ret) return ret; } memcpy(new_insn, nop_insn, X86_PATCH_SIZE); if (new_addr) { prog = new_insn; ret = t == BPF_MOD_CALL ? 
emit_call(&prog, new_addr, ip) : emit_jump(&prog, new_addr, ip); if (ret) return ret; } ret = -EBUSY; mutex_lock(&text_mutex); if (memcmp(ip, old_insn, X86_PATCH_SIZE)) goto out; ret = 1; if (memcmp(ip, new_insn, X86_PATCH_SIZE)) { smp_text_poke_single(ip, new_insn, X86_PATCH_SIZE, NULL); ret = 0; } out: mutex_unlock(&text_mutex); return ret; } int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *old_addr, void *new_addr) { if (!is_kernel_text((long)ip) && !is_bpf_text_address((long)ip)) /* BPF poking in modules is not supported */ return -EINVAL; /* * See emit_prologue(), for IBT builds the trampoline hook is preceded * with an ENDBR instruction. */ if (is_endbr(ip)) ip += ENDBR_INSN_SIZE; return __bpf_arch_text_poke(ip, t, old_addr, new_addr); } #define EMIT_LFENCE() EMIT3(0x0F, 0xAE, 0xE8) static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip) { u8 *prog = *pprog; if (cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) { OPTIMIZER_HIDE_VAR(reg); emit_jump(&prog, its_static_thunk(reg), ip); } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) { EMIT_LFENCE(); EMIT2(0xFF, 0xE0 + reg); } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) { OPTIMIZER_HIDE_VAR(reg); if (cpu_feature_enabled(X86_FEATURE_CALL_DEPTH)) emit_jump(&prog, &__x86_indirect_jump_thunk_array[reg], ip); else emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip); } else { EMIT2(0xFF, 0xE0 + reg); /* jmp *%\reg */ if (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) || IS_ENABLED(CONFIG_MITIGATION_SLS)) EMIT1(0xCC); /* int3 */ } *pprog = prog; } static void emit_return(u8 **pprog, u8 *ip) { u8 *prog = *pprog; if (cpu_wants_rethunk()) { emit_jump(&prog, x86_return_thunk, ip); } else { EMIT1(0xC3); /* ret */ if (IS_ENABLED(CONFIG_MITIGATION_SLS)) EMIT1(0xCC); /* int3 */ } *pprog = prog; } #define BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack) (-16 - round_up(stack, 8)) /* * Generate the following code: * * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ... 
* if (index >= array->map.max_entries) * goto out; * if ((*tcc_ptr)++ >= MAX_TAIL_CALL_CNT) * goto out; * prog = array->ptrs[index]; * if (prog == NULL) * goto out; * goto *(prog->bpf_func + prologue_size); * out: */ static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog, u8 **pprog, bool *callee_regs_used, u32 stack_depth, u8 *ip, struct jit_context *ctx) { int tcc_ptr_off = BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack_depth); u8 *prog = *pprog, *start = *pprog; int offset; /* * rdi - pointer to ctx * rsi - pointer to bpf_array * rdx - index in bpf_array */ /* * if (index >= array->map.max_entries) * goto out; */ EMIT2(0x89, 0xD2); /* mov edx, edx */ EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ offsetof(struct bpf_array, map.max_entries)); offset = ctx->tail_call_indirect_label - (prog + 2 - start); EMIT2(X86_JBE, offset); /* jbe out */ /* * if ((*tcc_ptr)++ >= MAX_TAIL_CALL_CNT) * goto out; */ EMIT3_off32(0x48, 0x8B, 0x85, tcc_ptr_off); /* mov rax, qword ptr [rbp - tcc_ptr_off] */ EMIT4(0x48, 0x83, 0x38, MAX_TAIL_CALL_CNT); /* cmp qword ptr [rax], MAX_TAIL_CALL_CNT */ offset = ctx->tail_call_indirect_label - (prog + 2 - start); EMIT2(X86_JAE, offset); /* jae out */ /* prog = array->ptrs[index]; */ EMIT4_off32(0x48, 0x8B, 0x8C, 0xD6, /* mov rcx, [rsi + rdx * 8 + offsetof(...)] */ offsetof(struct bpf_array, ptrs)); /* * if (prog == NULL) * goto out; */ EMIT3(0x48, 0x85, 0xC9); /* test rcx,rcx */ offset = ctx->tail_call_indirect_label - (prog + 2 - start); EMIT2(X86_JE, offset); /* je out */ /* Inc tail_call_cnt if the slot is populated. */ EMIT4(0x48, 0x83, 0x00, 0x01); /* add qword ptr [rax], 1 */ if (bpf_prog->aux->exception_boundary) { pop_callee_regs(&prog, all_callee_regs_used); pop_r12(&prog); } else { pop_callee_regs(&prog, callee_regs_used); if (bpf_arena_get_kern_vm_start(bpf_prog->aux->arena)) pop_r12(&prog); } /* Pop tail_call_cnt_ptr. */ EMIT1(0x58); /* pop rax */ /* Pop tail_call_cnt, if it's main prog. * Pop tail_call_cnt_ptr, if it's subprog. 
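 * (Editorial note, not in the original: these two pops undo the two
 *  pushes made by emit_prologue_tail_call(), so after the "add rsp"
 *  below rsp is back at the value it had right after "mov rbp, rsp",
 *  ready for the jump to prog->bpf_func + X86_TAIL_CALL_OFFSET, which
 *  reuses the current frame.)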
*/ EMIT1(0x58); /* pop rax */ if (stack_depth) EMIT3_off32(0x48, 0x81, 0xC4, /* add rsp, sd */ round_up(stack_depth, 8)); /* goto *(prog->bpf_func + X86_TAIL_CALL_OFFSET); */ EMIT4(0x48, 0x8B, 0x49, /* mov rcx, qword ptr [rcx + 32] */ offsetof(struct bpf_prog, bpf_func)); EMIT4(0x48, 0x83, 0xC1, /* add rcx, X86_TAIL_CALL_OFFSET */ X86_TAIL_CALL_OFFSET); /* * Now we're ready to jump into next BPF program * rdi == ctx (1st arg) * rcx == prog->bpf_func + X86_TAIL_CALL_OFFSET */ emit_indirect_jump(&prog, 1 /* rcx */, ip + (prog - start)); /* out: */ ctx->tail_call_indirect_label = prog - start; *pprog = prog; } static void emit_bpf_tail_call_direct(struct bpf_prog *bpf_prog, struct bpf_jit_poke_descriptor *poke, u8 **pprog, u8 *ip, bool *callee_regs_used, u32 stack_depth, struct jit_context *ctx) { int tcc_ptr_off = BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack_depth); u8 *prog = *pprog, *start = *pprog; int offset; /* * if ((*tcc_ptr)++ >= MAX_TAIL_CALL_CNT) * goto out; */ EMIT3_off32(0x48, 0x8B, 0x85, tcc_ptr_off); /* mov rax, qword ptr [rbp - tcc_ptr_off] */ EMIT4(0x48, 0x83, 0x38, MAX_TAIL_CALL_CNT); /* cmp qword ptr [rax], MAX_TAIL_CALL_CNT */ offset = ctx->tail_call_direct_label - (prog + 2 - start); EMIT2(X86_JAE, offset); /* jae out */ poke->tailcall_bypass = ip + (prog - start); poke->adj_off = X86_TAIL_CALL_OFFSET; poke->tailcall_target = ip + ctx->tail_call_direct_label - X86_PATCH_SIZE; poke->bypass_addr = (u8 *)poke->tailcall_target + X86_PATCH_SIZE; emit_jump(&prog, (u8 *)poke->tailcall_target + X86_PATCH_SIZE, poke->tailcall_bypass); /* Inc tail_call_cnt if the slot is populated. */ EMIT4(0x48, 0x83, 0x00, 0x01); /* add qword ptr [rax], 1 */ if (bpf_prog->aux->exception_boundary) { pop_callee_regs(&prog, all_callee_regs_used); pop_r12(&prog); } else { pop_callee_regs(&prog, callee_regs_used); if (bpf_arena_get_kern_vm_start(bpf_prog->aux->arena)) pop_r12(&prog); } /* Pop tail_call_cnt_ptr. */ EMIT1(0x58); /* pop rax */ /* Pop tail_call_cnt, if it's main prog. * Pop tail_call_cnt_ptr, if it's subprog. */ EMIT1(0x58); /* pop rax */ if (stack_depth) EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8)); emit_nops(&prog, X86_PATCH_SIZE); /* out: */ ctx->tail_call_direct_label = prog - start; *pprog = prog; } static void bpf_tail_call_direct_fixup(struct bpf_prog *prog) { struct bpf_jit_poke_descriptor *poke; struct bpf_array *array; struct bpf_prog *target; int i, ret; for (i = 0; i < prog->aux->size_poke_tab; i++) { poke = &prog->aux->poke_tab[i]; if (poke->aux && poke->aux != prog->aux) continue; WARN_ON_ONCE(READ_ONCE(poke->tailcall_target_stable)); if (poke->reason != BPF_POKE_REASON_TAIL_CALL) continue; array = container_of(poke->tail_call.map, struct bpf_array, map); mutex_lock(&array->aux->poke_mutex); target = array->ptrs[poke->tail_call.key]; if (target) { ret = __bpf_arch_text_poke(poke->tailcall_target, BPF_MOD_JUMP, NULL, (u8 *)target->bpf_func + poke->adj_off); BUG_ON(ret < 0); ret = __bpf_arch_text_poke(poke->tailcall_bypass, BPF_MOD_JUMP, (u8 *)poke->tailcall_target + X86_PATCH_SIZE, NULL); BUG_ON(ret < 0); } WRITE_ONCE(poke->tailcall_target_stable, true); mutex_unlock(&array->aux->poke_mutex); } } static void emit_mov_imm32(u8 **pprog, bool sign_propagate, u32 dst_reg, const u32 imm32) { u8 *prog = *pprog; u8 b1, b2, b3; /* * Optimization: if imm32 is positive, use 'mov %eax, imm32' * (which zero-extends imm32) to save 2 bytes. 
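 * (Worked example, not in the original comment: loading 5 into
 *  BPF_REG_1 emits bf 05 00 00 00 ("mov edi, 0x5"), while loading -1
 *  with sign_propagate set emits 48 c7 c7 ff ff ff ff ("mov rdi, -1"),
 *  two bytes longer.)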
*/ if (sign_propagate && (s32)imm32 < 0) { /* 'mov %rax, imm32' sign extends imm32 */ b1 = add_1mod(0x48, dst_reg); b2 = 0xC7; b3 = 0xC0; EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32); goto done; } /* * Optimization: if imm32 is zero, use 'xor %eax, %eax' * to save 3 bytes. */ if (imm32 == 0) { if (is_ereg(dst_reg)) EMIT1(add_2mod(0x40, dst_reg, dst_reg)); b2 = 0x31; /* xor */ b3 = 0xC0; EMIT2(b2, add_2reg(b3, dst_reg, dst_reg)); goto done; } /* mov %eax, imm32 */ if (is_ereg(dst_reg)) EMIT1(add_1mod(0x40, dst_reg)); EMIT1_off32(add_1reg(0xB8, dst_reg), imm32); done: *pprog = prog; } static void emit_mov_imm64(u8 **pprog, u32 dst_reg, const u32 imm32_hi, const u32 imm32_lo) { u64 imm64 = ((u64)imm32_hi << 32) | (u32)imm32_lo; u8 *prog = *pprog; if (is_uimm32(imm64)) { /* * For emitting plain u32, where sign bit must not be * propagated LLVM tends to load imm64 over mov32 * directly, so save couple of bytes by just doing * 'mov %eax, imm32' instead. */ emit_mov_imm32(&prog, false, dst_reg, imm32_lo); } else if (is_simm32(imm64)) { emit_mov_imm32(&prog, true, dst_reg, imm32_lo); } else { /* movabsq rax, imm64 */ EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg)); EMIT(imm32_lo, 4); EMIT(imm32_hi, 4); } *pprog = prog; } static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg) { u8 *prog = *pprog; if (is64) { /* mov dst, src */ EMIT_mov(dst_reg, src_reg); } else { /* mov32 dst, src */ if (is_ereg(dst_reg) || is_ereg(src_reg)) EMIT1(add_2mod(0x40, dst_reg, src_reg)); EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg)); } *pprog = prog; } static void emit_movsx_reg(u8 **pprog, int num_bits, bool is64, u32 dst_reg, u32 src_reg) { u8 *prog = *pprog; if (is64) { /* movs[b,w,l]q dst, src */ if (num_bits == 8) EMIT4(add_2mod(0x48, src_reg, dst_reg), 0x0f, 0xbe, add_2reg(0xC0, src_reg, dst_reg)); else if (num_bits == 16) EMIT4(add_2mod(0x48, src_reg, dst_reg), 0x0f, 0xbf, add_2reg(0xC0, src_reg, dst_reg)); else if (num_bits == 32) EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x63, add_2reg(0xC0, src_reg, dst_reg)); } else { /* movs[b,w]l dst, src */ if (num_bits == 8) { EMIT4(add_2mod(0x40, src_reg, dst_reg), 0x0f, 0xbe, add_2reg(0xC0, src_reg, dst_reg)); } else if (num_bits == 16) { if (is_ereg(dst_reg) || is_ereg(src_reg)) EMIT1(add_2mod(0x40, src_reg, dst_reg)); EMIT3(add_2mod(0x0f, src_reg, dst_reg), 0xbf, add_2reg(0xC0, src_reg, dst_reg)); } } *pprog = prog; } /* Emit the suffix (ModR/M etc) for addressing *(ptr_reg + off) and val_reg */ static void emit_insn_suffix(u8 **pprog, u32 ptr_reg, u32 val_reg, int off) { u8 *prog = *pprog; if (is_imm8(off)) { /* 1-byte signed displacement. 
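 * (Editorial illustration, not in the original: off = -8 fits in a
 *  signed byte and is encoded as the single displacement byte 0xf8;
 *  off = 200 does not, and falls through to the 4-byte form below.)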
* * If off == 0 we could skip this and save one extra byte, but * special case of x86 R13 which always needs an offset is not * worth the hassle */ EMIT2(add_2reg(0x40, ptr_reg, val_reg), off); } else { /* 4-byte signed displacement */ EMIT1_off32(add_2reg(0x80, ptr_reg, val_reg), off); } *pprog = prog; } static void emit_insn_suffix_SIB(u8 **pprog, u32 ptr_reg, u32 val_reg, u32 index_reg, int off) { u8 *prog = *pprog; if (is_imm8(off)) { EMIT3(add_2reg(0x44, BPF_REG_0, val_reg), add_2reg(0, ptr_reg, index_reg) /* SIB */, off); } else { EMIT2_off32(add_2reg(0x84, BPF_REG_0, val_reg), add_2reg(0, ptr_reg, index_reg) /* SIB */, off); } *pprog = prog; } /* * Emit a REX byte if it will be necessary to address these registers */ static void maybe_emit_mod(u8 **pprog, u32 dst_reg, u32 src_reg, bool is64) { u8 *prog = *pprog; if (is64) EMIT1(add_2mod(0x48, dst_reg, src_reg)); else if (is_ereg(dst_reg) || is_ereg(src_reg)) EMIT1(add_2mod(0x40, dst_reg, src_reg)); *pprog = prog; } /* * Similar version of maybe_emit_mod() for a single register */ static void maybe_emit_1mod(u8 **pprog, u32 reg, bool is64) { u8 *prog = *pprog; if (is64) EMIT1(add_1mod(0x48, reg)); else if (is_ereg(reg)) EMIT1(add_1mod(0x40, reg)); *pprog = prog; } /* LDX: dst_reg = *(u8*)(src_reg + off) */ static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off) { u8 *prog = *pprog; switch (size) { case BPF_B: /* Emit 'movzx rax, byte ptr [rax + off]' */ EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6); break; case BPF_H: /* Emit 'movzx rax, word ptr [rax + off]' */ EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7); break; case BPF_W: /* Emit 'mov eax, dword ptr [rax+0x14]' */ if (is_ereg(dst_reg) || is_ereg(src_reg)) EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B); else EMIT1(0x8B); break; case BPF_DW: /* Emit 'mov rax, qword ptr [rax+0x14]' */ EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B); break; } emit_insn_suffix(&prog, src_reg, dst_reg, off); *pprog = prog; } /* LDSX: dst_reg = *(s8*)(src_reg + off) */ static void emit_ldsx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off) { u8 *prog = *pprog; switch (size) { case BPF_B: /* Emit 'movsx rax, byte ptr [rax + off]' */ EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xBE); break; case BPF_H: /* Emit 'movsx rax, word ptr [rax + off]' */ EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xBF); break; case BPF_W: /* Emit 'movsx rax, dword ptr [rax+0x14]' */ EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x63); break; } emit_insn_suffix(&prog, src_reg, dst_reg, off); *pprog = prog; } static void emit_ldx_index(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, u32 index_reg, int off) { u8 *prog = *pprog; switch (size) { case BPF_B: /* movzx rax, byte ptr [rax + r12 + off] */ EMIT3(add_3mod(0x40, src_reg, dst_reg, index_reg), 0x0F, 0xB6); break; case BPF_H: /* movzx rax, word ptr [rax + r12 + off] */ EMIT3(add_3mod(0x40, src_reg, dst_reg, index_reg), 0x0F, 0xB7); break; case BPF_W: /* mov eax, dword ptr [rax + r12 + off] */ EMIT2(add_3mod(0x40, src_reg, dst_reg, index_reg), 0x8B); break; case BPF_DW: /* mov rax, qword ptr [rax + r12 + off] */ EMIT2(add_3mod(0x48, src_reg, dst_reg, index_reg), 0x8B); break; } emit_insn_suffix_SIB(&prog, src_reg, dst_reg, index_reg, off); *pprog = prog; } static void emit_ldx_r12(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off) { emit_ldx_index(pprog, size, dst_reg, src_reg, X86_REG_R12, off); } /* STX: *(u8*)(dst_reg + off) = src_reg */ static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off) { u8 *prog = 
*pprog; switch (size) { case BPF_B: /* Emit 'mov byte ptr [rax + off], al' */ if (is_ereg(dst_reg) || is_ereg_8l(src_reg)) /* Add extra byte for eregs or SIL,DIL,BPL in src_reg */ EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88); else EMIT1(0x88); break; case BPF_H: if (is_ereg(dst_reg) || is_ereg(src_reg)) EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89); else EMIT2(0x66, 0x89); break; case BPF_W: if (is_ereg(dst_reg) || is_ereg(src_reg)) EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89); else EMIT1(0x89); break; case BPF_DW: EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89); break; } emit_insn_suffix(&prog, dst_reg, src_reg, off); *pprog = prog; } /* STX: *(u8*)(dst_reg + index_reg + off) = src_reg */ static void emit_stx_index(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, u32 index_reg, int off) { u8 *prog = *pprog; switch (size) { case BPF_B: /* mov byte ptr [rax + r12 + off], al */ EMIT2(add_3mod(0x40, dst_reg, src_reg, index_reg), 0x88); break; case BPF_H: /* mov word ptr [rax + r12 + off], ax */ EMIT3(0x66, add_3mod(0x40, dst_reg, src_reg, index_reg), 0x89); break; case BPF_W: /* mov dword ptr [rax + r12 + 1], eax */ EMIT2(add_3mod(0x40, dst_reg, src_reg, index_reg), 0x89); break; case BPF_DW: /* mov qword ptr [rax + r12 + 1], rax */ EMIT2(add_3mod(0x48, dst_reg, src_reg, index_reg), 0x89); break; } emit_insn_suffix_SIB(&prog, dst_reg, src_reg, index_reg, off); *pprog = prog; } static void emit_stx_r12(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off) { emit_stx_index(pprog, size, dst_reg, src_reg, X86_REG_R12, off); } /* ST: *(u8*)(dst_reg + index_reg + off) = imm32 */ static void emit_st_index(u8 **pprog, u32 size, u32 dst_reg, u32 index_reg, int off, int imm) { u8 *prog = *pprog; switch (size) { case BPF_B: /* mov byte ptr [rax + r12 + off], imm8 */ EMIT2(add_3mod(0x40, dst_reg, 0, index_reg), 0xC6); break; case BPF_H: /* mov word ptr [rax + r12 + off], imm16 */ EMIT3(0x66, add_3mod(0x40, dst_reg, 0, index_reg), 0xC7); break; case BPF_W: /* mov dword ptr [rax + r12 + 1], imm32 */ EMIT2(add_3mod(0x40, dst_reg, 0, index_reg), 0xC7); break; case BPF_DW: /* mov qword ptr [rax + r12 + 1], imm32 */ EMIT2(add_3mod(0x48, dst_reg, 0, index_reg), 0xC7); break; } emit_insn_suffix_SIB(&prog, dst_reg, 0, index_reg, off); EMIT(imm, bpf_size_to_x86_bytes(size)); *pprog = prog; } static void emit_st_r12(u8 **pprog, u32 size, u32 dst_reg, int off, int imm) { emit_st_index(pprog, size, dst_reg, X86_REG_R12, off, imm); } static int emit_atomic_rmw(u8 **pprog, u32 atomic_op, u32 dst_reg, u32 src_reg, s16 off, u8 bpf_size) { u8 *prog = *pprog; EMIT1(0xF0); /* lock prefix */ maybe_emit_mod(&prog, dst_reg, src_reg, bpf_size == BPF_DW); /* emit opcode */ switch (atomic_op) { case BPF_ADD: case BPF_AND: case BPF_OR: case BPF_XOR: /* lock *(u32/u64*)(dst_reg + off) <op>= src_reg */ EMIT1(simple_alu_opcodes[atomic_op]); break; case BPF_ADD | BPF_FETCH: /* src_reg = atomic_fetch_add(dst_reg + off, src_reg); */ EMIT2(0x0F, 0xC1); break; case BPF_XCHG: /* src_reg = atomic_xchg(dst_reg + off, src_reg); */ EMIT1(0x87); break; case BPF_CMPXCHG: /* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */ EMIT2(0x0F, 0xB1); break; default: pr_err("bpf_jit: unknown atomic opcode %02x\n", atomic_op); return -EFAULT; } emit_insn_suffix(&prog, dst_reg, src_reg, off); *pprog = prog; return 0; } static int emit_atomic_rmw_index(u8 **pprog, u32 atomic_op, u32 size, u32 dst_reg, u32 src_reg, u32 index_reg, int off) { u8 *prog = *pprog; EMIT1(0xF0); /* lock prefix */ switch (size) { case BPF_W: EMIT1(add_3mod(0x40, dst_reg, 
src_reg, index_reg)); break; case BPF_DW: EMIT1(add_3mod(0x48, dst_reg, src_reg, index_reg)); break; default: pr_err("bpf_jit: 1- and 2-byte RMW atomics are not supported\n"); return -EFAULT; } /* emit opcode */ switch (atomic_op) { case BPF_ADD: case BPF_AND: case BPF_OR: case BPF_XOR: /* lock *(u32/u64*)(dst_reg + idx_reg + off) <op>= src_reg */ EMIT1(simple_alu_opcodes[atomic_op]); break; case BPF_ADD | BPF_FETCH: /* src_reg = atomic_fetch_add(dst_reg + idx_reg + off, src_reg); */ EMIT2(0x0F, 0xC1); break; case BPF_XCHG: /* src_reg = atomic_xchg(dst_reg + idx_reg + off, src_reg); */ EMIT1(0x87); break; case BPF_CMPXCHG: /* r0 = atomic_cmpxchg(dst_reg + idx_reg + off, r0, src_reg); */ EMIT2(0x0F, 0xB1); break; default: pr_err("bpf_jit: unknown atomic opcode %02x\n", atomic_op); return -EFAULT; } emit_insn_suffix_SIB(&prog, dst_reg, src_reg, index_reg, off); *pprog = prog; return 0; } static int emit_atomic_ld_st(u8 **pprog, u32 atomic_op, u32 dst_reg, u32 src_reg, s16 off, u8 bpf_size) { switch (atomic_op) { case BPF_LOAD_ACQ: /* dst_reg = smp_load_acquire(src_reg + off16) */ emit_ldx(pprog, bpf_size, dst_reg, src_reg, off); break; case BPF_STORE_REL: /* smp_store_release(dst_reg + off16, src_reg) */ emit_stx(pprog, bpf_size, dst_reg, src_reg, off); break; default: pr_err("bpf_jit: unknown atomic load/store opcode %02x\n", atomic_op); return -EFAULT; } return 0; } static int emit_atomic_ld_st_index(u8 **pprog, u32 atomic_op, u32 size, u32 dst_reg, u32 src_reg, u32 index_reg, int off) { switch (atomic_op) { case BPF_LOAD_ACQ: /* dst_reg = smp_load_acquire(src_reg + idx_reg + off16) */ emit_ldx_index(pprog, size, dst_reg, src_reg, index_reg, off); break; case BPF_STORE_REL: /* smp_store_release(dst_reg + idx_reg + off16, src_reg) */ emit_stx_index(pprog, size, dst_reg, src_reg, index_reg, off); break; default: pr_err("bpf_jit: unknown atomic load/store opcode %02x\n", atomic_op); return -EFAULT; } return 0; } #define DONT_CLEAR 1 bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs) { u32 reg = x->fixup >> 8; /* jump over faulting load and clear dest register */ if (reg != DONT_CLEAR) *(unsigned long *)((void *)regs + reg) = 0; regs->ip += x->fixup & 0xff; return true; } static void detect_reg_usage(struct bpf_insn *insn, int insn_cnt, bool *regs_used) { int i; for (i = 1; i <= insn_cnt; i++, insn++) { if (insn->dst_reg == BPF_REG_6 || insn->src_reg == BPF_REG_6) regs_used[0] = true; if (insn->dst_reg == BPF_REG_7 || insn->src_reg == BPF_REG_7) regs_used[1] = true; if (insn->dst_reg == BPF_REG_8 || insn->src_reg == BPF_REG_8) regs_used[2] = true; if (insn->dst_reg == BPF_REG_9 || insn->src_reg == BPF_REG_9) regs_used[3] = true; } } /* emit the 3-byte VEX prefix * * r: same as rex.r, extra bit for ModRM reg field * x: same as rex.x, extra bit for SIB index field * b: same as rex.b, extra bit for ModRM r/m, or SIB base * m: opcode map select, encoding escape bytes e.g. 
0x0f38 * w: same as rex.w (32 bit or 64 bit) or opcode specific * src_reg2: additional source reg (encoded as BPF reg) * l: vector length (128 bit or 256 bit) or reserved * pp: opcode prefix (none, 0x66, 0xf2 or 0xf3) */ static void emit_3vex(u8 **pprog, bool r, bool x, bool b, u8 m, bool w, u8 src_reg2, bool l, u8 pp) { u8 *prog = *pprog; const u8 b0 = 0xc4; /* first byte of 3-byte VEX prefix */ u8 b1, b2; u8 vvvv = reg2hex[src_reg2]; /* reg2hex gives only the lower 3 bit of vvvv */ if (is_ereg(src_reg2)) vvvv |= 1 << 3; /* * 2nd byte of 3-byte VEX prefix * ~ means bit inverted encoding * * 7 0 * +---+---+---+---+---+---+---+---+ * |~R |~X |~B | m | * +---+---+---+---+---+---+---+---+ */ b1 = (!r << 7) | (!x << 6) | (!b << 5) | (m & 0x1f); /* * 3rd byte of 3-byte VEX prefix * * 7 0 * +---+---+---+---+---+---+---+---+ * | W | ~vvvv | L | pp | * +---+---+---+---+---+---+---+---+ */ b2 = (w << 7) | ((~vvvv & 0xf) << 3) | (l << 2) | (pp & 3); EMIT3(b0, b1, b2); *pprog = prog; } /* emit BMI2 shift instruction */ static void emit_shiftx(u8 **pprog, u32 dst_reg, u8 src_reg, bool is64, u8 op) { u8 *prog = *pprog; bool r = is_ereg(dst_reg); u8 m = 2; /* escape code 0f38 */ emit_3vex(&prog, r, false, r, m, is64, src_reg, false, op); EMIT2(0xf7, add_2reg(0xC0, dst_reg, dst_reg)); *pprog = prog; } static void emit_priv_frame_ptr(u8 **pprog, void __percpu *priv_frame_ptr) { u8 *prog = *pprog; /* movabs r9, priv_frame_ptr */ emit_mov_imm64(&prog, X86_REG_R9, (__force long) priv_frame_ptr >> 32, (u32) (__force long) priv_frame_ptr); #ifdef CONFIG_SMP /* add <r9>, gs:[<off>] */ EMIT2(0x65, 0x4c); EMIT3(0x03, 0x0c, 0x25); EMIT((u32)(unsigned long)&this_cpu_off, 4); #endif *pprog = prog; } #define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp))) #define __LOAD_TCC_PTR(off) \ EMIT3_off32(0x48, 0x8B, 0x85, off) /* mov rax, qword ptr [rbp - rounded_stack_depth - 16] */ #define LOAD_TAIL_CALL_CNT_PTR(stack) \ __LOAD_TCC_PTR(BPF_TAIL_CALL_CNT_PTR_STACK_OFF(stack)) /* Memory size/value to protect private stack overflow/underflow */ #define PRIV_STACK_GUARD_SZ 8 #define PRIV_STACK_GUARD_VAL 0xEB9F12345678eb9fULL static int emit_spectre_bhb_barrier(u8 **pprog, u8 *ip, struct bpf_prog *bpf_prog) { u8 *prog = *pprog; u8 *func; if (cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_LOOP)) { /* The clearing sequence clobbers eax and ecx. */ EMIT1(0x50); /* push rax */ EMIT1(0x51); /* push rcx */ ip += 2; func = (u8 *)clear_bhb_loop; ip += x86_call_depth_emit_accounting(&prog, func, ip); if (emit_call(&prog, func, ip)) return -EINVAL; EMIT1(0x59); /* pop rcx */ EMIT1(0x58); /* pop rax */ } /* Insert IBHF instruction */ if ((cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_LOOP) && cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) || cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_HW)) { /* * Add an Indirect Branch History Fence (IBHF). IBHF acts as a * fence preventing branch history from before the fence from * affecting indirect branches after the fence. This is * specifically used in cBPF jitted code to prevent Intra-mode * BHI attacks. The IBHF instruction is designed to be a NOP on * hardware that doesn't need or support it. The REP and REX.W * prefixes are required by the microcode, and they also ensure * that the NOP is unlikely to be used in existing code. * * IBHF is not a valid instruction in 32-bit mode. 
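 * (Editorial note, not in the original comment: the 0f 1e opcode used
 *  below lies in the reserved hint-NOP space, so on hardware without
 *  IBHF the five emitted bytes simply execute as a no-op.)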
*/ EMIT5(0xF3, 0x48, 0x0F, 0x1E, 0xF8); /* ibhf */ } *pprog = prog; return 0; } static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image, int oldproglen, struct jit_context *ctx, bool jmp_padding) { bool tail_call_reachable = bpf_prog->aux->tail_call_reachable; struct bpf_insn *insn = bpf_prog->insnsi; bool callee_regs_used[4] = {}; int insn_cnt = bpf_prog->len; bool seen_exit = false; u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY]; void __percpu *priv_frame_ptr = NULL; u64 arena_vm_start, user_vm_start; void __percpu *priv_stack_ptr; int i, excnt = 0; int ilen, proglen = 0; u8 *prog = temp; u32 stack_depth; int err; stack_depth = bpf_prog->aux->stack_depth; priv_stack_ptr = bpf_prog->aux->priv_stack_ptr; if (priv_stack_ptr) { priv_frame_ptr = priv_stack_ptr + PRIV_STACK_GUARD_SZ + round_up(stack_depth, 8); stack_depth = 0; } arena_vm_start = bpf_arena_get_kern_vm_start(bpf_prog->aux->arena); user_vm_start = bpf_arena_get_user_vm_start(bpf_prog->aux->arena); detect_reg_usage(insn, insn_cnt, callee_regs_used); emit_prologue(&prog, image, stack_depth, bpf_prog_was_classic(bpf_prog), tail_call_reachable, bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb); /* Exception callback will clobber callee regs for its own use, and * restore the original callee regs from main prog's stack frame. */ if (bpf_prog->aux->exception_boundary) { /* We also need to save r12, which is not mapped to any BPF * register, as we throw after entry into the kernel, which may * overwrite r12. */ push_r12(&prog); push_callee_regs(&prog, all_callee_regs_used); } else { if (arena_vm_start) push_r12(&prog); push_callee_regs(&prog, callee_regs_used); } if (arena_vm_start) emit_mov_imm64(&prog, X86_REG_R12, arena_vm_start >> 32, (u32) arena_vm_start); if (priv_frame_ptr) emit_priv_frame_ptr(&prog, priv_frame_ptr); ilen = prog - temp; if (rw_image) memcpy(rw_image + proglen, temp, ilen); proglen += ilen; addrs[0] = proglen; prog = temp; for (i = 1; i <= insn_cnt; i++, insn++) { const s32 imm32 = insn->imm; u32 dst_reg = insn->dst_reg; u32 src_reg = insn->src_reg; u8 b2 = 0, b3 = 0; u8 *start_of_ldx; s64 jmp_offset; s16 insn_off; u8 jmp_cond; u8 *func; int nops; if (priv_frame_ptr) { if (src_reg == BPF_REG_FP) src_reg = X86_REG_R9; if (dst_reg == BPF_REG_FP) dst_reg = X86_REG_R9; } switch (insn->code) { /* ALU */ case BPF_ALU | BPF_ADD | BPF_X: case BPF_ALU | BPF_SUB | BPF_X: case BPF_ALU | BPF_AND | BPF_X: case BPF_ALU | BPF_OR | BPF_X: case BPF_ALU | BPF_XOR | BPF_X: case BPF_ALU64 | BPF_ADD | BPF_X: case BPF_ALU64 | BPF_SUB | BPF_X: case BPF_ALU64 | BPF_AND | BPF_X: case BPF_ALU64 | BPF_OR | BPF_X: case BPF_ALU64 | BPF_XOR | BPF_X: maybe_emit_mod(&prog, dst_reg, src_reg, BPF_CLASS(insn->code) == BPF_ALU64); b2 = simple_alu_opcodes[BPF_OP(insn->code)]; EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg)); break; case BPF_ALU64 | BPF_MOV | BPF_X: if (insn_is_cast_user(insn)) { if (dst_reg != src_reg) /* 32-bit mov */ emit_mov_reg(&prog, false, dst_reg, src_reg); /* shl dst_reg, 32 */ maybe_emit_1mod(&prog, dst_reg, true); EMIT3(0xC1, add_1reg(0xE0, dst_reg), 32); /* or dst_reg, user_vm_start */ maybe_emit_1mod(&prog, dst_reg, true); if (is_axreg(dst_reg)) EMIT1_off32(0x0D, user_vm_start >> 32); else EMIT2_off32(0x81, add_1reg(0xC8, dst_reg), user_vm_start >> 32); /* rol dst_reg, 32 */ maybe_emit_1mod(&prog, dst_reg, true); EMIT3(0xC1, add_1reg(0xC0, dst_reg), 32); /* xor r11, r11 */ EMIT3(0x4D, 0x31, 0xDB); /* test dst_reg32, dst_reg32; check if lower 32-bit are zero */ maybe_emit_mod(&prog, dst_reg, dst_reg, 
false); EMIT2(0x85, add_2reg(0xC0, dst_reg, dst_reg)); /* cmove r11, dst_reg; if so, set dst_reg to zero */ /* WARNING: Intel swapped src/dst register encoding in CMOVcc !!! */ maybe_emit_mod(&prog, AUX_REG, dst_reg, true); EMIT3(0x0F, 0x44, add_2reg(0xC0, AUX_REG, dst_reg)); break; } else if (insn_is_mov_percpu_addr(insn)) { /* mov <dst>, <src> (if necessary) */ EMIT_mov(dst_reg, src_reg); #ifdef CONFIG_SMP /* add <dst>, gs:[<off>] */ EMIT2(0x65, add_1mod(0x48, dst_reg)); EMIT3(0x03, add_2reg(0x04, 0, dst_reg), 0x25); EMIT((u32)(unsigned long)&this_cpu_off, 4); #endif break; } fallthrough; case BPF_ALU | BPF_MOV | BPF_X: if (insn->off == 0) emit_mov_reg(&prog, BPF_CLASS(insn->code) == BPF_ALU64, dst_reg, src_reg); else emit_movsx_reg(&prog, insn->off, BPF_CLASS(insn->code) == BPF_ALU64, dst_reg, src_reg); break; /* neg dst */ case BPF_ALU | BPF_NEG: case BPF_ALU64 | BPF_NEG: maybe_emit_1mod(&prog, dst_reg, BPF_CLASS(insn->code) == BPF_ALU64); EMIT2(0xF7, add_1reg(0xD8, dst_reg)); break; case BPF_ALU | BPF_ADD | BPF_K: case BPF_ALU | BPF_SUB | BPF_K: case BPF_ALU | BPF_AND | BPF_K: case BPF_ALU | BPF_OR | BPF_K: case BPF_ALU | BPF_XOR | BPF_K: case BPF_ALU64 | BPF_ADD | BPF_K: case BPF_ALU64 | BPF_SUB | BPF_K: case BPF_ALU64 | BPF_AND | BPF_K: case BPF_ALU64 | BPF_OR | BPF_K: case BPF_ALU64 | BPF_XOR | BPF_K: maybe_emit_1mod(&prog, dst_reg, BPF_CLASS(insn->code) == BPF_ALU64); /* * b3 holds 'normal' opcode, b2 short form only valid * in case dst is eax/rax. */ switch (BPF_OP(insn->code)) { case BPF_ADD: b3 = 0xC0; b2 = 0x05; break; case BPF_SUB: b3 = 0xE8; b2 = 0x2D; break; case BPF_AND: b3 = 0xE0; b2 = 0x25; break; case BPF_OR: b3 = 0xC8; b2 = 0x0D; break; case BPF_XOR: b3 = 0xF0; b2 = 0x35; break; } if (is_imm8(imm32)) EMIT3(0x83, add_1reg(b3, dst_reg), imm32); else if (is_axreg(dst_reg)) EMIT1_off32(b2, imm32); else EMIT2_off32(0x81, add_1reg(b3, dst_reg), imm32); break; case BPF_ALU64 | BPF_MOV | BPF_K: case BPF_ALU | BPF_MOV | BPF_K: emit_mov_imm32(&prog, BPF_CLASS(insn->code) == BPF_ALU64, dst_reg, imm32); break; case BPF_LD | BPF_IMM | BPF_DW: emit_mov_imm64(&prog, dst_reg, insn[1].imm, insn[0].imm); insn++; i++; break; /* dst %= src, dst /= src, dst %= imm32, dst /= imm32 */ case BPF_ALU | BPF_MOD | BPF_X: case BPF_ALU | BPF_DIV | BPF_X: case BPF_ALU | BPF_MOD | BPF_K: case BPF_ALU | BPF_DIV | BPF_K: case BPF_ALU64 | BPF_MOD | BPF_X: case BPF_ALU64 | BPF_DIV | BPF_X: case BPF_ALU64 | BPF_MOD | BPF_K: case BPF_ALU64 | BPF_DIV | BPF_K: { bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; if (dst_reg != BPF_REG_0) EMIT1(0x50); /* push rax */ if (dst_reg != BPF_REG_3) EMIT1(0x52); /* push rdx */ if (BPF_SRC(insn->code) == BPF_X) { if (src_reg == BPF_REG_0 || src_reg == BPF_REG_3) { /* mov r11, src_reg */ EMIT_mov(AUX_REG, src_reg); src_reg = AUX_REG; } } else { /* mov r11, imm32 */ EMIT3_off32(0x49, 0xC7, 0xC3, imm32); src_reg = AUX_REG; } if (dst_reg != BPF_REG_0) /* mov rax, dst_reg */ emit_mov_reg(&prog, is64, BPF_REG_0, dst_reg); if (insn->off == 0) { /* * xor edx, edx * equivalent to 'xor rdx, rdx', but one byte less */ EMIT2(0x31, 0xd2); /* div src_reg */ maybe_emit_1mod(&prog, src_reg, is64); EMIT2(0xF7, add_1reg(0xF0, src_reg)); } else { if (BPF_CLASS(insn->code) == BPF_ALU) EMIT1(0x99); /* cdq */ else EMIT2(0x48, 0x99); /* cqo */ /* idiv src_reg */ maybe_emit_1mod(&prog, src_reg, is64); EMIT2(0xF7, add_1reg(0xF8, src_reg)); } if (BPF_OP(insn->code) == BPF_MOD && dst_reg != BPF_REG_3) /* mov dst_reg, rdx */ emit_mov_reg(&prog, is64, dst_reg, BPF_REG_3); else if (BPF_OP(insn->code) 
== BPF_DIV && dst_reg != BPF_REG_0) /* mov dst_reg, rax */ emit_mov_reg(&prog, is64, dst_reg, BPF_REG_0); if (dst_reg != BPF_REG_3) EMIT1(0x5A); /* pop rdx */ if (dst_reg != BPF_REG_0) EMIT1(0x58); /* pop rax */ break; } case BPF_ALU | BPF_MUL | BPF_K: case BPF_ALU64 | BPF_MUL | BPF_K: maybe_emit_mod(&prog, dst_reg, dst_reg, BPF_CLASS(insn->code) == BPF_ALU64); if (is_imm8(imm32)) /* imul dst_reg, dst_reg, imm8 */ EMIT3(0x6B, add_2reg(0xC0, dst_reg, dst_reg), imm32); else /* imul dst_reg, dst_reg, imm32 */ EMIT2_off32(0x69, add_2reg(0xC0, dst_reg, dst_reg), imm32); break; case BPF_ALU | BPF_MUL | BPF_X: case BPF_ALU64 | BPF_MUL | BPF_X: maybe_emit_mod(&prog, src_reg, dst_reg, BPF_CLASS(insn->code) == BPF_ALU64); /* imul dst_reg, src_reg */ EMIT3(0x0F, 0xAF, add_2reg(0xC0, src_reg, dst_reg)); break; /* Shifts */ case BPF_ALU | BPF_LSH | BPF_K: case BPF_ALU | BPF_RSH | BPF_K: case BPF_ALU | BPF_ARSH | BPF_K: case BPF_ALU64 | BPF_LSH | BPF_K: case BPF_ALU64 | BPF_RSH | BPF_K: case BPF_ALU64 | BPF_ARSH | BPF_K: maybe_emit_1mod(&prog, dst_reg, BPF_CLASS(insn->code) == BPF_ALU64); b3 = simple_alu_opcodes[BPF_OP(insn->code)]; if (imm32 == 1) EMIT2(0xD1, add_1reg(b3, dst_reg)); else EMIT3(0xC1, add_1reg(b3, dst_reg), imm32); break; case BPF_ALU | BPF_LSH | BPF_X: case BPF_ALU | BPF_RSH | BPF_X: case BPF_ALU | BPF_ARSH | BPF_X: case BPF_ALU64 | BPF_LSH | BPF_X: case BPF_ALU64 | BPF_RSH | BPF_X: case BPF_ALU64 | BPF_ARSH | BPF_X: /* BMI2 shifts aren't better when shift count is already in rcx */ if (boot_cpu_has(X86_FEATURE_BMI2) && src_reg != BPF_REG_4) { /* shrx/sarx/shlx dst_reg, dst_reg, src_reg */ bool w = (BPF_CLASS(insn->code) == BPF_ALU64); u8 op; switch (BPF_OP(insn->code)) { case BPF_LSH: op = 1; /* prefix 0x66 */ break; case BPF_RSH: op = 3; /* prefix 0xf2 */ break; case BPF_ARSH: op = 2; /* prefix 0xf3 */ break; } emit_shiftx(&prog, dst_reg, src_reg, w, op); break; } if (src_reg != BPF_REG_4) { /* common case */ /* Check for bad case when dst_reg == rcx */ if (dst_reg == BPF_REG_4) { /* mov r11, dst_reg */ EMIT_mov(AUX_REG, dst_reg); dst_reg = AUX_REG; } else { EMIT1(0x51); /* push rcx */ } /* mov rcx, src_reg */ EMIT_mov(BPF_REG_4, src_reg); } /* shl %rax, %cl | shr %rax, %cl | sar %rax, %cl */ maybe_emit_1mod(&prog, dst_reg, BPF_CLASS(insn->code) == BPF_ALU64); b3 = simple_alu_opcodes[BPF_OP(insn->code)]; EMIT2(0xD3, add_1reg(b3, dst_reg)); if (src_reg != BPF_REG_4) { if (insn->dst_reg == BPF_REG_4) /* mov dst_reg, r11 */ EMIT_mov(insn->dst_reg, AUX_REG); else EMIT1(0x59); /* pop rcx */ } break; case BPF_ALU | BPF_END | BPF_FROM_BE: case BPF_ALU64 | BPF_END | BPF_FROM_LE: switch (imm32) { case 16: /* Emit 'ror %ax, 8' to swap lower 2 bytes */ EMIT1(0x66); if (is_ereg(dst_reg)) EMIT1(0x41); EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8); /* Emit 'movzwl eax, ax' */ if (is_ereg(dst_reg)) EMIT3(0x45, 0x0F, 0xB7); else EMIT2(0x0F, 0xB7); EMIT1(add_2reg(0xC0, dst_reg, dst_reg)); break; case 32: /* Emit 'bswap eax' to swap lower 4 bytes */ if (is_ereg(dst_reg)) EMIT2(0x41, 0x0F); else EMIT1(0x0F); EMIT1(add_1reg(0xC8, dst_reg)); break; case 64: /* Emit 'bswap rax' to swap 8 bytes */ EMIT3(add_1mod(0x48, dst_reg), 0x0F, add_1reg(0xC8, dst_reg)); break; } break; case BPF_ALU | BPF_END | BPF_FROM_LE: switch (imm32) { case 16: /* * Emit 'movzwl eax, ax' to zero extend 16-bit * into 64 bit */ if (is_ereg(dst_reg)) EMIT3(0x45, 0x0F, 0xB7); else EMIT2(0x0F, 0xB7); EMIT1(add_2reg(0xC0, dst_reg, dst_reg)); break; case 32: /* Emit 'mov eax, eax' to clear upper 32-bits */ if (is_ereg(dst_reg)) EMIT1(0x45); 
EMIT2(0x89, add_2reg(0xC0, dst_reg, dst_reg)); break; case 64: /* nop */ break; } break; /* speculation barrier */ case BPF_ST | BPF_NOSPEC: EMIT_LFENCE(); break; /* ST: *(u8*)(dst_reg + off) = imm */ case BPF_ST | BPF_MEM | BPF_B: if (is_ereg(dst_reg)) EMIT2(0x41, 0xC6); else EMIT1(0xC6); goto st; case BPF_ST | BPF_MEM | BPF_H: if (is_ereg(dst_reg)) EMIT3(0x66, 0x41, 0xC7); else EMIT2(0x66, 0xC7); goto st; case BPF_ST | BPF_MEM | BPF_W: if (is_ereg(dst_reg)) EMIT2(0x41, 0xC7); else EMIT1(0xC7); goto st; case BPF_ST | BPF_MEM | BPF_DW: EMIT2(add_1mod(0x48, dst_reg), 0xC7); st: if (is_imm8(insn->off)) EMIT2(add_1reg(0x40, dst_reg), insn->off); else EMIT1_off32(add_1reg(0x80, dst_reg), insn->off); EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(insn->code))); break; /* STX: *(u8*)(dst_reg + off) = src_reg */ case BPF_STX | BPF_MEM | BPF_B: case BPF_STX | BPF_MEM | BPF_H: case BPF_STX | BPF_MEM | BPF_W: case BPF_STX | BPF_MEM | BPF_DW: emit_stx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off); break; case BPF_ST | BPF_PROBE_MEM32 | BPF_B: case BPF_ST | BPF_PROBE_MEM32 | BPF_H: case BPF_ST | BPF_PROBE_MEM32 | BPF_W: case BPF_ST | BPF_PROBE_MEM32 | BPF_DW: start_of_ldx = prog; emit_st_r12(&prog, BPF_SIZE(insn->code), dst_reg, insn->off, insn->imm); goto populate_extable; /* LDX: dst_reg = *(u8*)(src_reg + r12 + off) */ case BPF_LDX | BPF_PROBE_MEM32 | BPF_B: case BPF_LDX | BPF_PROBE_MEM32 | BPF_H: case BPF_LDX | BPF_PROBE_MEM32 | BPF_W: case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW: case BPF_STX | BPF_PROBE_MEM32 | BPF_B: case BPF_STX | BPF_PROBE_MEM32 | BPF_H: case BPF_STX | BPF_PROBE_MEM32 | BPF_W: case BPF_STX | BPF_PROBE_MEM32 | BPF_DW: start_of_ldx = prog; if (BPF_CLASS(insn->code) == BPF_LDX) emit_ldx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off); else emit_stx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off); populate_extable: { struct exception_table_entry *ex; u8 *_insn = image + proglen + (start_of_ldx - temp); s64 delta; if (!bpf_prog->aux->extable) break; if (excnt >= bpf_prog->aux->num_exentries) { pr_err("mem32 extable bug\n"); return -EFAULT; } ex = &bpf_prog->aux->extable[excnt++]; delta = _insn - (u8 *)&ex->insn; /* switch ex to rw buffer for writes */ ex = (void *)rw_image + ((void *)ex - (void *)image); ex->insn = delta; ex->data = EX_TYPE_BPF; ex->fixup = (prog - start_of_ldx) | ((BPF_CLASS(insn->code) == BPF_LDX ? 
reg2pt_regs[dst_reg] : DONT_CLEAR) << 8); } break; /* LDX: dst_reg = *(u8*)(src_reg + off) */ case BPF_LDX | BPF_MEM | BPF_B: case BPF_LDX | BPF_PROBE_MEM | BPF_B: case BPF_LDX | BPF_MEM | BPF_H: case BPF_LDX | BPF_PROBE_MEM | BPF_H: case BPF_LDX | BPF_MEM | BPF_W: case BPF_LDX | BPF_PROBE_MEM | BPF_W: case BPF_LDX | BPF_MEM | BPF_DW: case BPF_LDX | BPF_PROBE_MEM | BPF_DW: /* LDXS: dst_reg = *(s8*)(src_reg + off) */ case BPF_LDX | BPF_MEMSX | BPF_B: case BPF_LDX | BPF_MEMSX | BPF_H: case BPF_LDX | BPF_MEMSX | BPF_W: case BPF_LDX | BPF_PROBE_MEMSX | BPF_B: case BPF_LDX | BPF_PROBE_MEMSX | BPF_H: case BPF_LDX | BPF_PROBE_MEMSX | BPF_W: insn_off = insn->off; if (BPF_MODE(insn->code) == BPF_PROBE_MEM || BPF_MODE(insn->code) == BPF_PROBE_MEMSX) { /* Conservatively check that src_reg + insn->off is a kernel address: * src_reg + insn->off > TASK_SIZE_MAX + PAGE_SIZE * and * src_reg + insn->off < VSYSCALL_ADDR */ u64 limit = TASK_SIZE_MAX + PAGE_SIZE - VSYSCALL_ADDR; u8 *end_of_jmp; /* movabsq r10, VSYSCALL_ADDR */ emit_mov_imm64(&prog, BPF_REG_AX, (long)VSYSCALL_ADDR >> 32, (u32)(long)VSYSCALL_ADDR); /* mov src_reg, r11 */ EMIT_mov(AUX_REG, src_reg); if (insn->off) { /* add r11, insn->off */ maybe_emit_1mod(&prog, AUX_REG, true); EMIT2_off32(0x81, add_1reg(0xC0, AUX_REG), insn->off); } /* sub r11, r10 */ maybe_emit_mod(&prog, AUX_REG, BPF_REG_AX, true); EMIT2(0x29, add_2reg(0xC0, AUX_REG, BPF_REG_AX)); /* movabsq r10, limit */ emit_mov_imm64(&prog, BPF_REG_AX, (long)limit >> 32, (u32)(long)limit); /* cmp r10, r11 */ maybe_emit_mod(&prog, AUX_REG, BPF_REG_AX, true); EMIT2(0x39, add_2reg(0xC0, AUX_REG, BPF_REG_AX)); /* if unsigned '>', goto load */ EMIT2(X86_JA, 0); end_of_jmp = prog; /* xor dst_reg, dst_reg */ emit_mov_imm32(&prog, false, dst_reg, 0); /* jmp byte_after_ldx */ EMIT2(0xEB, 0); /* populate jmp_offset for JAE above to jump to start_of_ldx */ start_of_ldx = prog; end_of_jmp[-1] = start_of_ldx - end_of_jmp; } if (BPF_MODE(insn->code) == BPF_PROBE_MEMSX || BPF_MODE(insn->code) == BPF_MEMSX) emit_ldsx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn_off); else emit_ldx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn_off); if (BPF_MODE(insn->code) == BPF_PROBE_MEM || BPF_MODE(insn->code) == BPF_PROBE_MEMSX) { struct exception_table_entry *ex; u8 *_insn = image + proglen + (start_of_ldx - temp); s64 delta; /* populate jmp_offset for JMP above */ start_of_ldx[-1] = prog - start_of_ldx; if (!bpf_prog->aux->extable) break; if (excnt >= bpf_prog->aux->num_exentries) { pr_err("ex gen bug\n"); return -EFAULT; } ex = &bpf_prog->aux->extable[excnt++]; delta = _insn - (u8 *)&ex->insn; if (!is_simm32(delta)) { pr_err("extable->insn doesn't fit into 32-bit\n"); return -EFAULT; } /* switch ex to rw buffer for writes */ ex = (void *)rw_image + ((void *)ex - (void *)image); ex->insn = delta; ex->data = EX_TYPE_BPF; if (dst_reg > BPF_REG_9) { pr_err("verifier error\n"); return -EFAULT; } /* * Compute size of x86 insn and its target dest x86 register. * ex_handler_bpf() will use lower 8 bits to adjust * pt_regs->ip to jump over this x86 instruction * and upper bits to figure out which pt_regs to zero out. * End result: x86 insn "mov rbx, qword ptr [rax+0x14]" * of 4 bytes will be ignored and rbx will be zero inited. 
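 *
 * (The decode side is ex_handler_bpf() earlier in this file: it
 * reads the pt_regs byte offset to clear from "fixup >> 8" (or
 * DONT_CLEAR to leave registers untouched) and advances regs->ip
 * by "fixup & 0xff" to skip the faulting instruction.)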
*/ ex->fixup = (prog - start_of_ldx) | (reg2pt_regs[dst_reg] << 8); } break; case BPF_STX | BPF_ATOMIC | BPF_B: case BPF_STX | BPF_ATOMIC | BPF_H: if (!bpf_atomic_is_load_store(insn)) { pr_err("bpf_jit: 1- and 2-byte RMW atomics are not supported\n"); return -EFAULT; } fallthrough; case BPF_STX | BPF_ATOMIC | BPF_W: case BPF_STX | BPF_ATOMIC | BPF_DW: if (insn->imm == (BPF_AND | BPF_FETCH) || insn->imm == (BPF_OR | BPF_FETCH) || insn->imm == (BPF_XOR | BPF_FETCH)) { bool is64 = BPF_SIZE(insn->code) == BPF_DW; u32 real_src_reg = src_reg; u32 real_dst_reg = dst_reg; u8 *branch_target; /* * Can't be implemented with a single x86 insn. * Need to do a CMPXCHG loop. */ /* Will need RAX as a CMPXCHG operand so save R0 */ emit_mov_reg(&prog, true, BPF_REG_AX, BPF_REG_0); if (src_reg == BPF_REG_0) real_src_reg = BPF_REG_AX; if (dst_reg == BPF_REG_0) real_dst_reg = BPF_REG_AX; branch_target = prog; /* Load old value */ emit_ldx(&prog, BPF_SIZE(insn->code), BPF_REG_0, real_dst_reg, insn->off); /* * Perform the (commutative) operation locally, * put the result in the AUX_REG. */ emit_mov_reg(&prog, is64, AUX_REG, BPF_REG_0); maybe_emit_mod(&prog, AUX_REG, real_src_reg, is64); EMIT2(simple_alu_opcodes[BPF_OP(insn->imm)], add_2reg(0xC0, AUX_REG, real_src_reg)); /* Attempt to swap in new value */ err = emit_atomic_rmw(&prog, BPF_CMPXCHG, real_dst_reg, AUX_REG, insn->off, BPF_SIZE(insn->code)); if (WARN_ON(err)) return err; /* * ZF tells us whether we won the race. If it's * cleared we need to try again. */ EMIT2(X86_JNE, -(prog - branch_target) - 2); /* Return the pre-modification value */ emit_mov_reg(&prog, is64, real_src_reg, BPF_REG_0); /* Restore R0 after clobbering RAX */ emit_mov_reg(&prog, true, BPF_REG_0, BPF_REG_AX); break; } if (bpf_atomic_is_load_store(insn)) err = emit_atomic_ld_st(&prog, insn->imm, dst_reg, src_reg, insn->off, BPF_SIZE(insn->code)); else err = emit_atomic_rmw(&prog, insn->imm, dst_reg, src_reg, insn->off, BPF_SIZE(insn->code)); if (err) return err; break; case BPF_STX | BPF_PROBE_ATOMIC | BPF_B: case BPF_STX | BPF_PROBE_ATOMIC | BPF_H: if (!bpf_atomic_is_load_store(insn)) { pr_err("bpf_jit: 1- and 2-byte RMW atomics are not supported\n"); return -EFAULT; } fallthrough; case BPF_STX | BPF_PROBE_ATOMIC | BPF_W: case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW: start_of_ldx = prog; if (bpf_atomic_is_load_store(insn)) err = emit_atomic_ld_st_index(&prog, insn->imm, BPF_SIZE(insn->code), dst_reg, src_reg, X86_REG_R12, insn->off); else err = emit_atomic_rmw_index(&prog, insn->imm, BPF_SIZE(insn->code), dst_reg, src_reg, X86_REG_R12, insn->off); if (err) return err; goto populate_extable; /* call */ case BPF_JMP | BPF_CALL: { u8 *ip = image + addrs[i - 1]; func = (u8 *) __bpf_call_base + imm32; if (src_reg == BPF_PSEUDO_CALL && tail_call_reachable) { LOAD_TAIL_CALL_CNT_PTR(stack_depth); ip += 7; } if (!imm32) return -EINVAL; if (priv_frame_ptr) { push_r9(&prog); ip += 2; } ip += x86_call_depth_emit_accounting(&prog, func, ip); if (emit_call(&prog, func, ip)) return -EINVAL; if (priv_frame_ptr) pop_r9(&prog); break; } case BPF_JMP | BPF_TAIL_CALL: if (imm32) emit_bpf_tail_call_direct(bpf_prog, &bpf_prog->aux->poke_tab[imm32 - 1], &prog, image + addrs[i - 1], callee_regs_used, stack_depth, ctx); else emit_bpf_tail_call_indirect(bpf_prog, &prog, callee_regs_used, stack_depth, image + addrs[i - 1], ctx); break; /* cond jump */ case BPF_JMP | BPF_JEQ | BPF_X: case BPF_JMP | BPF_JNE | BPF_X: case BPF_JMP | BPF_JGT | BPF_X: case BPF_JMP | BPF_JLT | BPF_X: case BPF_JMP | BPF_JGE | BPF_X: case 
BPF_JMP | BPF_JLE | BPF_X: case BPF_JMP | BPF_JSGT | BPF_X: case BPF_JMP | BPF_JSLT | BPF_X: case BPF_JMP | BPF_JSGE | BPF_X: case BPF_JMP | BPF_JSLE | BPF_X: case BPF_JMP32 | BPF_JEQ | BPF_X: case BPF_JMP32 | BPF_JNE | BPF_X: case BPF_JMP32 | BPF_JGT | BPF_X: case BPF_JMP32 | BPF_JLT | BPF_X: case BPF_JMP32 | BPF_JGE | BPF_X: case BPF_JMP32 | BPF_JLE | BPF_X: case BPF_JMP32 | BPF_JSGT | BPF_X: case BPF_JMP32 | BPF_JSLT | BPF_X: case BPF_JMP32 | BPF_JSGE | BPF_X: case BPF_JMP32 | BPF_JSLE | BPF_X: /* cmp dst_reg, src_reg */ maybe_emit_mod(&prog, dst_reg, src_reg, BPF_CLASS(insn->code) == BPF_JMP); EMIT2(0x39, add_2reg(0xC0, dst_reg, src_reg)); goto emit_cond_jmp; case BPF_JMP | BPF_JSET | BPF_X: case BPF_JMP32 | BPF_JSET | BPF_X: /* test dst_reg, src_reg */ maybe_emit_mod(&prog, dst_reg, src_reg, BPF_CLASS(insn->code) == BPF_JMP); EMIT2(0x85, add_2reg(0xC0, dst_reg, src_reg)); goto emit_cond_jmp; case BPF_JMP | BPF_JSET | BPF_K: case BPF_JMP32 | BPF_JSET | BPF_K: /* test dst_reg, imm32 */ maybe_emit_1mod(&prog, dst_reg, BPF_CLASS(insn->code) == BPF_JMP); EMIT2_off32(0xF7, add_1reg(0xC0, dst_reg), imm32); goto emit_cond_jmp; case BPF_JMP | BPF_JEQ | BPF_K: case BPF_JMP | BPF_JNE | BPF_K: case BPF_JMP | BPF_JGT | BPF_K: case BPF_JMP | BPF_JLT | BPF_K: case BPF_JMP | BPF_JGE | BPF_K: case BPF_JMP | BPF_JLE | BPF_K: case BPF_JMP | BPF_JSGT | BPF_K: case BPF_JMP | BPF_JSLT | BPF_K: case BPF_JMP | BPF_JSGE | BPF_K: case BPF_JMP | BPF_JSLE | BPF_K: case BPF_JMP32 | BPF_JEQ | BPF_K: case BPF_JMP32 | BPF_JNE | BPF_K: case BPF_JMP32 | BPF_JGT | BPF_K: case BPF_JMP32 | BPF_JLT | BPF_K: case BPF_JMP32 | BPF_JGE | BPF_K: case BPF_JMP32 | BPF_JLE | BPF_K: case BPF_JMP32 | BPF_JSGT | BPF_K: case BPF_JMP32 | BPF_JSLT | BPF_K: case BPF_JMP32 | BPF_JSGE | BPF_K: case BPF_JMP32 | BPF_JSLE | BPF_K: /* test dst_reg, dst_reg to save one extra byte */ if (imm32 == 0) { maybe_emit_mod(&prog, dst_reg, dst_reg, BPF_CLASS(insn->code) == BPF_JMP); EMIT2(0x85, add_2reg(0xC0, dst_reg, dst_reg)); goto emit_cond_jmp; } /* cmp dst_reg, imm8/32 */ maybe_emit_1mod(&prog, dst_reg, BPF_CLASS(insn->code) == BPF_JMP); if (is_imm8(imm32)) EMIT3(0x83, add_1reg(0xF8, dst_reg), imm32); else EMIT2_off32(0x81, add_1reg(0xF8, dst_reg), imm32); emit_cond_jmp: /* Convert BPF opcode to x86 */ switch (BPF_OP(insn->code)) { case BPF_JEQ: jmp_cond = X86_JE; break; case BPF_JSET: case BPF_JNE: jmp_cond = X86_JNE; break; case BPF_JGT: /* GT is unsigned '>', JA in x86 */ jmp_cond = X86_JA; break; case BPF_JLT: /* LT is unsigned '<', JB in x86 */ jmp_cond = X86_JB; break; case BPF_JGE: /* GE is unsigned '>=', JAE in x86 */ jmp_cond = X86_JAE; break; case BPF_JLE: /* LE is unsigned '<=', JBE in x86 */ jmp_cond = X86_JBE; break; case BPF_JSGT: /* Signed '>', GT in x86 */ jmp_cond = X86_JG; break; case BPF_JSLT: /* Signed '<', LT in x86 */ jmp_cond = X86_JL; break; case BPF_JSGE: /* Signed '>=', GE in x86 */ jmp_cond = X86_JGE; break; case BPF_JSLE: /* Signed '<=', LE in x86 */ jmp_cond = X86_JLE; break; default: /* to silence GCC warning */ return -EFAULT; } jmp_offset = addrs[i + insn->off] - addrs[i]; if (is_imm8_jmp_offset(jmp_offset)) { if (jmp_padding) { /* To keep the jmp_offset valid, the extra bytes are * padded before the jump insn, so we subtract the * 2 bytes of jmp_cond insn from INSN_SZ_DIFF. * * If the previous pass already emits an imm8 * jmp_cond, then this BPF insn won't shrink, so * "nops" is 0. 
* * On the other hand, if the previous pass emits an * imm32 jmp_cond, the extra 4 bytes(*) is padded to * keep the image from shrinking further. * * (*) imm32 jmp_cond is 6 bytes, and imm8 jmp_cond * is 2 bytes, so the size difference is 4 bytes. */ nops = INSN_SZ_DIFF - 2; if (nops != 0 && nops != 4) { pr_err("unexpected jmp_cond padding: %d bytes\n", nops); return -EFAULT; } emit_nops(&prog, nops); } EMIT2(jmp_cond, jmp_offset); } else if (is_simm32(jmp_offset)) { EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset); } else { pr_err("cond_jmp gen bug %llx\n", jmp_offset); return -EFAULT; } break; case BPF_JMP | BPF_JA: case BPF_JMP32 | BPF_JA: if (BPF_CLASS(insn->code) == BPF_JMP) { if (insn->off == -1) /* -1 jmp instructions will always jump * backwards two bytes. Explicitly handling * this case avoids wasting too many passes * when there are long sequences of replaced * dead code. */ jmp_offset = -2; else jmp_offset = addrs[i + insn->off] - addrs[i]; } else { if (insn->imm == -1) jmp_offset = -2; else jmp_offset = addrs[i + insn->imm] - addrs[i]; } if (!jmp_offset) { /* * If jmp_padding is enabled, the extra nops will * be inserted. Otherwise, optimize out nop jumps. */ if (jmp_padding) { /* There are 3 possible conditions. * (1) This BPF_JA is already optimized out in * the previous run, so there is no need * to pad any extra byte (0 byte). * (2) The previous pass emits an imm8 jmp, * so we pad 2 bytes to match the previous * insn size. * (3) Similarly, the previous pass emits an * imm32 jmp, and 5 bytes is padded. */ nops = INSN_SZ_DIFF; if (nops != 0 && nops != 2 && nops != 5) { pr_err("unexpected nop jump padding: %d bytes\n", nops); return -EFAULT; } emit_nops(&prog, nops); } break; } emit_jmp: if (is_imm8_jmp_offset(jmp_offset)) { if (jmp_padding) { /* To avoid breaking jmp_offset, the extra bytes * are padded before the actual jmp insn, so * 2 bytes is subtracted from INSN_SZ_DIFF. * * If the previous pass already emits an imm8 * jmp, there is nothing to pad (0 byte). * * If it emits an imm32 jmp (5 bytes) previously * and now an imm8 jmp (2 bytes), then we pad * (5 - 2 = 3) bytes to stop the image from * shrinking further. */ nops = INSN_SZ_DIFF - 2; if (nops != 0 && nops != 3) { pr_err("unexpected jump padding: %d bytes\n", nops); return -EFAULT; } emit_nops(&prog, INSN_SZ_DIFF - 2); } EMIT2(0xEB, jmp_offset); } else if (is_simm32(jmp_offset)) { EMIT1_off32(0xE9, jmp_offset); } else { pr_err("jmp gen bug %llx\n", jmp_offset); return -EFAULT; } break; case BPF_JMP | BPF_EXIT: if (seen_exit) { jmp_offset = ctx->cleanup_addr - addrs[i]; goto emit_jmp; } seen_exit = true; /* Update cleanup_addr */ ctx->cleanup_addr = proglen; if (bpf_prog_was_classic(bpf_prog) && !capable(CAP_SYS_ADMIN)) { u8 *ip = image + addrs[i - 1]; if (emit_spectre_bhb_barrier(&prog, ip, bpf_prog)) return -EINVAL; } if (bpf_prog->aux->exception_boundary) { pop_callee_regs(&prog, all_callee_regs_used); pop_r12(&prog); } else { pop_callee_regs(&prog, callee_regs_used); if (arena_vm_start) pop_r12(&prog); } EMIT1(0xC9); /* leave */ emit_return(&prog, image + addrs[i - 1] + (prog - temp)); break; default: /* * By design x86-64 JIT should support all BPF instructions. * This error will be seen if new instruction was added * to the interpreter, but not to the JIT, or if there is * junk in bpf_prog. 
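 *
 * (Returning -EINVAL here aborts the JIT; bpf_int_jit_compile()
 * treats a non-positive do_jit() result as "fall back to the
 * interpreter", see the proglen <= 0 handling there.)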
*/ pr_err("bpf_jit: unknown opcode %02x\n", insn->code); return -EINVAL; } ilen = prog - temp; if (ilen > BPF_MAX_INSN_SIZE) { pr_err("bpf_jit: fatal insn size error\n"); return -EFAULT; } if (image) { /* * When populating the image, assert that: * * i) We do not write beyond the allocated space, and * ii) addrs[i] did not change from the prior run, in order * to validate assumptions made for computing branch * displacements. */ if (unlikely(proglen + ilen > oldproglen || proglen + ilen != addrs[i])) { pr_err("bpf_jit: fatal error\n"); return -EFAULT; } memcpy(rw_image + proglen, temp, ilen); } proglen += ilen; addrs[i] = proglen; prog = temp; } if (image && excnt != bpf_prog->aux->num_exentries) { pr_err("extable is not populated\n"); return -EFAULT; } return proglen; } static void clean_stack_garbage(const struct btf_func_model *m, u8 **pprog, int nr_stack_slots, int stack_size) { int arg_size, off; u8 *prog; /* Generally speaking, the compiler will pass the arguments * on-stack with "push" instruction, which will take 8-byte * on the stack. In this case, there won't be garbage values * while we copy the arguments from origin stack frame to current * in BPF_DW. * * However, sometimes the compiler will only allocate 4-byte on * the stack for the arguments. For now, this case will only * happen if there is only one argument on-stack and its size * not more than 4 byte. In this case, there will be garbage * values on the upper 4-byte where we store the argument on * current stack frame. * * arguments on origin stack: * * stack_arg_1(4-byte) xxx(4-byte) * * what we copy: * * stack_arg_1(8-byte): stack_arg_1(origin) xxx * * and the xxx is the garbage values which we should clean here. */ if (nr_stack_slots != 1) return; /* the size of the last argument */ arg_size = m->arg_size[m->nr_args - 1]; if (arg_size <= 4) { off = -(stack_size - 4); prog = *pprog; /* mov DWORD PTR [rbp + off], 0 */ if (!is_imm8(off)) EMIT2_off32(0xC7, 0x85, off); else EMIT3(0xC7, 0x45, off); EMIT(0, 4); *pprog = prog; } } /* get the count of the regs that are used to pass arguments */ static int get_nr_used_regs(const struct btf_func_model *m) { int i, arg_regs, nr_used_regs = 0; for (i = 0; i < min_t(int, m->nr_args, MAX_BPF_FUNC_ARGS); i++) { arg_regs = (m->arg_size[i] + 7) / 8; if (nr_used_regs + arg_regs <= 6) nr_used_regs += arg_regs; if (nr_used_regs >= 6) break; } return nr_used_regs; } static void save_args(const struct btf_func_model *m, u8 **prog, int stack_size, bool for_call_origin) { int arg_regs, first_off = 0, nr_regs = 0, nr_stack_slots = 0; int i, j; /* Store function arguments to stack. * For a function that accepts two pointers the sequence will be: * mov QWORD PTR [rbp-0x10],rdi * mov QWORD PTR [rbp-0x8],rsi */ for (i = 0; i < min_t(int, m->nr_args, MAX_BPF_FUNC_ARGS); i++) { arg_regs = (m->arg_size[i] + 7) / 8; /* According to the research of Yonghong, struct members * should be all in register or all on the stack. * Meanwhile, the compiler will pass the argument on regs * if the remaining regs can hold the argument. * * Disorder of the args can happen. For example: * * struct foo_struct { * long a; * int b; * }; * int foo(char, char, char, char, char, struct foo_struct, * char); * * the arg1-5,arg7 will be passed by regs, and arg6 will * by stack. */ if (nr_regs + arg_regs > 6) { /* copy function arguments from origin stack frame * into current stack frame. 
* * The starting address of the arguments on-stack * is: * rbp + 8(push rbp) + * 8(return addr of origin call) + * 8(return addr of the caller) * which means: rbp + 24 */ for (j = 0; j < arg_regs; j++) { emit_ldx(prog, BPF_DW, BPF_REG_0, BPF_REG_FP, nr_stack_slots * 8 + 0x18); emit_stx(prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -stack_size); if (!nr_stack_slots) first_off = stack_size; stack_size -= 8; nr_stack_slots++; } } else { /* Only copy the arguments on-stack to current * 'stack_size' and ignore the regs, used to * prepare the arguments on-stack for origin call. */ if (for_call_origin) { nr_regs += arg_regs; continue; } /* copy the arguments from regs into stack */ for (j = 0; j < arg_regs; j++) { emit_stx(prog, BPF_DW, BPF_REG_FP, nr_regs == 5 ? X86_REG_R9 : BPF_REG_1 + nr_regs, -stack_size); stack_size -= 8; nr_regs++; } } } clean_stack_garbage(m, prog, nr_stack_slots, first_off); } static void restore_regs(const struct btf_func_model *m, u8 **prog, int stack_size) { int i, j, arg_regs, nr_regs = 0; /* Restore function arguments from stack. * For a function that accepts two pointers the sequence will be: * EMIT4(0x48, 0x8B, 0x7D, 0xF0); mov rdi,QWORD PTR [rbp-0x10] * EMIT4(0x48, 0x8B, 0x75, 0xF8); mov rsi,QWORD PTR [rbp-0x8] * * The logic here is similar to what we do in save_args() */ for (i = 0; i < min_t(int, m->nr_args, MAX_BPF_FUNC_ARGS); i++) { arg_regs = (m->arg_size[i] + 7) / 8; if (nr_regs + arg_regs <= 6) { for (j = 0; j < arg_regs; j++) { emit_ldx(prog, BPF_DW, nr_regs == 5 ? X86_REG_R9 : BPF_REG_1 + nr_regs, BPF_REG_FP, -stack_size); stack_size -= 8; nr_regs++; } } else { stack_size -= 8 * arg_regs; } if (nr_regs >= 6) break; } } static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, struct bpf_tramp_link *l, int stack_size, int run_ctx_off, bool save_ret, void *image, void *rw_image) { u8 *prog = *pprog; u8 *jmp_insn; int ctx_cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie); struct bpf_prog *p = l->link.prog; u64 cookie = l->cookie; /* mov rdi, cookie */ emit_mov_imm64(&prog, BPF_REG_1, (long) cookie >> 32, (u32) (long) cookie); /* Prepare struct bpf_tramp_run_ctx. * * bpf_tramp_run_ctx is already preserved by * arch_prepare_bpf_trampoline(). 
* * mov QWORD PTR [rbp - run_ctx_off + ctx_cookie_off], rdi */ emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_1, -run_ctx_off + ctx_cookie_off); /* arg1: mov rdi, progs[i] */ emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p); /* arg2: lea rsi, [rbp - ctx_cookie_off] */ if (!is_imm8(-run_ctx_off)) EMIT3_off32(0x48, 0x8D, 0xB5, -run_ctx_off); else EMIT4(0x48, 0x8D, 0x75, -run_ctx_off); if (emit_rsb_call(&prog, bpf_trampoline_enter(p), image + (prog - (u8 *)rw_image))) return -EINVAL; /* remember prog start time returned by __bpf_prog_enter */ emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0); /* if (__bpf_prog_enter*(prog) == 0) * goto skip_exec_of_prog; */ EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */ /* emit 2 nops that will be replaced with JE insn */ jmp_insn = prog; emit_nops(&prog, 2); /* arg1: lea rdi, [rbp - stack_size] */ if (!is_imm8(-stack_size)) EMIT3_off32(0x48, 0x8D, 0xBD, -stack_size); else EMIT4(0x48, 0x8D, 0x7D, -stack_size); /* arg2: progs[i]->insnsi for interpreter */ if (!p->jited) emit_mov_imm64(&prog, BPF_REG_2, (long) p->insnsi >> 32, (u32) (long) p->insnsi); /* call JITed bpf program or interpreter */ if (emit_rsb_call(&prog, p->bpf_func, image + (prog - (u8 *)rw_image))) return -EINVAL; /* * BPF_TRAMP_MODIFY_RETURN trampolines can modify the return * of the previous call which is then passed on the stack to * the next BPF program. * * BPF_TRAMP_FENTRY trampoline may need to return the return * value of BPF_PROG_TYPE_STRUCT_OPS prog. */ if (save_ret) emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); /* replace 2 nops with JE insn, since jmp target is known */ jmp_insn[0] = X86_JE; jmp_insn[1] = prog - jmp_insn - 2; /* arg1: mov rdi, progs[i] */ emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p); /* arg2: mov rsi, rbx <- start time in nsec */ emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6); /* arg3: lea rdx, [rbp - run_ctx_off] */ if (!is_imm8(-run_ctx_off)) EMIT3_off32(0x48, 0x8D, 0x95, -run_ctx_off); else EMIT4(0x48, 0x8D, 0x55, -run_ctx_off); if (emit_rsb_call(&prog, bpf_trampoline_exit(p), image + (prog - (u8 *)rw_image))) return -EINVAL; *pprog = prog; return 0; } static void emit_align(u8 **pprog, u32 align) { u8 *target, *prog = *pprog; target = PTR_ALIGN(prog, align); if (target != prog) emit_nops(&prog, target - prog); *pprog = prog; } static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond) { u8 *prog = *pprog; s64 offset; offset = func - (ip + 2 + 4); if (!is_simm32(offset)) { pr_err("Target %p is out of range\n", func); return -EINVAL; } EMIT2_off32(0x0F, jmp_cond + 0x10, offset); *pprog = prog; return 0; } static int invoke_bpf(const struct btf_func_model *m, u8 **pprog, struct bpf_tramp_links *tl, int stack_size, int run_ctx_off, bool save_ret, void *image, void *rw_image) { int i; u8 *prog = *pprog; for (i = 0; i < tl->nr_links; i++) { if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, run_ctx_off, save_ret, image, rw_image)) return -EINVAL; } *pprog = prog; return 0; } static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog, struct bpf_tramp_links *tl, int stack_size, int run_ctx_off, u8 **branches, void *image, void *rw_image) { u8 *prog = *pprog; int i; /* The first fmod_ret program will receive a garbage return value. * Set this to 0 to avoid confusing the program. 
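 *
 * (The two emits below clear R0 and store it into the return value
 * slot at [rbp - 8]; the loop that follows re-checks that slot
 * ("cmp QWORD PTR [rbp - 0x8], 0x0") after each fmod_ret program
 * runs.)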
*/ emit_mov_imm32(&prog, false, BPF_REG_0, 0); emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); for (i = 0; i < tl->nr_links; i++) { if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, run_ctx_off, true, image, rw_image)) return -EINVAL; /* mod_ret prog stored return value into [rbp - 8]. Emit: * if (*(u64 *)(rbp - 8) != 0) * goto do_fexit; */ /* cmp QWORD PTR [rbp - 0x8], 0x0 */ EMIT4(0x48, 0x83, 0x7d, 0xf8); EMIT1(0x00); /* Save the location of the branch and Generate 6 nops * (4 bytes for an offset and 2 bytes for the jump) These nops * are replaced with a conditional jump once do_fexit (i.e. the * start of the fexit invocation) is finalized. */ branches[i] = prog; emit_nops(&prog, 4 + 2); } *pprog = prog; return 0; } /* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */ #define LOAD_TRAMP_TAIL_CALL_CNT_PTR(stack) \ __LOAD_TCC_PTR(-round_up(stack, 8) - 8) /* Example: * __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev); * its 'struct btf_func_model' will be nr_args=2 * The assembly code when eth_type_trans is executing after trampoline: * * push rbp * mov rbp, rsp * sub rsp, 16 // space for skb and dev * push rbx // temp regs to pass start time * mov qword ptr [rbp - 16], rdi // save skb pointer to stack * mov qword ptr [rbp - 8], rsi // save dev pointer to stack * call __bpf_prog_enter // rcu_read_lock and preempt_disable * mov rbx, rax // remember start time in bpf stats are enabled * lea rdi, [rbp - 16] // R1==ctx of bpf prog * call addr_of_jited_FENTRY_prog * movabsq rdi, 64bit_addr_of_struct_bpf_prog // unused if bpf stats are off * mov rsi, rbx // prog start time * call __bpf_prog_exit // rcu_read_unlock, preempt_enable and stats math * mov rdi, qword ptr [rbp - 16] // restore skb pointer from stack * mov rsi, qword ptr [rbp - 8] // restore dev pointer from stack * pop rbx * leave * ret * * eth_type_trans has 5 byte nop at the beginning. These 5 bytes will be * replaced with 'call generated_bpf_trampoline'. When it returns * eth_type_trans will continue executing with original skb and dev pointers. 
* * The assembly code when eth_type_trans is called from trampoline: * * push rbp * mov rbp, rsp * sub rsp, 24 // space for skb, dev, return value * push rbx // temp regs to pass start time * mov qword ptr [rbp - 24], rdi // save skb pointer to stack * mov qword ptr [rbp - 16], rsi // save dev pointer to stack * call __bpf_prog_enter // rcu_read_lock and preempt_disable * mov rbx, rax // remember start time if bpf stats are enabled * lea rdi, [rbp - 24] // R1==ctx of bpf prog * call addr_of_jited_FENTRY_prog // bpf prog can access skb and dev * movabsq rdi, 64bit_addr_of_struct_bpf_prog // unused if bpf stats are off * mov rsi, rbx // prog start time * call __bpf_prog_exit // rcu_read_unlock, preempt_enable and stats math * mov rdi, qword ptr [rbp - 24] // restore skb pointer from stack * mov rsi, qword ptr [rbp - 16] // restore dev pointer from stack * call eth_type_trans+5 // execute body of eth_type_trans * mov qword ptr [rbp - 8], rax // save return value * call __bpf_prog_enter // rcu_read_lock and preempt_disable * mov rbx, rax // remember start time in bpf stats are enabled * lea rdi, [rbp - 24] // R1==ctx of bpf prog * call addr_of_jited_FEXIT_prog // bpf prog can access skb, dev, return value * movabsq rdi, 64bit_addr_of_struct_bpf_prog // unused if bpf stats are off * mov rsi, rbx // prog start time * call __bpf_prog_exit // rcu_read_unlock, preempt_enable and stats math * mov rax, qword ptr [rbp - 8] // restore eth_type_trans's return value * pop rbx * leave * add rsp, 8 // skip eth_type_trans's frame * ret // return to its caller */ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_image, void *rw_image_end, void *image, const struct btf_func_model *m, u32 flags, struct bpf_tramp_links *tlinks, void *func_addr) { int i, ret, nr_regs = m->nr_args, stack_size = 0; int regs_off, nregs_off, ip_off, run_ctx_off, arg_stack_off, rbx_off; struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; void *orig_call = func_addr; u8 **branches = NULL; u8 *prog; bool save_ret; /* * F_INDIRECT is only compatible with F_RET_FENTRY_RET, it is * explicitly incompatible with F_CALL_ORIG | F_SKIP_FRAME | F_IP_ARG * because @func_addr. */ WARN_ON_ONCE((flags & BPF_TRAMP_F_INDIRECT) && (flags & ~(BPF_TRAMP_F_INDIRECT | BPF_TRAMP_F_RET_FENTRY_RET))); /* extra registers for struct arguments */ for (i = 0; i < m->nr_args; i++) { if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG) nr_regs += (m->arg_size[i] + 7) / 8 - 1; } /* x86-64 supports up to MAX_BPF_FUNC_ARGS arguments. 1-6 * are passed through regs, the remains are through stack. */ if (nr_regs > MAX_BPF_FUNC_ARGS) return -ENOTSUPP; /* Generated trampoline stack layout: * * RBP + 8 [ return address ] * RBP + 0 [ RBP ] * * RBP - 8 [ return value ] BPF_TRAMP_F_CALL_ORIG or * BPF_TRAMP_F_RET_FENTRY_RET flags * * [ reg_argN ] always * [ ... ] * RBP - regs_off [ reg_arg1 ] program's ctx pointer * * RBP - nregs_off [ regs count ] always * * RBP - ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag * * RBP - rbx_off [ rbx value ] always * * RBP - run_ctx_off [ bpf_tramp_run_ctx ] * * [ stack_argN ] BPF_TRAMP_F_CALL_ORIG * [ ... 
] * [ stack_arg2 ] * RBP - arg_stack_off [ stack_arg1 ] * RSP [ tail_call_cnt_ptr ] BPF_TRAMP_F_TAIL_CALL_CTX */ /* room for return value of orig_call or fentry prog */ save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET); if (save_ret) stack_size += 8; stack_size += nr_regs * 8; regs_off = stack_size; /* regs count */ stack_size += 8; nregs_off = stack_size; if (flags & BPF_TRAMP_F_IP_ARG) stack_size += 8; /* room for IP address argument */ ip_off = stack_size; stack_size += 8; rbx_off = stack_size; stack_size += (sizeof(struct bpf_tramp_run_ctx) + 7) & ~0x7; run_ctx_off = stack_size; if (nr_regs > 6 && (flags & BPF_TRAMP_F_CALL_ORIG)) { /* the space that used to pass arguments on-stack */ stack_size += (nr_regs - get_nr_used_regs(m)) * 8; /* make sure the stack pointer is 16-byte aligned if we * need pass arguments on stack, which means * [stack_size + 8(rbp) + 8(rip) + 8(origin rip)] * should be 16-byte aligned. Following code depend on * that stack_size is already 8-byte aligned. */ stack_size += (stack_size % 16) ? 0 : 8; } arg_stack_off = stack_size; if (flags & BPF_TRAMP_F_SKIP_FRAME) { /* skip patched call instruction and point orig_call to actual * body of the kernel function. */ if (is_endbr(orig_call)) orig_call += ENDBR_INSN_SIZE; orig_call += X86_PATCH_SIZE; } prog = rw_image; if (flags & BPF_TRAMP_F_INDIRECT) { /* * Indirect call for bpf_struct_ops */ emit_cfi(&prog, image, cfi_get_func_hash(func_addr), cfi_get_func_arity(func_addr)); } else { /* * Direct-call fentry stub, as such it needs accounting for the * __fentry__ call. */ x86_call_depth_emit_accounting(&prog, NULL, image); } EMIT1(0x55); /* push rbp */ EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ if (!is_imm8(stack_size)) { /* sub rsp, stack_size */ EMIT3_off32(0x48, 0x81, 0xEC, stack_size); } else { /* sub rsp, stack_size */ EMIT4(0x48, 0x83, 0xEC, stack_size); } if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) EMIT1(0x50); /* push rax */ /* mov QWORD PTR [rbp - rbx_off], rbx */ emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_6, -rbx_off); /* Store number of argument registers of the traced function: * mov rax, nr_regs * mov QWORD PTR [rbp - nregs_off], rax */ emit_mov_imm64(&prog, BPF_REG_0, 0, (u32) nr_regs); emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -nregs_off); if (flags & BPF_TRAMP_F_IP_ARG) { /* Store IP address of the traced function: * movabsq rax, func_addr * mov QWORD PTR [rbp - ip_off], rax */ emit_mov_imm64(&prog, BPF_REG_0, (long) func_addr >> 32, (u32) (long) func_addr); emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -ip_off); } save_args(m, &prog, regs_off, false); if (flags & BPF_TRAMP_F_CALL_ORIG) { /* arg1: mov rdi, im */ emit_mov_imm64(&prog, BPF_REG_1, (long) im >> 32, (u32) (long) im); if (emit_rsb_call(&prog, __bpf_tramp_enter, image + (prog - (u8 *)rw_image))) { ret = -EINVAL; goto cleanup; } } if (fentry->nr_links) { if (invoke_bpf(m, &prog, fentry, regs_off, run_ctx_off, flags & BPF_TRAMP_F_RET_FENTRY_RET, image, rw_image)) return -EINVAL; } if (fmod_ret->nr_links) { branches = kcalloc(fmod_ret->nr_links, sizeof(u8 *), GFP_KERNEL); if (!branches) return -ENOMEM; if (invoke_bpf_mod_ret(m, &prog, fmod_ret, regs_off, run_ctx_off, branches, image, rw_image)) { ret = -EINVAL; goto cleanup; } } if (flags & BPF_TRAMP_F_CALL_ORIG) { restore_regs(m, &prog, regs_off); save_args(m, &prog, arg_stack_off, true); if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) { /* Before calling the original function, load the * tail_call_cnt_ptr from stack to rax. 
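 *
 * (LOAD_TRAMP_TAIL_CALL_CNT_PTR(stack) expands via __LOAD_TCC_PTR
 * to "mov rax, qword ptr [rbp - round_up(stack, 8) - 8]"; see the
 * macro definitions earlier in this file.)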
*/ LOAD_TRAMP_TAIL_CALL_CNT_PTR(stack_size); } if (flags & BPF_TRAMP_F_ORIG_STACK) { emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, 8); EMIT2(0xff, 0xd3); /* call *rbx */ } else { /* call original function */ if (emit_rsb_call(&prog, orig_call, image + (prog - (u8 *)rw_image))) { ret = -EINVAL; goto cleanup; } } /* remember return value in a stack for bpf prog to access */ emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); im->ip_after_call = image + (prog - (u8 *)rw_image); emit_nops(&prog, X86_PATCH_SIZE); } if (fmod_ret->nr_links) { /* From Intel 64 and IA-32 Architectures Optimization * Reference Manual, 3.4.1.4 Code Alignment, Assembly/Compiler * Coding Rule 11: All branch targets should be 16-byte * aligned. */ emit_align(&prog, 16); /* Update the branches saved in invoke_bpf_mod_ret with the * aligned address of do_fexit. */ for (i = 0; i < fmod_ret->nr_links; i++) { emit_cond_near_jump(&branches[i], image + (prog - (u8 *)rw_image), image + (branches[i] - (u8 *)rw_image), X86_JNE); } } if (fexit->nr_links) { if (invoke_bpf(m, &prog, fexit, regs_off, run_ctx_off, false, image, rw_image)) { ret = -EINVAL; goto cleanup; } } if (flags & BPF_TRAMP_F_RESTORE_REGS) restore_regs(m, &prog, regs_off); /* This needs to be done regardless. If there were fmod_ret programs, * the return value is only updated on the stack and still needs to be * restored to R0. */ if (flags & BPF_TRAMP_F_CALL_ORIG) { im->ip_epilogue = image + (prog - (u8 *)rw_image); /* arg1: mov rdi, im */ emit_mov_imm64(&prog, BPF_REG_1, (long) im >> 32, (u32) (long) im); if (emit_rsb_call(&prog, __bpf_tramp_exit, image + (prog - (u8 *)rw_image))) { ret = -EINVAL; goto cleanup; } } else if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) { /* Before running the original function, load the * tail_call_cnt_ptr from stack to rax. */ LOAD_TRAMP_TAIL_CALL_CNT_PTR(stack_size); } /* restore return value of orig_call or fentry prog back into RAX */ if (save_ret) emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8); emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, -rbx_off); EMIT1(0xC9); /* leave */ if (flags & BPF_TRAMP_F_SKIP_FRAME) { /* skip our return address and return to parent */ EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */ } emit_return(&prog, image + (prog - (u8 *)rw_image)); /* Make sure the trampoline generation logic doesn't overflow */ if (WARN_ON_ONCE(prog > (u8 *)rw_image_end - BPF_INSN_SAFETY)) { ret = -EFAULT; goto cleanup; } ret = prog - (u8 *)rw_image + BPF_INSN_SAFETY; cleanup: kfree(branches); return ret; } void *arch_alloc_bpf_trampoline(unsigned int size) { return bpf_prog_pack_alloc(size, jit_fill_hole); } void arch_free_bpf_trampoline(void *image, unsigned int size) { bpf_prog_pack_free(image, size); } int arch_protect_bpf_trampoline(void *image, unsigned int size) { return 0; } int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end, const struct btf_func_model *m, u32 flags, struct bpf_tramp_links *tlinks, void *func_addr) { void *rw_image, *tmp; int ret; u32 size = image_end - image; /* rw_image doesn't need to be in module memory range, so we can * use kvmalloc. 
*/ rw_image = kvmalloc(size, GFP_KERNEL); if (!rw_image) return -ENOMEM; ret = __arch_prepare_bpf_trampoline(im, rw_image, rw_image + size, image, m, flags, tlinks, func_addr); if (ret < 0) goto out; tmp = bpf_arch_text_copy(image, rw_image, size); if (IS_ERR(tmp)) ret = PTR_ERR(tmp); out: kvfree(rw_image); return ret; } int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, struct bpf_tramp_links *tlinks, void *func_addr) { struct bpf_tramp_image im; void *image; int ret; /* Allocate a temporary buffer for __arch_prepare_bpf_trampoline(). * This will NOT cause fragmentation in direct map, as we do not * call set_memory_*() on this buffer. * * We cannot use kvmalloc here, because we need image to be in * module memory range. */ image = bpf_jit_alloc_exec(PAGE_SIZE); if (!image) return -ENOMEM; ret = __arch_prepare_bpf_trampoline(&im, image, image + PAGE_SIZE, image, m, flags, tlinks, func_addr); bpf_jit_free_exec(image); return ret; } static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs, u8 *image, u8 *buf) { u8 *jg_reloc, *prog = *pprog; int pivot, err, jg_bytes = 1; s64 jg_offset; if (a == b) { /* Leaf node of recursion, i.e. not a range of indices * anymore. */ EMIT1(add_1mod(0x48, BPF_REG_3)); /* cmp rdx,func */ if (!is_simm32(progs[a])) return -1; EMIT2_off32(0x81, add_1reg(0xF8, BPF_REG_3), progs[a]); err = emit_cond_near_jump(&prog, /* je func */ (void *)progs[a], image + (prog - buf), X86_JE); if (err) return err; emit_indirect_jump(&prog, 2 /* rdx */, image + (prog - buf)); *pprog = prog; return 0; } /* Not a leaf node, so we pivot, and recursively descend into * the lower and upper ranges. */ pivot = (b - a) / 2; EMIT1(add_1mod(0x48, BPF_REG_3)); /* cmp rdx,func */ if (!is_simm32(progs[a + pivot])) return -1; EMIT2_off32(0x81, add_1reg(0xF8, BPF_REG_3), progs[a + pivot]); if (pivot > 2) { /* jg upper_part */ /* Require near jump. */ jg_bytes = 4; EMIT2_off32(0x0F, X86_JG + 0x10, 0); } else { EMIT2(X86_JG, 0); } jg_reloc = prog; err = emit_bpf_dispatcher(&prog, a, a + pivot, /* emit lower_part */ progs, image, buf); if (err) return err; /* From Intel 64 and IA-32 Architectures Optimization * Reference Manual, 3.4.1.4 Code Alignment, Assembly/Compiler * Coding Rule 11: All branch targets should be 16-byte * aligned. 
*/ emit_align(&prog, 16); jg_offset = prog - jg_reloc; emit_code(jg_reloc - jg_bytes, jg_offset, jg_bytes); err = emit_bpf_dispatcher(&prog, a + pivot + 1, /* emit upper_part */ b, progs, image, buf); if (err) return err; *pprog = prog; return 0; } static int cmp_ips(const void *a, const void *b) { const s64 *ipa = a; const s64 *ipb = b; if (*ipa > *ipb) return 1; if (*ipa < *ipb) return -1; return 0; } int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_funcs) { u8 *prog = buf; sort(funcs, num_funcs, sizeof(funcs[0]), cmp_ips, NULL); return emit_bpf_dispatcher(&prog, 0, num_funcs - 1, funcs, image, buf); } static const char *bpf_get_prog_name(struct bpf_prog *prog) { if (prog->aux->ksym.prog) return prog->aux->ksym.name; return prog->aux->name; } static void priv_stack_init_guard(void __percpu *priv_stack_ptr, int alloc_size) { int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3; u64 *stack_ptr; for_each_possible_cpu(cpu) { stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu); stack_ptr[0] = PRIV_STACK_GUARD_VAL; stack_ptr[underflow_idx] = PRIV_STACK_GUARD_VAL; } } static void priv_stack_check_guard(void __percpu *priv_stack_ptr, int alloc_size, struct bpf_prog *prog) { int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3; u64 *stack_ptr; for_each_possible_cpu(cpu) { stack_ptr = per_cpu_ptr(priv_stack_ptr, cpu); if (stack_ptr[0] != PRIV_STACK_GUARD_VAL || stack_ptr[underflow_idx] != PRIV_STACK_GUARD_VAL) { pr_err("BPF private stack overflow/underflow detected for prog %sx\n", bpf_get_prog_name(prog)); break; } } } struct x64_jit_data { struct bpf_binary_header *rw_header; struct bpf_binary_header *header; int *addrs; u8 *image; int proglen; struct jit_context ctx; }; #define MAX_PASSES 20 #define PADDING_PASSES (MAX_PASSES - 5) struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { struct bpf_binary_header *rw_header = NULL; struct bpf_binary_header *header = NULL; struct bpf_prog *tmp, *orig_prog = prog; void __percpu *priv_stack_ptr = NULL; struct x64_jit_data *jit_data; int priv_stack_alloc_sz; int proglen, oldproglen = 0; struct jit_context ctx = {}; bool tmp_blinded = false; bool extra_pass = false; bool padding = false; u8 *rw_image = NULL; u8 *image = NULL; int *addrs; int pass; int i; if (!prog->jit_requested) return orig_prog; tmp = bpf_jit_blind_constants(prog); /* * If blinding was requested and we failed during blinding, * we must fall back to the interpreter. */ if (IS_ERR(tmp)) return orig_prog; if (tmp != prog) { tmp_blinded = true; prog = tmp; } jit_data = prog->aux->jit_data; if (!jit_data) { jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL); if (!jit_data) { prog = orig_prog; goto out; } prog->aux->jit_data = jit_data; } priv_stack_ptr = prog->aux->priv_stack_ptr; if (!priv_stack_ptr && prog->aux->jits_use_priv_stack) { /* Allocate actual private stack size with verifier-calculated * stack size plus two memory guards to protect overflow and * underflow. 
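 *
 * (Resulting per-CPU layout, as set up by priv_stack_init_guard()
 * above:
 *   [8-byte guard][round_up(stack_depth, 8) bytes][8-byte guard]
 * with both guard words written with PRIV_STACK_GUARD_VAL and
 * re-checked by priv_stack_check_guard() when the prog is freed.)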
*/ priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 8) + 2 * PRIV_STACK_GUARD_SZ; priv_stack_ptr = __alloc_percpu_gfp(priv_stack_alloc_sz, 8, GFP_KERNEL); if (!priv_stack_ptr) { prog = orig_prog; goto out_priv_stack; } priv_stack_init_guard(priv_stack_ptr, priv_stack_alloc_sz); prog->aux->priv_stack_ptr = priv_stack_ptr; } addrs = jit_data->addrs; if (addrs) { ctx = jit_data->ctx; oldproglen = jit_data->proglen; image = jit_data->image; header = jit_data->header; rw_header = jit_data->rw_header; rw_image = (void *)rw_header + ((void *)image - (void *)header); extra_pass = true; padding = true; goto skip_init_addrs; } addrs = kvmalloc_array(prog->len + 1, sizeof(*addrs), GFP_KERNEL); if (!addrs) { prog = orig_prog; goto out_addrs; } /* * Before first pass, make a rough estimation of addrs[] * each BPF instruction is translated to less than 64 bytes */ for (proglen = 0, i = 0; i <= prog->len; i++) { proglen += 64; addrs[i] = proglen; } ctx.cleanup_addr = proglen; skip_init_addrs: /* * JITed image shrinks with every pass and the loop iterates * until the image stops shrinking. Very large BPF programs * may converge on the last pass. In such case do one more * pass to emit the final image. */ for (pass = 0; pass < MAX_PASSES || image; pass++) { if (!padding && pass >= PADDING_PASSES) padding = true; proglen = do_jit(prog, addrs, image, rw_image, oldproglen, &ctx, padding); if (proglen <= 0) { out_image: image = NULL; if (header) { bpf_arch_text_copy(&header->size, &rw_header->size, sizeof(rw_header->size)); bpf_jit_binary_pack_free(header, rw_header); } /* Fall back to interpreter mode */ prog = orig_prog; if (extra_pass) { prog->bpf_func = NULL; prog->jited = 0; prog->jited_len = 0; } goto out_addrs; } if (image) { if (proglen != oldproglen) { pr_err("bpf_jit: proglen=%d != oldproglen=%d\n", proglen, oldproglen); goto out_image; } break; } if (proglen == oldproglen) { /* * The number of entries in extable is the number of BPF_LDX * insns that access kernel memory via "pointer to BTF type". * The verifier changed their opcode from LDX|MEM|size * to LDX|PROBE_MEM|size to make JITing easier. */ u32 align = __alignof__(struct exception_table_entry); u32 extable_size = prog->aux->num_exentries * sizeof(struct exception_table_entry); /* allocate module memory for x86 insns and extable */ header = bpf_jit_binary_pack_alloc(roundup(proglen, align) + extable_size, &image, align, &rw_header, &rw_image, jit_fill_hole); if (!header) { prog = orig_prog; goto out_addrs; } prog->aux->extable = (void *) image + roundup(proglen, align); } oldproglen = proglen; cond_resched(); } if (bpf_jit_enable > 1) bpf_jit_dump(prog->len, proglen, pass + 1, rw_image); if (image) { if (!prog->is_func || extra_pass) { /* * bpf_jit_binary_pack_finalize fails in two scenarios: * 1) header is not pointing to proper module memory; * 2) the arch doesn't support bpf_arch_text_copy(). * * Both cases are serious bugs and justify WARN_ON. */ if (WARN_ON(bpf_jit_binary_pack_finalize(header, rw_header))) { /* header has been freed */ header = NULL; goto out_image; } bpf_tail_call_direct_fixup(prog); } else { jit_data->addrs = addrs; jit_data->ctx = ctx; jit_data->proglen = proglen; jit_data->image = image; jit_data->header = header; jit_data->rw_header = rw_header; } /* * ctx.prog_offset is used when CFI preambles put code *before* * the function. See emit_cfi(). For FineIBT specifically this code * can also be executed and bpf_prog_kallsyms_add() will * generate an additional symbol to cover this, hence also * decrement proglen. 
*/ prog->bpf_func = (void *)image + cfi_get_offset(); prog->jited = 1; prog->jited_len = proglen - cfi_get_offset(); } else { prog = orig_prog; } if (!image || !prog->is_func || extra_pass) { if (image) bpf_prog_fill_jited_linfo(prog, addrs + 1); out_addrs: kvfree(addrs); if (!image && priv_stack_ptr) { free_percpu(priv_stack_ptr); prog->aux->priv_stack_ptr = NULL; } out_priv_stack: kfree(jit_data); prog->aux->jit_data = NULL; } out: if (tmp_blinded) bpf_jit_prog_release_other(prog, prog == orig_prog ? tmp : orig_prog); return prog; } bool bpf_jit_supports_kfunc_call(void) { return true; } void *bpf_arch_text_copy(void *dst, void *src, size_t len) { if (text_poke_copy(dst, src, len) == NULL) return ERR_PTR(-EINVAL); return dst; } /* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */ bool bpf_jit_supports_subprog_tailcalls(void) { return true; } bool bpf_jit_supports_percpu_insn(void) { return true; } void bpf_jit_free(struct bpf_prog *prog) { if (prog->jited) { struct x64_jit_data *jit_data = prog->aux->jit_data; struct bpf_binary_header *hdr; void __percpu *priv_stack_ptr; int priv_stack_alloc_sz; /* * If we fail the final pass of JIT (from jit_subprogs), * the program may not be finalized yet. Call finalize here * before freeing it. */ if (jit_data) { bpf_jit_binary_pack_finalize(jit_data->header, jit_data->rw_header); kvfree(jit_data->addrs); kfree(jit_data); } prog->bpf_func = (void *)prog->bpf_func - cfi_get_offset(); hdr = bpf_jit_binary_pack_hdr(prog); bpf_jit_binary_pack_free(hdr, NULL); priv_stack_ptr = prog->aux->priv_stack_ptr; if (priv_stack_ptr) { priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 8) + 2 * PRIV_STACK_GUARD_SZ; priv_stack_check_guard(priv_stack_ptr, priv_stack_alloc_sz, prog); free_percpu(prog->aux->priv_stack_ptr); } WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog)); } bpf_prog_unlock_free(prog); } bool bpf_jit_supports_exceptions(void) { /* We unwind through both kernel frames (starting from within bpf_throw * call) and BPF frames. Therefore we require ORC unwinder to be enabled * to walk kernel frames and reach BPF frames in the stack trace. */ return IS_ENABLED(CONFIG_UNWINDER_ORC); } bool bpf_jit_supports_private_stack(void) { return true; } void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie) { #if defined(CONFIG_UNWINDER_ORC) struct unwind_state state; unsigned long addr; for (unwind_start(&state, current, NULL, NULL); !unwind_done(&state); unwind_next_frame(&state)) { addr = unwind_get_return_address(&state); if (!addr || !consume_fn(cookie, (u64)addr, (u64)state.sp, (u64)state.bp)) break; } return; #endif WARN(1, "verification of programs using bpf_throw should have failed\n"); } void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke, struct bpf_prog *new, struct bpf_prog *old) { u8 *old_addr, *new_addr, *old_bypass_addr; int ret; old_bypass_addr = old ? NULL : poke->bypass_addr; old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL; new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL; /* * On program loading or teardown, the program's kallsym entry * might not be in place, so we use __bpf_arch_text_poke to skip * the kallsyms check. 
*/ if (new) { ret = __bpf_arch_text_poke(poke->tailcall_target, BPF_MOD_JUMP, old_addr, new_addr); BUG_ON(ret < 0); if (!old) { ret = __bpf_arch_text_poke(poke->tailcall_bypass, BPF_MOD_JUMP, poke->bypass_addr, NULL); BUG_ON(ret < 0); } } else { ret = __bpf_arch_text_poke(poke->tailcall_bypass, BPF_MOD_JUMP, old_bypass_addr, poke->bypass_addr); BUG_ON(ret < 0); /* let other CPUs finish the execution of program * so that it will not possible to expose them * to invalid nop, stack unwind, nop state */ if (!ret) synchronize_rcu(); ret = __bpf_arch_text_poke(poke->tailcall_target, BPF_MOD_JUMP, old_addr, NULL); BUG_ON(ret < 0); } } bool bpf_jit_supports_arena(void) { return true; } bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena) { if (!in_arena) return true; switch (insn->code) { case BPF_STX | BPF_ATOMIC | BPF_W: case BPF_STX | BPF_ATOMIC | BPF_DW: if (insn->imm == (BPF_AND | BPF_FETCH) || insn->imm == (BPF_OR | BPF_FETCH) || insn->imm == (BPF_XOR | BPF_FETCH)) return false; } return true; } bool bpf_jit_supports_ptr_xchg(void) { return true; } /* x86-64 JIT emits its own code to filter user addresses so return 0 here */ u64 bpf_arch_uaddress_limit(void) { return 0; } bool bpf_jit_supports_timed_may_goto(void) { return true; } |
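/*
 * Illustrative sketch (standalone userspace C, not kernel code): the
 * convergence idea behind the JIT pass loop above.  do_jit() is re-run with
 * the addrs[] produced by the previous pass; the image can only shrink as
 * jump encodings tighten, and only when two consecutive passes report the
 * same length is the binary image allocated and a final pass emitted into
 * it.  fake_do_jit(), MAX_PASSES_DEMO and the instruction count are made-up
 * stand-ins; emission is deterministic given the previous addresses, so the
 * final pass cannot exceed the allocation.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_PASSES_DEMO	20

/* stand-in for do_jit(): returns the emitted length for this pass */
static int fake_do_jit(const int *prev_addrs, int *addrs, int ninsns,
		       unsigned char *image)
{
	int off = 0, i;

	for (i = 0; i < ninsns; i++) {
		/* short encoding once the (previous-pass) target is near */
		int sz = (prev_addrs[ninsns - 1] - prev_addrs[i] < 128) ? 2 : 5;

		if (image)
			memset(image + off, 0x90, sz);	/* placeholder bytes */
		off += sz;
		addrs[i] = off;
	}
	return off;
}

int main(void)
{
	enum { NINSNS = 100 };
	int addrs[NINSNS], prev[NINSNS];
	unsigned char *image = NULL;
	int oldlen = 0, len = 0, pass, i;

	/* pessimistic first estimate, like the 64-bytes-per-insn guess above */
	for (i = 0; i < NINSNS; i++)
		prev[i] = (i + 1) * 64;

	for (pass = 0; pass < MAX_PASSES_DEMO || image; pass++) {
		len = fake_do_jit(prev, addrs, NINSNS, image);
		if (image) {
			if (len != oldlen)
				return 1;	/* must not change on the final pass */
			break;
		}
		if (len == oldlen) {
			/* converged: allocate the image, then do one more pass */
			image = malloc(len);
			if (!image)
				return 1;
		}
		oldlen = len;
		memcpy(prev, addrs, sizeof(prev));
	}
	printf("converged after %d passes, %d bytes\n", pass + 1, len);
	free(image);
	return 0;
}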
| 13 13 752 3632 7511 7515 7327 7315 1934 1924 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _MM_PERCPU_INTERNAL_H #define _MM_PERCPU_INTERNAL_H #include <linux/types.h> #include <linux/percpu.h> #include <linux/memcontrol.h> /* * pcpu_block_md is the metadata block struct. * Each chunk's bitmap is split into a number of full blocks. * All units are in terms of bits. * * The scan hint is the largest known contiguous area before the contig hint. * It is not necessarily the actual largest contig hint though. There is an * invariant that the scan_hint_start > contig_hint_start iff * scan_hint == contig_hint. This is necessary because when scanning forward, * we don't know if a new contig hint would be better than the current one. */ struct pcpu_block_md { int scan_hint; /* scan hint for block */ int scan_hint_start; /* block relative starting position of the scan hint */ int contig_hint; /* contig hint for block */ int contig_hint_start; /* block relative starting position of the contig hint */ int left_free; /* size of free space along the left side of the block */ int right_free; /* size of free space along the right side of the block */ int first_free; /* block position of first free */ int nr_bits; /* total bits responsible for */ }; struct pcpuobj_ext { #ifdef CONFIG_MEMCG struct obj_cgroup *cgroup; #endif #ifdef CONFIG_MEM_ALLOC_PROFILING union codetag_ref tag; #endif }; #if defined(CONFIG_MEMCG) || defined(CONFIG_MEM_ALLOC_PROFILING) #define NEED_PCPUOBJ_EXT #endif struct pcpu_chunk { #ifdef CONFIG_PERCPU_STATS int nr_alloc; /* # of allocations */ size_t max_alloc_size; /* largest allocation size */ #endif struct list_head list; /* linked to pcpu_slot lists */ int free_bytes; /* free bytes in the chunk */ struct pcpu_block_md chunk_md; unsigned long *bound_map; /* boundary map */ /* * base_addr is the base address of this chunk. * To reduce false sharing, current layout is optimized to make sure * base_addr locate in the different cacheline with free_bytes and * chunk_md. 
*/ void *base_addr ____cacheline_aligned_in_smp; unsigned long *alloc_map; /* allocation map */ struct pcpu_block_md *md_blocks; /* metadata blocks */ void *data; /* chunk data */ bool immutable; /* no [de]population allowed */ bool isolated; /* isolated from active chunk slots */ int start_offset; /* the overlap with the previous region to have a page aligned base_addr */ int end_offset; /* additional area required to have the region end page aligned */ #ifdef NEED_PCPUOBJ_EXT struct pcpuobj_ext *obj_exts; /* vector of object cgroups */ #endif int nr_pages; /* # of pages served by this chunk */ int nr_populated; /* # of populated pages */ int nr_empty_pop_pages; /* # of empty populated pages */ unsigned long populated[]; /* populated bitmap */ }; static inline bool need_pcpuobj_ext(void) { if (IS_ENABLED(CONFIG_MEM_ALLOC_PROFILING)) return true; if (!mem_cgroup_kmem_disabled()) return true; return false; } extern spinlock_t pcpu_lock; extern struct list_head *pcpu_chunk_lists; extern int pcpu_nr_slots; extern int pcpu_sidelined_slot; extern int pcpu_to_depopulate_slot; extern int pcpu_nr_empty_pop_pages; extern struct pcpu_chunk *pcpu_first_chunk; extern struct pcpu_chunk *pcpu_reserved_chunk; /** * pcpu_chunk_nr_blocks - converts nr_pages to # of md_blocks * @chunk: chunk of interest * * This conversion is from the number of physical pages that the chunk * serves to the number of bitmap blocks used. */ static inline int pcpu_chunk_nr_blocks(struct pcpu_chunk *chunk) { return chunk->nr_pages * PAGE_SIZE / PCPU_BITMAP_BLOCK_SIZE; } /** * pcpu_nr_pages_to_map_bits - converts the pages to size of bitmap * @pages: number of physical pages * * This conversion is from physical pages to the number of bits * required in the bitmap. */ static inline int pcpu_nr_pages_to_map_bits(int pages) { return pages * PAGE_SIZE / PCPU_MIN_ALLOC_SIZE; } /** * pcpu_chunk_map_bits - helper to convert nr_pages to size of bitmap * @chunk: chunk of interest * * This conversion is from the number of physical pages that the chunk * serves to the number of bits in the bitmap. */ static inline int pcpu_chunk_map_bits(struct pcpu_chunk *chunk) { return pcpu_nr_pages_to_map_bits(chunk->nr_pages); } /** * pcpu_obj_full_size - helper to calculate size of each accounted object * @size: size of area to allocate in bytes * * For each accounted object there is an extra space which is used to store * obj_cgroup membership if kmemcg is not disabled. Charge it too. */ static inline size_t pcpu_obj_full_size(size_t size) { size_t extra_size = 0; #ifdef CONFIG_MEMCG if (!mem_cgroup_kmem_disabled()) extra_size += size / PCPU_MIN_ALLOC_SIZE * sizeof(struct obj_cgroup *); #endif return size * num_possible_cpus() + extra_size; } #ifdef CONFIG_PERCPU_STATS #include <linux/spinlock.h> struct percpu_stats { u64 nr_alloc; /* lifetime # of allocations */ u64 nr_dealloc; /* lifetime # of deallocations */ u64 nr_cur_alloc; /* current # of allocations */ u64 nr_max_alloc; /* max # of live allocations */ u32 nr_chunks; /* current # of live chunks */ u32 nr_max_chunks; /* max # of live chunks */ size_t min_alloc_size; /* min allocation size */ size_t max_alloc_size; /* max allocation size */ }; extern struct percpu_stats pcpu_stats; extern struct pcpu_alloc_info pcpu_stats_ai; /* * For debug purposes. We don't care about the flexible array. 
*/ static inline void pcpu_stats_save_ai(const struct pcpu_alloc_info *ai) { memcpy(&pcpu_stats_ai, ai, sizeof(struct pcpu_alloc_info)); /* initialize min_alloc_size to unit_size */ pcpu_stats.min_alloc_size = pcpu_stats_ai.unit_size; } /* * pcpu_stats_area_alloc - increment area allocation stats * @chunk: the location of the area being allocated * @size: size of area to allocate in bytes * * CONTEXT: * pcpu_lock. */ static inline void pcpu_stats_area_alloc(struct pcpu_chunk *chunk, size_t size) { lockdep_assert_held(&pcpu_lock); pcpu_stats.nr_alloc++; pcpu_stats.nr_cur_alloc++; pcpu_stats.nr_max_alloc = max(pcpu_stats.nr_max_alloc, pcpu_stats.nr_cur_alloc); pcpu_stats.min_alloc_size = min(pcpu_stats.min_alloc_size, size); pcpu_stats.max_alloc_size = max(pcpu_stats.max_alloc_size, size); chunk->nr_alloc++; chunk->max_alloc_size = max(chunk->max_alloc_size, size); } /* * pcpu_stats_area_dealloc - decrement allocation stats * @chunk: the location of the area being deallocated * * CONTEXT: * pcpu_lock. */ static inline void pcpu_stats_area_dealloc(struct pcpu_chunk *chunk) { lockdep_assert_held(&pcpu_lock); pcpu_stats.nr_dealloc++; pcpu_stats.nr_cur_alloc--; chunk->nr_alloc--; } /* * pcpu_stats_chunk_alloc - increment chunk stats */ static inline void pcpu_stats_chunk_alloc(void) { unsigned long flags; spin_lock_irqsave(&pcpu_lock, flags); pcpu_stats.nr_chunks++; pcpu_stats.nr_max_chunks = max(pcpu_stats.nr_max_chunks, pcpu_stats.nr_chunks); spin_unlock_irqrestore(&pcpu_lock, flags); } /* * pcpu_stats_chunk_dealloc - decrement chunk stats */ static inline void pcpu_stats_chunk_dealloc(void) { unsigned long flags; spin_lock_irqsave(&pcpu_lock, flags); pcpu_stats.nr_chunks--; spin_unlock_irqrestore(&pcpu_lock, flags); } #else static inline void pcpu_stats_save_ai(const struct pcpu_alloc_info *ai) { } static inline void pcpu_stats_area_alloc(struct pcpu_chunk *chunk, size_t size) { } static inline void pcpu_stats_area_dealloc(struct pcpu_chunk *chunk) { } static inline void pcpu_stats_chunk_alloc(void) { } static inline void pcpu_stats_chunk_dealloc(void) { } #endif /* !CONFIG_PERCPU_STATS */ #endif |
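/*
 * Worked example (standalone userspace C, not kernel code) of the size
 * conversions declared above: pcpu_chunk_nr_blocks(),
 * pcpu_nr_pages_to_map_bits() and the pcpu_obj_full_size() accounting math.
 * The DEMO_* constants mirror common values of PAGE_SIZE,
 * PCPU_BITMAP_BLOCK_SIZE and PCPU_MIN_ALLOC_SIZE (4 KiB page, one bitmap
 * block per page, 4-byte allocation granule) but are assumptions for
 * illustration only, as is the possible-CPU count.
 */
#include <stdio.h>
#include <stddef.h>

#define DEMO_PAGE_SIZE	4096
#define DEMO_BLOCK_SIZE	DEMO_PAGE_SIZE	/* stands in for PCPU_BITMAP_BLOCK_SIZE */
#define DEMO_MIN_ALLOC	4		/* stands in for PCPU_MIN_ALLOC_SIZE */
#define DEMO_NR_CPUS	8		/* stands in for num_possible_cpus() */

int main(void)
{
	int nr_pages = 16;		/* a 64 KiB chunk */
	size_t size = 1024;		/* one accounted allocation */

	/* pcpu_chunk_nr_blocks(): pages served -> metadata blocks */
	int nr_blocks = nr_pages * DEMO_PAGE_SIZE / DEMO_BLOCK_SIZE;

	/* pcpu_nr_pages_to_map_bits(): pages served -> allocation-map bits */
	int map_bits = nr_pages * DEMO_PAGE_SIZE / DEMO_MIN_ALLOC;

	/*
	 * pcpu_obj_full_size(): every possible CPU gets a copy of the area,
	 * plus one obj_cgroup pointer per minimum-allocation unit when
	 * kmemcg accounting is enabled.
	 */
	size_t extra = size / DEMO_MIN_ALLOC * sizeof(void *);
	size_t full = size * DEMO_NR_CPUS + extra;

	printf("%d pages -> %d md blocks, %d map bits\n",
	       nr_pages, nr_blocks, map_bits);
	printf("a %zu byte accounted object is charged %zu bytes in total\n",
	       size, full);
	return 0;
}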
1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 | // SPDX-License-Identifier: GPL-2.0-or-later /* cx231xx-core.c - driver for Conexant Cx23100/101/102 USB video capture devices Copyright (C) 2008 <srinivasa.deevi at conexant dot com> Based on em28xx driver */ #include "cx231xx.h" #include <linux/init.h> #include <linux/list.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <media/v4l2-common.h> #include <media/tuner.h> #include "cx231xx-reg.h" /* #define ENABLE_DEBUG_ISOC_FRAMES */ static unsigned int core_debug; module_param(core_debug, int, 0644); MODULE_PARM_DESC(core_debug, "enable debug messages [core]"); #define cx231xx_coredbg(fmt, arg...) do {\ if (core_debug) \ printk(KERN_INFO "%s %s :"fmt, \ dev->name, __func__ , ##arg); } while (0) static unsigned int reg_debug; module_param(reg_debug, int, 0644); MODULE_PARM_DESC(reg_debug, "enable debug messages [URB reg]"); static int alt = CX231XX_PINOUT; module_param(alt, int, 0644); MODULE_PARM_DESC(alt, "alternate setting to use for video endpoint"); #define cx231xx_isocdbg(fmt, arg...) do {\ if (core_debug) \ printk(KERN_INFO "%s %s :"fmt, \ dev->name, __func__ , ##arg); } while (0) /***************************************************************** * Device control list functions * ******************************************************************/ LIST_HEAD(cx231xx_devlist); static DEFINE_MUTEX(cx231xx_devlist_mutex); /* * cx231xx_realease_resources() * unregisters the v4l2,i2c and usb devices * called when the device gets disconnected or at module unload */ void cx231xx_remove_from_devlist(struct cx231xx *dev) { if (dev == NULL) return; if (dev->udev == NULL) return; if (atomic_read(&dev->devlist_count) > 0) { mutex_lock(&cx231xx_devlist_mutex); list_del(&dev->devlist); atomic_dec(&dev->devlist_count); mutex_unlock(&cx231xx_devlist_mutex); } }; void cx231xx_add_into_devlist(struct cx231xx *dev) { mutex_lock(&cx231xx_devlist_mutex); list_add_tail(&dev->devlist, &cx231xx_devlist); atomic_inc(&dev->devlist_count); mutex_unlock(&cx231xx_devlist_mutex); }; static LIST_HEAD(cx231xx_extension_devlist); int cx231xx_register_extension(struct cx231xx_ops *ops) { struct cx231xx *dev = NULL; mutex_lock(&cx231xx_devlist_mutex); list_add_tail(&ops->next, &cx231xx_extension_devlist); list_for_each_entry(dev, &cx231xx_devlist, devlist) { ops->init(dev); dev_info(dev->dev, "%s initialized\n", ops->name); } mutex_unlock(&cx231xx_devlist_mutex); return 0; } EXPORT_SYMBOL(cx231xx_register_extension); void cx231xx_unregister_extension(struct cx231xx_ops *ops) { struct cx231xx *dev = NULL; mutex_lock(&cx231xx_devlist_mutex); list_for_each_entry(dev, &cx231xx_devlist, devlist) { ops->fini(dev); dev_info(dev->dev, "%s removed\n", ops->name); } list_del(&ops->next); mutex_unlock(&cx231xx_devlist_mutex); } EXPORT_SYMBOL(cx231xx_unregister_extension); void 
cx231xx_init_extension(struct cx231xx *dev) { struct cx231xx_ops *ops = NULL; mutex_lock(&cx231xx_devlist_mutex); list_for_each_entry(ops, &cx231xx_extension_devlist, next) { if (ops->init) ops->init(dev); } mutex_unlock(&cx231xx_devlist_mutex); } void cx231xx_close_extension(struct cx231xx *dev) { struct cx231xx_ops *ops = NULL; mutex_lock(&cx231xx_devlist_mutex); list_for_each_entry(ops, &cx231xx_extension_devlist, next) { if (ops->fini) ops->fini(dev); } mutex_unlock(&cx231xx_devlist_mutex); } /**************************************************************** * U S B related functions * *****************************************************************/ int cx231xx_send_usb_command(struct cx231xx_i2c *i2c_bus, struct cx231xx_i2c_xfer_data *req_data) { int status = 0; struct cx231xx *dev = i2c_bus->dev; struct VENDOR_REQUEST_IN ven_req; u8 saddr_len = 0; u8 _i2c_period = 0; u8 _i2c_nostop = 0; u8 _i2c_reserve = 0; if (dev->state & DEV_DISCONNECTED) return -ENODEV; /* Get the I2C period, nostop and reserve parameters */ _i2c_period = i2c_bus->i2c_period; _i2c_nostop = i2c_bus->i2c_nostop; _i2c_reserve = i2c_bus->i2c_reserve; saddr_len = req_data->saddr_len; /* Set wValue */ ven_req.wValue = (req_data->dev_addr << 9 | _i2c_period << 4 | saddr_len << 2 | _i2c_nostop << 1 | I2C_SYNC | _i2c_reserve << 6); /* set channel number */ if (req_data->direction & I2C_M_RD) { /* channel number, for read,spec required channel_num +4 */ ven_req.bRequest = i2c_bus->nr + 4; } else ven_req.bRequest = i2c_bus->nr; /* channel number, */ /* set index value */ switch (saddr_len) { case 0: ven_req.wIndex = 0; /* need check */ break; case 1: ven_req.wIndex = (req_data->saddr_dat & 0xff); break; case 2: ven_req.wIndex = req_data->saddr_dat; break; } /* set wLength value */ ven_req.wLength = req_data->buf_size; /* set bData value */ ven_req.bData = 0; /* set the direction */ if (req_data->direction) { ven_req.direction = USB_DIR_IN; memset(req_data->p_buffer, 0x00, ven_req.wLength); } else ven_req.direction = USB_DIR_OUT; /* set the buffer for read / write */ ven_req.pBuff = req_data->p_buffer; /* call common vendor command request */ status = cx231xx_send_vendor_cmd(dev, &ven_req); if (status < 0 && !dev->i2c_scan_running) { dev_err(dev->dev, "%s: failed with status -%d\n", __func__, status); } return status; } EXPORT_SYMBOL_GPL(cx231xx_send_usb_command); /* * Sends/Receives URB control messages, assuring to use a kalloced buffer * for all operations (dev->urb_buf), to avoid using stacked buffers, as * they aren't safe for usage with USB, due to DMA restrictions. * Also implements the debug code for control URB's. */ static int __usb_control_msg(struct cx231xx *dev, unsigned int pipe, __u8 request, __u8 requesttype, __u16 value, __u16 index, void *data, __u16 size, int timeout) { int rc, i; if (reg_debug) { printk(KERN_DEBUG "%s: (pipe 0x%08x): %s: %02x %02x %02x %02x %02x %02x %02x %02x ", dev->name, pipe, (requesttype & USB_DIR_IN) ? 
"IN" : "OUT", requesttype, request, value & 0xff, value >> 8, index & 0xff, index >> 8, size & 0xff, size >> 8); if (!(requesttype & USB_DIR_IN)) { printk(KERN_CONT ">>>"); for (i = 0; i < size; i++) printk(KERN_CONT " %02x", ((unsigned char *)data)[i]); } } /* Do the real call to usb_control_msg */ mutex_lock(&dev->ctrl_urb_lock); if (!(requesttype & USB_DIR_IN) && size) memcpy(dev->urb_buf, data, size); rc = usb_control_msg(dev->udev, pipe, request, requesttype, value, index, dev->urb_buf, size, timeout); if ((requesttype & USB_DIR_IN) && size) memcpy(data, dev->urb_buf, size); mutex_unlock(&dev->ctrl_urb_lock); if (reg_debug) { if (unlikely(rc < 0)) { printk(KERN_CONT "FAILED!\n"); return rc; } if ((requesttype & USB_DIR_IN)) { printk(KERN_CONT "<<<"); for (i = 0; i < size; i++) printk(KERN_CONT " %02x", ((unsigned char *)data)[i]); } printk(KERN_CONT "\n"); } return rc; } /* * cx231xx_read_ctrl_reg() * reads data from the usb device specifying bRequest and wValue */ int cx231xx_read_ctrl_reg(struct cx231xx *dev, u8 req, u16 reg, char *buf, int len) { u8 val = 0; int ret; int pipe = usb_rcvctrlpipe(dev->udev, 0); if (dev->state & DEV_DISCONNECTED) return -ENODEV; if (len > URB_MAX_CTRL_SIZE) return -EINVAL; switch (len) { case 1: val = ENABLE_ONE_BYTE; break; case 2: val = ENABLE_TWE_BYTE; break; case 3: val = ENABLE_THREE_BYTE; break; case 4: val = ENABLE_FOUR_BYTE; break; default: val = 0xFF; /* invalid option */ } if (val == 0xFF) return -EINVAL; ret = __usb_control_msg(dev, pipe, req, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, val, reg, buf, len, HZ); return ret; } int cx231xx_send_vendor_cmd(struct cx231xx *dev, struct VENDOR_REQUEST_IN *ven_req) { int ret; int pipe = 0; int unsend_size = 0; u8 *pdata; if (dev->state & DEV_DISCONNECTED) return -ENODEV; if ((ven_req->wLength > URB_MAX_CTRL_SIZE)) return -EINVAL; if (ven_req->direction) pipe = usb_rcvctrlpipe(dev->udev, 0); else pipe = usb_sndctrlpipe(dev->udev, 0); /* * If the cx23102 read more than 4 bytes with i2c bus, * need chop to 4 byte per request */ if ((ven_req->wLength > 4) && ((ven_req->bRequest == 0x4) || (ven_req->bRequest == 0x5) || (ven_req->bRequest == 0x6) || /* Internal Master 3 Bus can send * and receive only 4 bytes per time */ (ven_req->bRequest == 0x2))) { unsend_size = 0; pdata = ven_req->pBuff; unsend_size = ven_req->wLength; /* the first package */ ven_req->wValue = ven_req->wValue & 0xFFFB; ven_req->wValue = (ven_req->wValue & 0xFFBD) | 0x2; ret = __usb_control_msg(dev, pipe, ven_req->bRequest, ven_req->direction | USB_TYPE_VENDOR | USB_RECIP_DEVICE, ven_req->wValue, ven_req->wIndex, pdata, 0x0004, HZ); unsend_size = unsend_size - 4; /* the middle package */ ven_req->wValue = (ven_req->wValue & 0xFFBD) | 0x42; while (unsend_size - 4 > 0) { pdata = pdata + 4; ret = __usb_control_msg(dev, pipe, ven_req->bRequest, ven_req->direction | USB_TYPE_VENDOR | USB_RECIP_DEVICE, ven_req->wValue, ven_req->wIndex, pdata, 0x0004, HZ); unsend_size = unsend_size - 4; } /* the last package */ ven_req->wValue = (ven_req->wValue & 0xFFBD) | 0x40; pdata = pdata + 4; ret = __usb_control_msg(dev, pipe, ven_req->bRequest, ven_req->direction | USB_TYPE_VENDOR | USB_RECIP_DEVICE, ven_req->wValue, ven_req->wIndex, pdata, unsend_size, HZ); } else { ret = __usb_control_msg(dev, pipe, ven_req->bRequest, ven_req->direction | USB_TYPE_VENDOR | USB_RECIP_DEVICE, ven_req->wValue, ven_req->wIndex, ven_req->pBuff, ven_req->wLength, HZ); } return ret; } /* * cx231xx_write_ctrl_reg() * sends data to the usb device, specifying bRequest */ 
int cx231xx_write_ctrl_reg(struct cx231xx *dev, u8 req, u16 reg, char *buf, int len) { u8 val = 0; int ret; int pipe = usb_sndctrlpipe(dev->udev, 0); if (dev->state & DEV_DISCONNECTED) return -ENODEV; if ((len < 1) || (len > URB_MAX_CTRL_SIZE)) return -EINVAL; switch (len) { case 1: val = ENABLE_ONE_BYTE; break; case 2: val = ENABLE_TWE_BYTE; break; case 3: val = ENABLE_THREE_BYTE; break; case 4: val = ENABLE_FOUR_BYTE; break; default: val = 0xFF; /* invalid option */ } if (val == 0xFF) return -EINVAL; if (reg_debug) { int byte; cx231xx_isocdbg("(pipe 0x%08x): OUT: %02x %02x %02x %02x %02x %02x %02x %02x >>>", pipe, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, req, 0, val, reg & 0xff, reg >> 8, len & 0xff, len >> 8); for (byte = 0; byte < len; byte++) cx231xx_isocdbg(" %02x", (unsigned char)buf[byte]); cx231xx_isocdbg("\n"); } ret = __usb_control_msg(dev, pipe, req, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, val, reg, buf, len, HZ); return ret; } /**************************************************************** * USB Alternate Setting functions * *****************************************************************/ int cx231xx_set_video_alternate(struct cx231xx *dev) { int errCode, prev_alt = dev->video_mode.alt; unsigned int min_pkt_size = dev->width * 2 + 4; u32 usb_interface_index = 0; /* When image size is bigger than a certain value, the frame size should be increased, otherwise, only green screen will be received. */ if (dev->width * 2 * dev->height > 720 * 240 * 2) min_pkt_size *= 2; if (dev->width > 360) { /* resolutions: 720,704,640 */ dev->video_mode.alt = 3; } else if (dev->width > 180) { /* resolutions: 360,352,320,240 */ dev->video_mode.alt = 2; } else if (dev->width > 0) { /* resolutions: 180,176,160,128,88 */ dev->video_mode.alt = 1; } else { /* Change to alt0 BULK to release USB bandwidth */ dev->video_mode.alt = 0; } if (dev->USE_ISO == 0) dev->video_mode.alt = 0; cx231xx_coredbg("dev->video_mode.alt= %d\n", dev->video_mode.alt); /* Get the correct video interface Index */ usb_interface_index = dev->current_pcb_config.hs_config_info[0].interface_info. video_index + 1; if (dev->video_mode.alt != prev_alt) { cx231xx_coredbg("minimum isoc packet size: %u (alt=%d)\n", min_pkt_size, dev->video_mode.alt); if (dev->video_mode.alt_max_pkt_size != NULL) dev->video_mode.max_pkt_size = dev->video_mode.alt_max_pkt_size[dev->video_mode.alt]; cx231xx_coredbg("setting alternate %d with wMaxPacketSize=%u\n", dev->video_mode.alt, dev->video_mode.max_pkt_size); errCode = usb_set_interface(dev->udev, usb_interface_index, dev->video_mode.alt); if (errCode < 0) { dev_err(dev->dev, "cannot change alt number to %d (error=%i)\n", dev->video_mode.alt, errCode); return errCode; } } return 0; } int cx231xx_set_alt_setting(struct cx231xx *dev, u8 index, u8 alt) { int status = 0; u32 usb_interface_index = 0; u32 max_pkt_size = 0; switch (index) { case INDEX_TS1: usb_interface_index = dev->current_pcb_config.hs_config_info[0].interface_info. ts1_index + 1; dev->ts1_mode.alt = alt; if (dev->ts1_mode.alt_max_pkt_size != NULL) max_pkt_size = dev->ts1_mode.max_pkt_size = dev->ts1_mode.alt_max_pkt_size[dev->ts1_mode.alt]; break; case INDEX_TS2: usb_interface_index = dev->current_pcb_config.hs_config_info[0].interface_info. ts2_index + 1; break; case INDEX_AUDIO: usb_interface_index = dev->current_pcb_config.hs_config_info[0].interface_info. 
audio_index + 1; dev->adev.alt = alt; if (dev->adev.alt_max_pkt_size != NULL) max_pkt_size = dev->adev.max_pkt_size = dev->adev.alt_max_pkt_size[dev->adev.alt]; break; case INDEX_VIDEO: usb_interface_index = dev->current_pcb_config.hs_config_info[0].interface_info. video_index + 1; dev->video_mode.alt = alt; if (dev->video_mode.alt_max_pkt_size != NULL) max_pkt_size = dev->video_mode.max_pkt_size = dev->video_mode.alt_max_pkt_size[dev->video_mode. alt]; break; case INDEX_VANC: if (dev->board.no_alt_vanc) return 0; usb_interface_index = dev->current_pcb_config.hs_config_info[0].interface_info. vanc_index + 1; dev->vbi_mode.alt = alt; if (dev->vbi_mode.alt_max_pkt_size != NULL) max_pkt_size = dev->vbi_mode.max_pkt_size = dev->vbi_mode.alt_max_pkt_size[dev->vbi_mode.alt]; break; case INDEX_HANC: usb_interface_index = dev->current_pcb_config.hs_config_info[0].interface_info. hanc_index + 1; dev->sliced_cc_mode.alt = alt; if (dev->sliced_cc_mode.alt_max_pkt_size != NULL) max_pkt_size = dev->sliced_cc_mode.max_pkt_size = dev->sliced_cc_mode.alt_max_pkt_size[dev-> sliced_cc_mode. alt]; break; default: break; } if (alt > 0 && max_pkt_size == 0) { dev_err(dev->dev, "can't change interface %d alt no. to %d: Max. Pkt size = 0\n", usb_interface_index, alt); /*To workaround error number=-71 on EP0 for videograbber, need add following codes.*/ if (dev->board.no_alt_vanc) return -1; } cx231xx_coredbg("setting alternate %d with wMaxPacketSize=%u,Interface = %d\n", alt, max_pkt_size, usb_interface_index); if (usb_interface_index > 0) { status = usb_set_interface(dev->udev, usb_interface_index, alt); if (status < 0) { dev_err(dev->dev, "can't change interface %d alt no. to %d (err=%i)\n", usb_interface_index, alt, status); return status; } } return status; } EXPORT_SYMBOL_GPL(cx231xx_set_alt_setting); int cx231xx_gpio_set(struct cx231xx *dev, struct cx231xx_reg_seq *gpio) { int rc = 0; if (!gpio) return rc; /* Send GPIO reset sequences specified at board entry */ while (gpio->sleep >= 0) { rc = cx231xx_set_gpio_value(dev, gpio->bit, gpio->val); if (rc < 0) return rc; if (gpio->sleep > 0) msleep(gpio->sleep); gpio++; } return rc; } int cx231xx_demod_reset(struct cx231xx *dev) { u8 status = 0; u8 value[4] = { 0, 0, 0, 0 }; status = cx231xx_read_ctrl_reg(dev, VRT_GET_REGISTER, PWR_CTL_EN, value, 4); cx231xx_coredbg("reg0x%x=0x%x 0x%x 0x%x 0x%x\n", PWR_CTL_EN, value[0], value[1], value[2], value[3]); cx231xx_coredbg("Enter cx231xx_demod_reset()\n"); value[1] = (u8) 0x3; status = cx231xx_write_ctrl_reg(dev, VRT_SET_REGISTER, PWR_CTL_EN, value, 4); msleep(10); value[1] = (u8) 0x0; status = cx231xx_write_ctrl_reg(dev, VRT_SET_REGISTER, PWR_CTL_EN, value, 4); msleep(10); value[1] = (u8) 0x3; status = cx231xx_write_ctrl_reg(dev, VRT_SET_REGISTER, PWR_CTL_EN, value, 4); msleep(10); status = cx231xx_read_ctrl_reg(dev, VRT_GET_REGISTER, PWR_CTL_EN, value, 4); cx231xx_coredbg("reg0x%x=0x%x 0x%x 0x%x 0x%x\n", PWR_CTL_EN, value[0], value[1], value[2], value[3]); return status; } EXPORT_SYMBOL_GPL(cx231xx_demod_reset); int is_fw_load(struct cx231xx *dev) { return cx231xx_check_fw(dev); } EXPORT_SYMBOL_GPL(is_fw_load); int cx231xx_set_mode(struct cx231xx *dev, enum cx231xx_mode set_mode) { int errCode = 0; if (dev->mode == set_mode) return 0; if (set_mode == CX231XX_SUSPEND) { /* Set the chip in power saving mode */ dev->mode = set_mode; } /* Resource is locked */ if (dev->mode != CX231XX_SUSPEND) return -EINVAL; dev->mode = set_mode; if (dev->mode == CX231XX_DIGITAL_MODE)/* Set Digital power mode */ { /* set AGC mode to 
Digital */ switch (dev->model) { case CX231XX_BOARD_CNXT_CARRAERA: case CX231XX_BOARD_CNXT_RDE_250: case CX231XX_BOARD_CNXT_SHELBY: case CX231XX_BOARD_CNXT_RDU_250: errCode = cx231xx_set_agc_analog_digital_mux_select(dev, 0); break; case CX231XX_BOARD_CNXT_RDE_253S: case CX231XX_BOARD_CNXT_RDU_253S: case CX231XX_BOARD_PV_PLAYTV_USB_HYBRID: errCode = cx231xx_set_agc_analog_digital_mux_select(dev, 1); break; case CX231XX_BOARD_HAUPPAUGE_EXETER: case CX231XX_BOARD_HAUPPAUGE_930C_HD_1113xx: errCode = cx231xx_set_power_mode(dev, POLARIS_AVMODE_DIGITAL); break; default: break; } } else/* Set Analog Power mode */ { /* set AGC mode to Analog */ switch (dev->model) { case CX231XX_BOARD_CNXT_CARRAERA: case CX231XX_BOARD_CNXT_RDE_250: case CX231XX_BOARD_CNXT_SHELBY: case CX231XX_BOARD_CNXT_RDU_250: errCode = cx231xx_set_agc_analog_digital_mux_select(dev, 1); break; case CX231XX_BOARD_CNXT_RDE_253S: case CX231XX_BOARD_CNXT_RDU_253S: case CX231XX_BOARD_HAUPPAUGE_EXETER: case CX231XX_BOARD_HAUPPAUGE_930C_HD_1113xx: case CX231XX_BOARD_PV_PLAYTV_USB_HYBRID: case CX231XX_BOARD_HAUPPAUGE_USB2_FM_PAL: case CX231XX_BOARD_HAUPPAUGE_USB2_FM_NTSC: errCode = cx231xx_set_agc_analog_digital_mux_select(dev, 0); break; default: break; } } if (errCode < 0) { dev_err(dev->dev, "Failed to set devmode to %s: error: %i", dev->mode == CX231XX_DIGITAL_MODE ? "digital" : "analog", errCode); return errCode; } return 0; } EXPORT_SYMBOL_GPL(cx231xx_set_mode); int cx231xx_ep5_bulkout(struct cx231xx *dev, u8 *firmware, u16 size) { int errCode = 0; int actlen = -1; int ret = -ENOMEM; u32 *buffer; buffer = kmemdup(firmware, EP5_BUF_SIZE, GFP_KERNEL); if (buffer == NULL) return -ENOMEM; ret = usb_bulk_msg(dev->udev, usb_sndbulkpipe(dev->udev, 5), buffer, EP5_BUF_SIZE, &actlen, EP5_TIMEOUT_MS); if (ret) dev_err(dev->dev, "bulk message failed: %d (%d/%d)", ret, size, actlen); else { errCode = actlen != size ? 
-1 : 0; } kfree(buffer); return errCode; } /***************************************************************** * URB Streaming functions * ******************************************************************/ /* * IRQ callback, called by URB callback */ static void cx231xx_isoc_irq_callback(struct urb *urb) { struct cx231xx_dmaqueue *dma_q = urb->context; struct cx231xx_video_mode *vmode = container_of(dma_q, struct cx231xx_video_mode, vidq); struct cx231xx *dev = container_of(vmode, struct cx231xx, video_mode); unsigned long flags; int i; switch (urb->status) { case 0: /* success */ case -ETIMEDOUT: /* NAK */ break; case -ECONNRESET: /* kill */ case -ENOENT: case -ESHUTDOWN: return; default: /* error */ cx231xx_isocdbg("urb completion error %d.\n", urb->status); break; } /* Copy data from URB */ spin_lock_irqsave(&dev->video_mode.slock, flags); dev->video_mode.isoc_ctl.isoc_copy(dev, urb); spin_unlock_irqrestore(&dev->video_mode.slock, flags); /* Reset urb buffers */ for (i = 0; i < urb->number_of_packets; i++) { urb->iso_frame_desc[i].status = 0; urb->iso_frame_desc[i].actual_length = 0; } urb->status = usb_submit_urb(urb, GFP_ATOMIC); if (urb->status) { cx231xx_isocdbg("urb resubmit failed (error=%i)\n", urb->status); } } /***************************************************************** * URB Streaming functions * ******************************************************************/ /* * IRQ callback, called by URB callback */ static void cx231xx_bulk_irq_callback(struct urb *urb) { struct cx231xx_dmaqueue *dma_q = urb->context; struct cx231xx_video_mode *vmode = container_of(dma_q, struct cx231xx_video_mode, vidq); struct cx231xx *dev = container_of(vmode, struct cx231xx, video_mode); unsigned long flags; switch (urb->status) { case 0: /* success */ case -ETIMEDOUT: /* NAK */ break; case -ECONNRESET: /* kill */ case -ENOENT: case -ESHUTDOWN: return; case -EPIPE: /* stall */ cx231xx_isocdbg("urb completion error - device is stalled.\n"); return; default: /* error */ cx231xx_isocdbg("urb completion error %d.\n", urb->status); break; } /* Copy data from URB */ spin_lock_irqsave(&dev->video_mode.slock, flags); dev->video_mode.bulk_ctl.bulk_copy(dev, urb); spin_unlock_irqrestore(&dev->video_mode.slock, flags); /* Reset urb buffers */ urb->status = usb_submit_urb(urb, GFP_ATOMIC); if (urb->status) { cx231xx_isocdbg("urb resubmit failed (error=%i)\n", urb->status); } } /* * Stop and Deallocate URBs */ void cx231xx_uninit_isoc(struct cx231xx *dev) { struct cx231xx_dmaqueue *dma_q = &dev->video_mode.vidq; struct urb *urb; int i; bool broken_pipe = false; cx231xx_isocdbg("cx231xx: called cx231xx_uninit_isoc\n"); dev->video_mode.isoc_ctl.nfields = -1; for (i = 0; i < dev->video_mode.isoc_ctl.num_bufs; i++) { urb = dev->video_mode.isoc_ctl.urb[i]; if (urb) { if (!irqs_disabled()) usb_kill_urb(urb); else usb_unlink_urb(urb); if (dev->video_mode.isoc_ctl.transfer_buffer[i]) { usb_free_coherent(dev->udev, urb->transfer_buffer_length, dev->video_mode.isoc_ctl. 
transfer_buffer[i], urb->transfer_dma); } if (urb->status == -EPIPE) { broken_pipe = true; } usb_free_urb(urb); dev->video_mode.isoc_ctl.urb[i] = NULL; } dev->video_mode.isoc_ctl.transfer_buffer[i] = NULL; } if (broken_pipe) { cx231xx_isocdbg("Reset endpoint to recover broken pipe."); usb_reset_endpoint(dev->udev, dev->video_mode.end_point_addr); } kfree(dev->video_mode.isoc_ctl.urb); kfree(dev->video_mode.isoc_ctl.transfer_buffer); kfree(dma_q->p_left_data); dev->video_mode.isoc_ctl.urb = NULL; dev->video_mode.isoc_ctl.transfer_buffer = NULL; dev->video_mode.isoc_ctl.num_bufs = 0; dma_q->p_left_data = NULL; if (dev->mode_tv == 0) cx231xx_capture_start(dev, 0, Raw_Video); else cx231xx_capture_start(dev, 0, TS1_serial_mode); } EXPORT_SYMBOL_GPL(cx231xx_uninit_isoc); /* * Stop and Deallocate URBs */ void cx231xx_uninit_bulk(struct cx231xx *dev) { struct cx231xx_dmaqueue *dma_q = &dev->video_mode.vidq; struct urb *urb; int i; bool broken_pipe = false; cx231xx_isocdbg("cx231xx: called cx231xx_uninit_bulk\n"); dev->video_mode.bulk_ctl.nfields = -1; for (i = 0; i < dev->video_mode.bulk_ctl.num_bufs; i++) { urb = dev->video_mode.bulk_ctl.urb[i]; if (urb) { if (!irqs_disabled()) usb_kill_urb(urb); else usb_unlink_urb(urb); if (dev->video_mode.bulk_ctl.transfer_buffer[i]) { usb_free_coherent(dev->udev, urb->transfer_buffer_length, dev->video_mode.bulk_ctl. transfer_buffer[i], urb->transfer_dma); } if (urb->status == -EPIPE) { broken_pipe = true; } usb_free_urb(urb); dev->video_mode.bulk_ctl.urb[i] = NULL; } dev->video_mode.bulk_ctl.transfer_buffer[i] = NULL; } if (broken_pipe) { cx231xx_isocdbg("Reset endpoint to recover broken pipe."); usb_reset_endpoint(dev->udev, dev->video_mode.end_point_addr); } kfree(dev->video_mode.bulk_ctl.urb); kfree(dev->video_mode.bulk_ctl.transfer_buffer); kfree(dma_q->p_left_data); dev->video_mode.bulk_ctl.urb = NULL; dev->video_mode.bulk_ctl.transfer_buffer = NULL; dev->video_mode.bulk_ctl.num_bufs = 0; dma_q->p_left_data = NULL; if (dev->mode_tv == 0) cx231xx_capture_start(dev, 0, Raw_Video); else cx231xx_capture_start(dev, 0, TS1_serial_mode); } EXPORT_SYMBOL_GPL(cx231xx_uninit_bulk); /* * Allocate URBs and start IRQ */ int cx231xx_init_isoc(struct cx231xx *dev, int max_packets, int num_bufs, int max_pkt_size, int (*isoc_copy) (struct cx231xx *dev, struct urb *urb)) { struct cx231xx_dmaqueue *dma_q = &dev->video_mode.vidq; int i; int sb_size, pipe; struct urb *urb; int j, k; int rc; /* De-allocates all pending stuff */ cx231xx_uninit_isoc(dev); dma_q->p_left_data = kzalloc(EP5_BUF_SIZE, GFP_KERNEL); if (dma_q->p_left_data == NULL) return -ENOMEM; dev->video_mode.isoc_ctl.isoc_copy = isoc_copy; dev->video_mode.isoc_ctl.num_bufs = num_bufs; dma_q->pos = 0; dma_q->is_partial_line = 0; dma_q->last_sav = 0; dma_q->current_field = -1; dma_q->field1_done = 0; dma_q->lines_per_field = dev->height / 2; dma_q->bytes_left_in_line = dev->width << 1; dma_q->lines_completed = 0; dma_q->mpeg_buffer_done = 0; dma_q->left_data_count = 0; dma_q->mpeg_buffer_completed = 0; dma_q->add_ps_package_head = CX231XX_NEED_ADD_PS_PACKAGE_HEAD; dma_q->ps_head[0] = 0x00; dma_q->ps_head[1] = 0x00; dma_q->ps_head[2] = 0x01; dma_q->ps_head[3] = 0xBA; for (i = 0; i < 8; i++) dma_q->partial_buf[i] = 0; dev->video_mode.isoc_ctl.urb = kcalloc(num_bufs, sizeof(void *), GFP_KERNEL); if (!dev->video_mode.isoc_ctl.urb) { dev_err(dev->dev, "cannot alloc memory for usb buffers\n"); kfree(dma_q->p_left_data); return -ENOMEM; } dev->video_mode.isoc_ctl.transfer_buffer = kcalloc(num_bufs, sizeof(void *), 
GFP_KERNEL); if (!dev->video_mode.isoc_ctl.transfer_buffer) { dev_err(dev->dev, "cannot allocate memory for usbtransfer\n"); kfree(dev->video_mode.isoc_ctl.urb); kfree(dma_q->p_left_data); return -ENOMEM; } dev->video_mode.isoc_ctl.max_pkt_size = max_pkt_size; dev->video_mode.isoc_ctl.buf = NULL; sb_size = max_packets * dev->video_mode.isoc_ctl.max_pkt_size; if (dev->mode_tv == 1) dev->video_mode.end_point_addr = 0x81; else dev->video_mode.end_point_addr = 0x84; /* allocate urbs and transfer buffers */ for (i = 0; i < dev->video_mode.isoc_ctl.num_bufs; i++) { urb = usb_alloc_urb(max_packets, GFP_KERNEL); if (!urb) { cx231xx_uninit_isoc(dev); return -ENOMEM; } dev->video_mode.isoc_ctl.urb[i] = urb; dev->video_mode.isoc_ctl.transfer_buffer[i] = usb_alloc_coherent(dev->udev, sb_size, GFP_KERNEL, &urb->transfer_dma); if (!dev->video_mode.isoc_ctl.transfer_buffer[i]) { dev_err(dev->dev, "unable to allocate %i bytes for transfer buffer %i\n", sb_size, i); cx231xx_uninit_isoc(dev); return -ENOMEM; } memset(dev->video_mode.isoc_ctl.transfer_buffer[i], 0, sb_size); pipe = usb_rcvisocpipe(dev->udev, dev->video_mode.end_point_addr); usb_fill_int_urb(urb, dev->udev, pipe, dev->video_mode.isoc_ctl.transfer_buffer[i], sb_size, cx231xx_isoc_irq_callback, dma_q, 1); urb->number_of_packets = max_packets; urb->transfer_flags = URB_ISO_ASAP | URB_NO_TRANSFER_DMA_MAP; k = 0; for (j = 0; j < max_packets; j++) { urb->iso_frame_desc[j].offset = k; urb->iso_frame_desc[j].length = dev->video_mode.isoc_ctl.max_pkt_size; k += dev->video_mode.isoc_ctl.max_pkt_size; } } init_waitqueue_head(&dma_q->wq); /* submit urbs and enables IRQ */ for (i = 0; i < dev->video_mode.isoc_ctl.num_bufs; i++) { rc = usb_submit_urb(dev->video_mode.isoc_ctl.urb[i], GFP_ATOMIC); if (rc) { dev_err(dev->dev, "submit of urb %i failed (error=%i)\n", i, rc); cx231xx_uninit_isoc(dev); return rc; } } if (dev->mode_tv == 0) cx231xx_capture_start(dev, 1, Raw_Video); else cx231xx_capture_start(dev, 1, TS1_serial_mode); return 0; } EXPORT_SYMBOL_GPL(cx231xx_init_isoc); /* * Allocate URBs and start IRQ */ int cx231xx_init_bulk(struct cx231xx *dev, int max_packets, int num_bufs, int max_pkt_size, int (*bulk_copy) (struct cx231xx *dev, struct urb *urb)) { struct cx231xx_dmaqueue *dma_q = &dev->video_mode.vidq; int i; int sb_size, pipe; struct urb *urb; int rc; dev->video_input = dev->video_input > 2 ? 
2 : dev->video_input; cx231xx_coredbg("Setting Video mux to %d\n", dev->video_input); video_mux(dev, dev->video_input); /* De-allocates all pending stuff */ cx231xx_uninit_bulk(dev); dev->video_mode.bulk_ctl.bulk_copy = bulk_copy; dev->video_mode.bulk_ctl.num_bufs = num_bufs; dma_q->pos = 0; dma_q->is_partial_line = 0; dma_q->last_sav = 0; dma_q->current_field = -1; dma_q->field1_done = 0; dma_q->lines_per_field = dev->height / 2; dma_q->bytes_left_in_line = dev->width << 1; dma_q->lines_completed = 0; dma_q->mpeg_buffer_done = 0; dma_q->left_data_count = 0; dma_q->mpeg_buffer_completed = 0; dma_q->ps_head[0] = 0x00; dma_q->ps_head[1] = 0x00; dma_q->ps_head[2] = 0x01; dma_q->ps_head[3] = 0xBA; for (i = 0; i < 8; i++) dma_q->partial_buf[i] = 0; dev->video_mode.bulk_ctl.urb = kcalloc(num_bufs, sizeof(void *), GFP_KERNEL); if (!dev->video_mode.bulk_ctl.urb) { dev_err(dev->dev, "cannot alloc memory for usb buffers\n"); return -ENOMEM; } dev->video_mode.bulk_ctl.transfer_buffer = kcalloc(num_bufs, sizeof(void *), GFP_KERNEL); if (!dev->video_mode.bulk_ctl.transfer_buffer) { dev_err(dev->dev, "cannot allocate memory for usbtransfer\n"); kfree(dev->video_mode.bulk_ctl.urb); return -ENOMEM; } dev->video_mode.bulk_ctl.max_pkt_size = max_pkt_size; dev->video_mode.bulk_ctl.buf = NULL; sb_size = max_packets * dev->video_mode.bulk_ctl.max_pkt_size; if (dev->mode_tv == 1) dev->video_mode.end_point_addr = 0x81; else dev->video_mode.end_point_addr = 0x84; /* allocate urbs and transfer buffers */ for (i = 0; i < dev->video_mode.bulk_ctl.num_bufs; i++) { urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) { cx231xx_uninit_bulk(dev); return -ENOMEM; } dev->video_mode.bulk_ctl.urb[i] = urb; urb->transfer_flags = URB_NO_TRANSFER_DMA_MAP; dev->video_mode.bulk_ctl.transfer_buffer[i] = usb_alloc_coherent(dev->udev, sb_size, GFP_KERNEL, &urb->transfer_dma); if (!dev->video_mode.bulk_ctl.transfer_buffer[i]) { dev_err(dev->dev, "unable to allocate %i bytes for transfer buffer %i\n", sb_size, i); cx231xx_uninit_bulk(dev); return -ENOMEM; } memset(dev->video_mode.bulk_ctl.transfer_buffer[i], 0, sb_size); pipe = usb_rcvbulkpipe(dev->udev, dev->video_mode.end_point_addr); usb_fill_bulk_urb(urb, dev->udev, pipe, dev->video_mode.bulk_ctl.transfer_buffer[i], sb_size, cx231xx_bulk_irq_callback, dma_q); } /* clear halt */ rc = usb_clear_halt(dev->udev, dev->video_mode.bulk_ctl.urb[0]->pipe); if (rc < 0) { dev_err(dev->dev, "failed to clear USB bulk endpoint stall/halt condition (error=%i)\n", rc); cx231xx_uninit_bulk(dev); return rc; } init_waitqueue_head(&dma_q->wq); /* submit urbs and enables IRQ */ for (i = 0; i < dev->video_mode.bulk_ctl.num_bufs; i++) { rc = usb_submit_urb(dev->video_mode.bulk_ctl.urb[i], GFP_ATOMIC); if (rc) { dev_err(dev->dev, "submit of urb %i failed (error=%i)\n", i, rc); cx231xx_uninit_bulk(dev); return rc; } } if (dev->mode_tv == 0) cx231xx_capture_start(dev, 1, Raw_Video); else cx231xx_capture_start(dev, 1, TS1_serial_mode); return 0; } EXPORT_SYMBOL_GPL(cx231xx_init_bulk); void cx231xx_stop_TS1(struct cx231xx *dev) { u8 val[4] = { 0, 0, 0, 0 }; val[0] = 0x00; val[1] = 0x03; val[2] = 0x00; val[3] = 0x00; cx231xx_write_ctrl_reg(dev, VRT_SET_REGISTER, TS_MODE_REG, val, 4); val[0] = 0x00; val[1] = 0x70; val[2] = 0x04; val[3] = 0x00; cx231xx_write_ctrl_reg(dev, VRT_SET_REGISTER, TS1_CFG_REG, val, 4); } /* EXPORT_SYMBOL_GPL(cx231xx_stop_TS1); */ void cx231xx_start_TS1(struct cx231xx *dev) { u8 val[4] = { 0, 0, 0, 0 }; val[0] = 0x03; val[1] = 0x03; val[2] = 0x00; val[3] = 0x00; cx231xx_write_ctrl_reg(dev, 
VRT_SET_REGISTER, TS_MODE_REG, val, 4); val[0] = 0x04; val[1] = 0xA3; val[2] = 0x3B; val[3] = 0x00; cx231xx_write_ctrl_reg(dev, VRT_SET_REGISTER, TS1_CFG_REG, val, 4); } /* EXPORT_SYMBOL_GPL(cx231xx_start_TS1); */ /***************************************************************** * Device Init/UnInit functions * ******************************************************************/ int cx231xx_dev_init(struct cx231xx *dev) { int errCode = 0; /* Initialize I2C bus */ /* External Master 1 Bus */ dev->i2c_bus[0].nr = 0; dev->i2c_bus[0].dev = dev; dev->i2c_bus[0].i2c_period = I2C_SPEED_100K; /* 100 KHz */ dev->i2c_bus[0].i2c_nostop = 0; dev->i2c_bus[0].i2c_reserve = 0; dev->i2c_bus[0].i2c_rc = -ENODEV; /* External Master 2 Bus */ dev->i2c_bus[1].nr = 1; dev->i2c_bus[1].dev = dev; dev->i2c_bus[1].i2c_period = I2C_SPEED_100K; /* 100 KHz */ dev->i2c_bus[1].i2c_nostop = 0; dev->i2c_bus[1].i2c_reserve = 0; dev->i2c_bus[1].i2c_rc = -ENODEV; /* Internal Master 3 Bus */ dev->i2c_bus[2].nr = 2; dev->i2c_bus[2].dev = dev; dev->i2c_bus[2].i2c_period = I2C_SPEED_100K; /* 100kHz */ dev->i2c_bus[2].i2c_nostop = 0; dev->i2c_bus[2].i2c_reserve = 0; dev->i2c_bus[2].i2c_rc = -ENODEV; /* register I2C buses */ errCode = cx231xx_i2c_register(&dev->i2c_bus[0]); if (errCode < 0) return errCode; errCode = cx231xx_i2c_register(&dev->i2c_bus[1]); if (errCode < 0) return errCode; errCode = cx231xx_i2c_register(&dev->i2c_bus[2]); if (errCode < 0) return errCode; errCode = cx231xx_i2c_mux_create(dev); if (errCode < 0) { dev_err(dev->dev, "%s: Failed to create I2C mux\n", __func__); return errCode; } errCode = cx231xx_i2c_mux_register(dev, 0); if (errCode < 0) return errCode; errCode = cx231xx_i2c_mux_register(dev, 1); if (errCode < 0) return errCode; /* scan the real bus segments in the order of physical port numbers */ cx231xx_do_i2c_scan(dev, I2C_0); cx231xx_do_i2c_scan(dev, I2C_1_MUX_1); cx231xx_do_i2c_scan(dev, I2C_2); cx231xx_do_i2c_scan(dev, I2C_1_MUX_3); /* init hardware */ /* Note : with out calling set power mode function, afe can not be set up correctly */ if (dev->board.external_av) { errCode = cx231xx_set_power_mode(dev, POLARIS_AVMODE_ENXTERNAL_AV); if (errCode < 0) { dev_err(dev->dev, "%s: Failed to set Power - errCode [%d]!\n", __func__, errCode); return errCode; } } else { errCode = cx231xx_set_power_mode(dev, POLARIS_AVMODE_ANALOGT_TV); if (errCode < 0) { dev_err(dev->dev, "%s: Failed to set Power - errCode [%d]!\n", __func__, errCode); return errCode; } } /* reset the Tuner, if it is a Xceive tuner */ if ((dev->board.tuner_type == TUNER_XC5000) || (dev->board.tuner_type == TUNER_XC2028)) cx231xx_gpio_set(dev, dev->board.tuner_gpio); /* initialize Colibri block */ errCode = cx231xx_afe_init_super_block(dev, 0x23c); if (errCode < 0) { dev_err(dev->dev, "%s: cx231xx_afe init super block - errCode [%d]!\n", __func__, errCode); return errCode; } errCode = cx231xx_afe_init_channels(dev); if (errCode < 0) { dev_err(dev->dev, "%s: cx231xx_afe init channels - errCode [%d]!\n", __func__, errCode); return errCode; } /* Set DIF in By pass mode */ errCode = cx231xx_dif_set_standard(dev, DIF_USE_BASEBAND); if (errCode < 0) { dev_err(dev->dev, "%s: cx231xx_dif set to By pass mode - errCode [%d]!\n", __func__, errCode); return errCode; } /* I2S block related functions */ errCode = cx231xx_i2s_blk_initialize(dev); if (errCode < 0) { dev_err(dev->dev, "%s: cx231xx_i2s block initialize - errCode [%d]!\n", __func__, errCode); return errCode; } /* init control pins */ errCode = cx231xx_init_ctrl_pin_status(dev); if (errCode < 
0) { dev_err(dev->dev, "%s: cx231xx_init ctrl pins - errCode [%d]!\n", __func__, errCode); return errCode; } /* set AGC mode to Analog */ switch (dev->model) { case CX231XX_BOARD_CNXT_CARRAERA: case CX231XX_BOARD_CNXT_RDE_250: case CX231XX_BOARD_CNXT_SHELBY: case CX231XX_BOARD_CNXT_RDU_250: errCode = cx231xx_set_agc_analog_digital_mux_select(dev, 1); break; case CX231XX_BOARD_CNXT_RDE_253S: case CX231XX_BOARD_CNXT_RDU_253S: case CX231XX_BOARD_HAUPPAUGE_EXETER: case CX231XX_BOARD_HAUPPAUGE_930C_HD_1113xx: case CX231XX_BOARD_PV_PLAYTV_USB_HYBRID: case CX231XX_BOARD_HAUPPAUGE_USB2_FM_PAL: case CX231XX_BOARD_HAUPPAUGE_USB2_FM_NTSC: errCode = cx231xx_set_agc_analog_digital_mux_select(dev, 0); break; default: break; } if (errCode < 0) { dev_err(dev->dev, "%s: cx231xx_AGC mode to Analog - errCode [%d]!\n", __func__, errCode); return errCode; } /* set all alternate settings to zero initially */ cx231xx_set_alt_setting(dev, INDEX_VIDEO, 0); cx231xx_set_alt_setting(dev, INDEX_VANC, 0); cx231xx_set_alt_setting(dev, INDEX_HANC, 0); if (dev->board.has_dvb) cx231xx_set_alt_setting(dev, INDEX_TS1, 0); errCode = 0; return errCode; } EXPORT_SYMBOL_GPL(cx231xx_dev_init); void cx231xx_dev_uninit(struct cx231xx *dev) { /* Un Initialize I2C bus */ cx231xx_i2c_mux_unregister(dev); cx231xx_i2c_unregister(&dev->i2c_bus[2]); cx231xx_i2c_unregister(&dev->i2c_bus[1]); cx231xx_i2c_unregister(&dev->i2c_bus[0]); } EXPORT_SYMBOL_GPL(cx231xx_dev_uninit); /***************************************************************** * G P I O related functions * ******************************************************************/ int cx231xx_send_gpio_cmd(struct cx231xx *dev, u32 gpio_bit, u8 *gpio_val, u8 len, u8 request, u8 direction) { int status = 0; struct VENDOR_REQUEST_IN ven_req; /* Set wValue */ ven_req.wValue = (u16) (gpio_bit >> 16 & 0xffff); /* set request */ if (!request) { if (direction) ven_req.bRequest = VRT_GET_GPIO; /* 0x9 gpio */ else ven_req.bRequest = VRT_SET_GPIO; /* 0x8 gpio */ } else { if (direction) ven_req.bRequest = VRT_GET_GPIE; /* 0xb gpie */ else ven_req.bRequest = VRT_SET_GPIE; /* 0xa gpie */ } /* set index value */ ven_req.wIndex = (u16) (gpio_bit & 0xffff); /* set wLength value */ ven_req.wLength = len; /* set bData value */ ven_req.bData = 0; /* set the buffer for read / write */ ven_req.pBuff = gpio_val; /* set the direction */ if (direction) { ven_req.direction = USB_DIR_IN; memset(ven_req.pBuff, 0x00, ven_req.wLength); } else ven_req.direction = USB_DIR_OUT; /* call common vendor command request */ status = cx231xx_send_vendor_cmd(dev, &ven_req); if (status < 0) { dev_err(dev->dev, "%s: failed with status -%d\n", __func__, status); } return status; } EXPORT_SYMBOL_GPL(cx231xx_send_gpio_cmd); /***************************************************************** * C O N T R O L - Register R E A D / W R I T E functions * *****************************************************************/ int cx231xx_mode_register(struct cx231xx *dev, u16 address, u32 mode) { u8 value[4] = { 0x0, 0x0, 0x0, 0x0 }; u32 tmp = 0; int status = 0; status = cx231xx_read_ctrl_reg(dev, VRT_GET_REGISTER, address, value, 4); if (status < 0) return status; tmp = le32_to_cpu(*((__le32 *) value)); tmp |= mode; value[0] = (u8) tmp; value[1] = (u8) (tmp >> 8); value[2] = (u8) (tmp >> 16); value[3] = (u8) (tmp >> 24); status = cx231xx_write_ctrl_reg(dev, VRT_SET_REGISTER, address, value, 4); return status; } /***************************************************************** * I 2 C Internal C O N T R O L functions * 
*****************************************************************/ int cx231xx_read_i2c_master(struct cx231xx *dev, u8 dev_addr, u16 saddr, u8 saddr_len, u32 *data, u8 data_len, int master) { int status = 0; struct cx231xx_i2c_xfer_data req_data; u8 value[64] = "0"; if (saddr_len == 0) saddr = 0; else if (saddr_len == 1) saddr &= 0xff; /* prepare xfer_data struct */ req_data.dev_addr = dev_addr >> 1; req_data.direction = I2C_M_RD; req_data.saddr_len = saddr_len; req_data.saddr_dat = saddr; req_data.buf_size = data_len; req_data.p_buffer = (u8 *) value; /* usb send command */ if (master == 0) status = dev->cx231xx_send_usb_command(&dev->i2c_bus[0], &req_data); else if (master == 1) status = dev->cx231xx_send_usb_command(&dev->i2c_bus[1], &req_data); else if (master == 2) status = dev->cx231xx_send_usb_command(&dev->i2c_bus[2], &req_data); if (status >= 0) { /* Copy the data read back to main buffer */ if (data_len == 1) *data = value[0]; else if (data_len == 4) *data = value[0] | value[1] << 8 | value[2] << 16 | value[3] << 24; else if (data_len > 4) *data = value[saddr]; } return status; } int cx231xx_write_i2c_master(struct cx231xx *dev, u8 dev_addr, u16 saddr, u8 saddr_len, u32 data, u8 data_len, int master) { int status = 0; u8 value[4] = { 0, 0, 0, 0 }; struct cx231xx_i2c_xfer_data req_data; value[0] = (u8) data; value[1] = (u8) (data >> 8); value[2] = (u8) (data >> 16); value[3] = (u8) (data >> 24); if (saddr_len == 0) saddr = 0; else if (saddr_len == 1) saddr &= 0xff; /* prepare xfer_data struct */ req_data.dev_addr = dev_addr >> 1; req_data.direction = 0; req_data.saddr_len = saddr_len; req_data.saddr_dat = saddr; req_data.buf_size = data_len; req_data.p_buffer = value; /* usb send command */ if (master == 0) status = dev->cx231xx_send_usb_command(&dev->i2c_bus[0], &req_data); else if (master == 1) status = dev->cx231xx_send_usb_command(&dev->i2c_bus[1], &req_data); else if (master == 2) status = dev->cx231xx_send_usb_command(&dev->i2c_bus[2], &req_data); return status; } int cx231xx_read_i2c_data(struct cx231xx *dev, u8 dev_addr, u16 saddr, u8 saddr_len, u32 *data, u8 data_len) { int status = 0; struct cx231xx_i2c_xfer_data req_data; u8 value[4] = { 0, 0, 0, 0 }; if (saddr_len == 0) saddr = 0; else if (saddr_len == 1) saddr &= 0xff; /* prepare xfer_data struct */ req_data.dev_addr = dev_addr >> 1; req_data.direction = I2C_M_RD; req_data.saddr_len = saddr_len; req_data.saddr_dat = saddr; req_data.buf_size = data_len; req_data.p_buffer = (u8 *) value; /* usb send command */ status = dev->cx231xx_send_usb_command(&dev->i2c_bus[0], &req_data); if (status >= 0) { /* Copy the data read back to main buffer */ if (data_len == 1) *data = value[0]; else *data = value[0] | value[1] << 8 | value[2] << 16 | value[3] << 24; } return status; } int cx231xx_write_i2c_data(struct cx231xx *dev, u8 dev_addr, u16 saddr, u8 saddr_len, u32 data, u8 data_len) { int status = 0; u8 value[4] = { 0, 0, 0, 0 }; struct cx231xx_i2c_xfer_data req_data; value[0] = (u8) data; value[1] = (u8) (data >> 8); value[2] = (u8) (data >> 16); value[3] = (u8) (data >> 24); if (saddr_len == 0) saddr = 0; else if (saddr_len == 1) saddr &= 0xff; /* prepare xfer_data struct */ req_data.dev_addr = dev_addr >> 1; req_data.direction = 0; req_data.saddr_len = saddr_len; req_data.saddr_dat = saddr; req_data.buf_size = data_len; req_data.p_buffer = value; /* usb send command */ status = dev->cx231xx_send_usb_command(&dev->i2c_bus[0], &req_data); return status; } int cx231xx_reg_mask_write(struct cx231xx *dev, u8 dev_addr, u8 size, u16 
register_address, u8 bit_start, u8 bit_end, u32 value) { int status = 0; u32 tmp; u32 mask = 0; int i; if (bit_start > (size - 1) || bit_end > (size - 1)) return -1; if (size == 8) { status = cx231xx_read_i2c_data(dev, dev_addr, register_address, 2, &tmp, 1); } else { status = cx231xx_read_i2c_data(dev, dev_addr, register_address, 2, &tmp, 4); } if (status < 0) return status; mask = 1 << bit_end; for (i = bit_end; i > bit_start && i > 0; i--) mask = mask + (1 << (i - 1)); value <<= bit_start; if (size == 8) { tmp &= ~mask; tmp |= value; tmp &= 0xff; status = cx231xx_write_i2c_data(dev, dev_addr, register_address, 2, tmp, 1); } else { tmp &= ~mask; tmp |= value; status = cx231xx_write_i2c_data(dev, dev_addr, register_address, 2, tmp, 4); } return status; } int cx231xx_read_modify_write_i2c_dword(struct cx231xx *dev, u8 dev_addr, u16 saddr, u32 mask, u32 value) { u32 temp; int status = 0; status = cx231xx_read_i2c_data(dev, dev_addr, saddr, 2, &temp, 4); if (status < 0) return status; temp &= ~mask; temp |= value; status = cx231xx_write_i2c_data(dev, dev_addr, saddr, 2, temp, 4); return status; } u32 cx231xx_set_field(u32 field_mask, u32 data) { u32 temp; for (temp = field_mask; (temp & 1) == 0; temp >>= 1) data <<= 1; return data; } |
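The read-modify-write helpers above hinge on two bits of arithmetic: cx231xx_reg_mask_write() builds a contiguous mask over bits [bit_start, bit_end], and cx231xx_set_field() left-shifts a value until it lines up with the lowest set bit of a field mask. Below is a minimal standalone sketch of that arithmetic only, with illustrative values; it is not driver code.

/*
 * Standalone sketch of the bit-field arithmetic used above.
 * build_mask() mirrors the mask loop in cx231xx_reg_mask_write(),
 * set_field() mirrors cx231xx_set_field().
 */
#include <stdio.h>
#include <stdint.h>

/* Build a contiguous mask covering bit_start..bit_end (inclusive). */
static uint32_t build_mask(unsigned int bit_start, unsigned int bit_end)
{
	uint32_t mask = 1u << bit_end;
	unsigned int i;

	for (i = bit_end; i > bit_start && i > 0; i--)
		mask += 1u << (i - 1);
	return mask;
}

/* Shift data left until it lines up with the lowest set bit of field_mask. */
static uint32_t set_field(uint32_t field_mask, uint32_t data)
{
	uint32_t temp;

	for (temp = field_mask; (temp & 1) == 0; temp >>= 1)
		data <<= 1;
	return data;
}

int main(void)
{
	uint32_t reg = 0xdeadbeef;		/* pretend register contents */
	uint32_t mask = build_mask(4, 7);	/* bits 4..7 -> 0x000000f0 */
	uint32_t val = set_field(mask, 0x5);	/* 0x5 aligned to the field -> 0x50 */

	reg = (reg & ~mask) | val;		/* the read-modify-write step */
	printf("mask=%#x reg=%#x\n", mask, reg);
	return 0;
}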
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Common values for AES algorithms
 */

#ifndef _CRYPTO_AES_H
#define _CRYPTO_AES_H

#include <linux/types.h>
#include <linux/crypto.h>

#define AES_MIN_KEY_SIZE	16
#define AES_MAX_KEY_SIZE	32
#define AES_KEYSIZE_128		16
#define AES_KEYSIZE_192		24
#define AES_KEYSIZE_256		32
#define AES_BLOCK_SIZE		16
#define AES_MAX_KEYLENGTH	(15 * 16)
#define AES_MAX_KEYLENGTH_U32	(AES_MAX_KEYLENGTH / sizeof(u32))

/*
 * Please ensure that the first two fields are 16-byte aligned
 * relative to the start of the structure, i.e., don't move them!
 */
struct crypto_aes_ctx {
	u32 key_enc[AES_MAX_KEYLENGTH_U32];
	u32 key_dec[AES_MAX_KEYLENGTH_U32];
	u32 key_length;
};

extern const u32 crypto_ft_tab[4][256] ____cacheline_aligned;
extern const u32 crypto_it_tab[4][256] ____cacheline_aligned;

/*
 * validate key length for AES algorithms
 */
static inline int aes_check_keylen(unsigned int keylen)
{
	switch (keylen) {
	case AES_KEYSIZE_128:
	case AES_KEYSIZE_192:
	case AES_KEYSIZE_256:
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

int crypto_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
		unsigned int key_len);

/**
 * aes_expandkey - Expands the AES key as described in FIPS-197
 * @ctx:	The location where the computed key will be stored.
 * @in_key:	The supplied key.
 * @key_len:	The length of the supplied key.
 *
 * Returns 0 on success. The function fails only if an invalid key size (or
 * pointer) is supplied.
 * The expanded key size is 240 bytes (max of 14 rounds with a unique 16 bytes
 * key schedule plus a 16 bytes key which is used before the first round).
 * The decryption key is prepared for the "Equivalent Inverse Cipher" as
 * described in FIPS-197. The first slot (16 bytes) of each key (enc or dec) is
 * for the initial combination, the second slot for the first round and so on.
 */
int aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
		  unsigned int key_len);

/**
 * aes_encrypt - Encrypt a single AES block
 * @ctx:	Context struct containing the key schedule
 * @out:	Buffer to store the ciphertext
 * @in:		Buffer containing the plaintext
 */
void aes_encrypt(const struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);

/**
 * aes_decrypt - Decrypt a single AES block
 * @ctx:	Context struct containing the key schedule
 * @out:	Buffer to store the plaintext
 * @in:		Buffer containing the ciphertext
 */
void aes_decrypt(const struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);

extern const u8 crypto_aes_sbox[];
extern const u8 crypto_aes_inv_sbox[];

void aescfb_encrypt(const struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
		    int len, const u8 iv[AES_BLOCK_SIZE]);
void aescfb_decrypt(const struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
		    int len, const u8 iv[AES_BLOCK_SIZE]);

#endif
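For context, a hedged sketch of how kernel code typically consumes the library interface declared above (aes_expandkey() plus aes_encrypt()). The helper name, the key handling, and the wipe at the end are illustrative choices, not taken from any particular in-tree user.

/*
 * Hedged sketch: expand a 128-bit key and encrypt one block with the
 * generic AES library helpers declared above.
 */
#include <crypto/aes.h>
#include <linux/string.h>
#include <linux/errno.h>

static int example_encrypt_block(const u8 key[AES_KEYSIZE_128],
				 const u8 in[AES_BLOCK_SIZE],
				 u8 out[AES_BLOCK_SIZE])
{
	struct crypto_aes_ctx ctx;
	int err;

	/* Builds both the encryption and decryption key schedules. */
	err = aes_expandkey(&ctx, key, AES_KEYSIZE_128);
	if (err)
		return err;

	aes_encrypt(&ctx, out, in);

	/* The expanded schedule is key material too; wipe it when done. */
	memzero_explicit(&ctx, sizeof(ctx));
	return 0;
}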
// SPDX-License-Identifier: GPL-2.0
/*
 * linux/fs/ext4/truncate.h
 *
 * Common inline functions needed for truncate support
 */

/*
 * Truncate blocks that were not used by write. We have to truncate the
 * pagecache as well so that corresponding buffers get properly unmapped.
 */
static inline void ext4_truncate_failed_write(struct inode *inode)
{
	struct address_space *mapping = inode->i_mapping;

	/*
	 * We don't need to call ext4_break_layouts() because the blocks we
	 * are truncating were never visible to userspace.
	 */
	filemap_invalidate_lock(mapping);
	truncate_inode_pages(mapping, inode->i_size);
	ext4_truncate(inode);
	filemap_invalidate_unlock(mapping);
}

/*
 * Work out how many blocks we need to proceed with the next chunk of a
 * truncate transaction.
 */
static inline unsigned long ext4_blocks_for_truncate(struct inode *inode)
{
	ext4_lblk_t needed;

	needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9);

	/* Give ourselves just enough room to cope with inodes in which
	 * i_blocks is corrupt: we've seen disk corruptions in the past
	 * which resulted in random data in an inode which looked enough
	 * like a regular file for ext4 to try to delete it. Things
	 * will go a bit crazy if that happens, but at least we should
	 * try not to panic the whole kernel. */
	if (needed < 2)
		needed = 2;
	/* But we need to bound the transaction so we don't overflow the
	 * journal. */
	if (needed > EXT4_MAX_TRANS_DATA)
		needed = EXT4_MAX_TRANS_DATA;

	return EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + needed;
}
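A worked example of the credit estimate in ext4_blocks_for_truncate(): i_blocks counts 512-byte units, so the shift by (s_blocksize_bits - 9) converts it to filesystem blocks before the clamp. The constants below are illustrative stand-ins, not the real EXT4_MAX_TRANS_DATA or EXT4_DATA_TRANS_BLOCKS() values.

/* Standalone arithmetic sketch of ext4_blocks_for_truncate(). */
#include <stdio.h>

#define EXAMPLE_MAX_TRANS_DATA		64U	/* stand-in for EXT4_MAX_TRANS_DATA */
#define EXAMPLE_DATA_TRANS_BLOCKS	20U	/* stand-in for EXT4_DATA_TRANS_BLOCKS(sb) */

int main(void)
{
	unsigned long i_blocks = 2048;		/* 1 MiB expressed in 512-byte units */
	unsigned int blocksize_bits = 12;	/* 4 KiB filesystem blocks */
	unsigned long needed = i_blocks >> (blocksize_bits - 9); /* 256 fs blocks */

	if (needed < 2)
		needed = 2;			/* floor for corrupt i_blocks */
	if (needed > EXAMPLE_MAX_TRANS_DATA)
		needed = EXAMPLE_MAX_TRANS_DATA; /* cap so the journal is not overflowed */

	printf("credits = %lu\n", EXAMPLE_DATA_TRANS_BLOCKS + needed); /* 20 + 64 = 84 */
	return 0;
}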
791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 | /* Netfilter messages via netlink socket. Allows for user space * protocol helpers and general trouble making from userspace. * * (C) 2001 by Jay Schulist <jschlst@samba.org>, * (C) 2002-2005 by Harald Welte <laforge@gnumonks.org> * (C) 2005-2017 by Pablo Neira Ayuso <pablo@netfilter.org> * * Initial netfilter messages via netlink development funded and * generally made possible by Network Robots, Inc. (www.networkrobots.com) * * Further development of this code funded by Astaro AG (http://www.astaro.com) * * This software may be used and distributed according to the terms * of the GNU General Public License, incorporated herein by reference. */ #include <linux/module.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/skbuff.h> #include <linux/uaccess.h> #include <net/sock.h> #include <linux/init.h> #include <linux/sched/signal.h> #include <net/netlink.h> #include <net/netns/generic.h> #include <linux/netfilter.h> #include <linux/netfilter/nfnetlink.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER); MODULE_DESCRIPTION("Netfilter messages via netlink socket"); #define nfnl_dereference_protected(id) \ rcu_dereference_protected(table[(id)].subsys, \ lockdep_nfnl_is_held((id))) #define NFNL_MAX_ATTR_COUNT 32 static unsigned int nfnetlink_pernet_id __read_mostly; #ifdef CONFIG_NF_CONNTRACK_EVENTS static DEFINE_SPINLOCK(nfnl_grp_active_lock); #endif struct nfnl_net { struct sock *nfnl; }; static struct { struct mutex mutex; const struct nfnetlink_subsystem __rcu *subsys; } table[NFNL_SUBSYS_COUNT]; static struct lock_class_key nfnl_lockdep_keys[NFNL_SUBSYS_COUNT]; static const char *const nfnl_lockdep_names[NFNL_SUBSYS_COUNT] = { [NFNL_SUBSYS_NONE] = "nfnl_subsys_none", [NFNL_SUBSYS_CTNETLINK] = "nfnl_subsys_ctnetlink", [NFNL_SUBSYS_CTNETLINK_EXP] = "nfnl_subsys_ctnetlink_exp", [NFNL_SUBSYS_QUEUE] = "nfnl_subsys_queue", [NFNL_SUBSYS_ULOG] = "nfnl_subsys_ulog", [NFNL_SUBSYS_OSF] = "nfnl_subsys_osf", [NFNL_SUBSYS_IPSET] = "nfnl_subsys_ipset", [NFNL_SUBSYS_ACCT] = "nfnl_subsys_acct", [NFNL_SUBSYS_CTNETLINK_TIMEOUT] = "nfnl_subsys_cttimeout", [NFNL_SUBSYS_CTHELPER] = "nfnl_subsys_cthelper", [NFNL_SUBSYS_NFTABLES] = "nfnl_subsys_nftables", [NFNL_SUBSYS_NFT_COMPAT] = "nfnl_subsys_nftcompat", [NFNL_SUBSYS_HOOK] = "nfnl_subsys_hook", }; static const int nfnl_group2type[NFNLGRP_MAX+1] = { [NFNLGRP_CONNTRACK_NEW] = NFNL_SUBSYS_CTNETLINK, [NFNLGRP_CONNTRACK_UPDATE] = NFNL_SUBSYS_CTNETLINK, [NFNLGRP_CONNTRACK_DESTROY] = NFNL_SUBSYS_CTNETLINK, [NFNLGRP_CONNTRACK_EXP_NEW] = NFNL_SUBSYS_CTNETLINK_EXP, [NFNLGRP_CONNTRACK_EXP_UPDATE] = NFNL_SUBSYS_CTNETLINK_EXP, [NFNLGRP_CONNTRACK_EXP_DESTROY] = NFNL_SUBSYS_CTNETLINK_EXP, [NFNLGRP_NFTABLES] = NFNL_SUBSYS_NFTABLES, [NFNLGRP_ACCT_QUOTA] = NFNL_SUBSYS_ACCT, [NFNLGRP_NFTRACE] = NFNL_SUBSYS_NFTABLES, [NFNLGRP_NFT_DEV] = NFNL_SUBSYS_NFTABLES, }; static struct nfnl_net *nfnl_pernet(struct net *net) { return net_generic(net, nfnetlink_pernet_id); } void nfnl_lock(__u8 subsys_id) { mutex_lock(&table[subsys_id].mutex); } EXPORT_SYMBOL_GPL(nfnl_lock); void nfnl_unlock(__u8 subsys_id) { mutex_unlock(&table[subsys_id].mutex); } EXPORT_SYMBOL_GPL(nfnl_unlock); #ifdef CONFIG_PROVE_LOCKING bool lockdep_nfnl_is_held(u8 subsys_id) { return 
lockdep_is_held(&table[subsys_id].mutex); } EXPORT_SYMBOL_GPL(lockdep_nfnl_is_held); #endif int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n) { u8 cb_id; /* Sanity-check attr_count size to avoid stack buffer overflow. */ for (cb_id = 0; cb_id < n->cb_count; cb_id++) if (WARN_ON(n->cb[cb_id].attr_count > NFNL_MAX_ATTR_COUNT)) return -EINVAL; nfnl_lock(n->subsys_id); if (table[n->subsys_id].subsys) { nfnl_unlock(n->subsys_id); return -EBUSY; } rcu_assign_pointer(table[n->subsys_id].subsys, n); nfnl_unlock(n->subsys_id); return 0; } EXPORT_SYMBOL_GPL(nfnetlink_subsys_register); int nfnetlink_subsys_unregister(const struct nfnetlink_subsystem *n) { nfnl_lock(n->subsys_id); table[n->subsys_id].subsys = NULL; nfnl_unlock(n->subsys_id); synchronize_rcu(); return 0; } EXPORT_SYMBOL_GPL(nfnetlink_subsys_unregister); static inline const struct nfnetlink_subsystem *nfnetlink_get_subsys(u16 type) { u8 subsys_id = NFNL_SUBSYS_ID(type); if (subsys_id >= NFNL_SUBSYS_COUNT) return NULL; return rcu_dereference(table[subsys_id].subsys); } static inline const struct nfnl_callback * nfnetlink_find_client(u16 type, const struct nfnetlink_subsystem *ss) { u8 cb_id = NFNL_MSG_TYPE(type); if (cb_id >= ss->cb_count) return NULL; return &ss->cb[cb_id]; } int nfnetlink_has_listeners(struct net *net, unsigned int group) { struct nfnl_net *nfnlnet = nfnl_pernet(net); return netlink_has_listeners(nfnlnet->nfnl, group); } EXPORT_SYMBOL_GPL(nfnetlink_has_listeners); int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid, unsigned int group, int echo, gfp_t flags) { struct nfnl_net *nfnlnet = nfnl_pernet(net); return nlmsg_notify(nfnlnet->nfnl, skb, portid, group, echo, flags); } EXPORT_SYMBOL_GPL(nfnetlink_send); int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error) { struct nfnl_net *nfnlnet = nfnl_pernet(net); return netlink_set_err(nfnlnet->nfnl, portid, group, error); } EXPORT_SYMBOL_GPL(nfnetlink_set_err); int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid) { struct nfnl_net *nfnlnet = nfnl_pernet(net); int err; err = nlmsg_unicast(nfnlnet->nfnl, skb, portid); if (err == -EAGAIN) err = -ENOBUFS; return err; } EXPORT_SYMBOL_GPL(nfnetlink_unicast); void nfnetlink_broadcast(struct net *net, struct sk_buff *skb, __u32 portid, __u32 group, gfp_t allocation) { struct nfnl_net *nfnlnet = nfnl_pernet(net); netlink_broadcast(nfnlnet->nfnl, skb, portid, group, allocation); } EXPORT_SYMBOL_GPL(nfnetlink_broadcast); /* Process one complete nfnetlink message. 
*/ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); const struct nfnl_callback *nc; const struct nfnetlink_subsystem *ss; int type, err; /* All the messages must at least contain nfgenmsg */ if (nlmsg_len(nlh) < sizeof(struct nfgenmsg)) return 0; type = nlh->nlmsg_type; replay: rcu_read_lock(); ss = nfnetlink_get_subsys(type); if (!ss) { #ifdef CONFIG_MODULES rcu_read_unlock(); request_module("nfnetlink-subsys-%d", NFNL_SUBSYS_ID(type)); rcu_read_lock(); ss = nfnetlink_get_subsys(type); if (!ss) #endif { rcu_read_unlock(); return -EINVAL; } } nc = nfnetlink_find_client(type, ss); if (!nc) { rcu_read_unlock(); return -EINVAL; } { int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); struct nfnl_net *nfnlnet = nfnl_pernet(net); u8 cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); struct nlattr *cda[NFNL_MAX_ATTR_COUNT + 1]; struct nlattr *attr = (void *)nlh + min_len; int attrlen = nlh->nlmsg_len - min_len; __u8 subsys_id = NFNL_SUBSYS_ID(type); struct nfnl_info info = { .net = net, .sk = nfnlnet->nfnl, .nlh = nlh, .nfmsg = nlmsg_data(nlh), .extack = extack, }; /* Sanity-check NFNL_MAX_ATTR_COUNT */ if (ss->cb[cb_id].attr_count > NFNL_MAX_ATTR_COUNT) { rcu_read_unlock(); return -ENOMEM; } err = nla_parse_deprecated(cda, ss->cb[cb_id].attr_count, attr, attrlen, ss->cb[cb_id].policy, extack); if (err < 0) { rcu_read_unlock(); return err; } if (!nc->call) { rcu_read_unlock(); return -EINVAL; } switch (nc->type) { case NFNL_CB_RCU: err = nc->call(skb, &info, (const struct nlattr **)cda); rcu_read_unlock(); break; case NFNL_CB_MUTEX: rcu_read_unlock(); nfnl_lock(subsys_id); if (nfnl_dereference_protected(subsys_id) != ss || nfnetlink_find_client(type, ss) != nc) { nfnl_unlock(subsys_id); err = -EAGAIN; break; } err = nc->call(skb, &info, (const struct nlattr **)cda); nfnl_unlock(subsys_id); break; default: rcu_read_unlock(); err = -EINVAL; break; } if (err == -EAGAIN) goto replay; return err; } } struct nfnl_err { struct list_head head; struct nlmsghdr *nlh; int err; struct netlink_ext_ack extack; }; static int nfnl_err_add(struct list_head *list, struct nlmsghdr *nlh, int err, const struct netlink_ext_ack *extack) { struct nfnl_err *nfnl_err; nfnl_err = kmalloc(sizeof(struct nfnl_err), GFP_KERNEL); if (nfnl_err == NULL) return -ENOMEM; nfnl_err->nlh = nlh; nfnl_err->err = err; nfnl_err->extack = *extack; list_add_tail(&nfnl_err->head, list); return 0; } static void nfnl_err_del(struct nfnl_err *nfnl_err) { list_del(&nfnl_err->head); kfree(nfnl_err); } static void nfnl_err_reset(struct list_head *err_list) { struct nfnl_err *nfnl_err, *next; list_for_each_entry_safe(nfnl_err, next, err_list, head) nfnl_err_del(nfnl_err); } static void nfnl_err_deliver(struct list_head *err_list, struct sk_buff *skb) { struct nfnl_err *nfnl_err, *next; list_for_each_entry_safe(nfnl_err, next, err_list, head) { netlink_ack(skb, nfnl_err->nlh, nfnl_err->err, &nfnl_err->extack); nfnl_err_del(nfnl_err); } } enum { NFNL_BATCH_FAILURE = (1 << 0), NFNL_BATCH_DONE = (1 << 1), NFNL_BATCH_REPLAY = (1 << 2), }; static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, u16 subsys_id, u32 genid) { struct sk_buff *oskb = skb; struct net *net = sock_net(skb->sk); const struct nfnetlink_subsystem *ss; const struct nfnl_callback *nc; struct netlink_ext_ack extack; LIST_HEAD(err_list); u32 status; int err; if (subsys_id >= NFNL_SUBSYS_COUNT) return netlink_ack(skb, nlh, -EINVAL, NULL); replay: status = 0; replay_abort: skb = 
netlink_skb_clone(oskb, GFP_KERNEL); if (!skb) return netlink_ack(oskb, nlh, -ENOMEM, NULL); nfnl_lock(subsys_id); ss = nfnl_dereference_protected(subsys_id); if (!ss) { #ifdef CONFIG_MODULES nfnl_unlock(subsys_id); request_module("nfnetlink-subsys-%d", subsys_id); nfnl_lock(subsys_id); ss = nfnl_dereference_protected(subsys_id); if (!ss) #endif { nfnl_unlock(subsys_id); netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL); return consume_skb(skb); } } if (!ss->valid_genid || !ss->commit || !ss->abort) { nfnl_unlock(subsys_id); netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL); return consume_skb(skb); } if (!try_module_get(ss->owner)) { nfnl_unlock(subsys_id); netlink_ack(oskb, nlh, -EOPNOTSUPP, NULL); return consume_skb(skb); } if (!ss->valid_genid(net, genid)) { module_put(ss->owner); nfnl_unlock(subsys_id); netlink_ack(oskb, nlh, -ERESTART, NULL); return consume_skb(skb); } nfnl_unlock(subsys_id); if (nlh->nlmsg_flags & NLM_F_ACK) { memset(&extack, 0, sizeof(extack)); nfnl_err_add(&err_list, nlh, 0, &extack); } while (skb->len >= nlmsg_total_size(0)) { int msglen, type; if (fatal_signal_pending(current)) { nfnl_err_reset(&err_list); err = -EINTR; status = NFNL_BATCH_FAILURE; goto done; } memset(&extack, 0, sizeof(extack)); nlh = nlmsg_hdr(skb); err = 0; if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len || nlmsg_len(nlh) < sizeof(struct nfgenmsg)) { nfnl_err_reset(&err_list); status |= NFNL_BATCH_FAILURE; goto done; } /* Only requests are handled by the kernel */ if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) { err = -EINVAL; goto ack; } type = nlh->nlmsg_type; if (type == NFNL_MSG_BATCH_BEGIN) { /* Malformed: Batch begin twice */ nfnl_err_reset(&err_list); status |= NFNL_BATCH_FAILURE; goto done; } else if (type == NFNL_MSG_BATCH_END) { status |= NFNL_BATCH_DONE; goto done; } else if (type < NLMSG_MIN_TYPE) { err = -EINVAL; goto ack; } /* We only accept a batch with messages for the same * subsystem. */ if (NFNL_SUBSYS_ID(type) != subsys_id) { err = -EINVAL; goto ack; } nc = nfnetlink_find_client(type, ss); if (!nc) { err = -EINVAL; goto ack; } if (nc->type != NFNL_CB_BATCH) { err = -EINVAL; goto ack; } { int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); struct nfnl_net *nfnlnet = nfnl_pernet(net); struct nlattr *cda[NFNL_MAX_ATTR_COUNT + 1]; struct nlattr *attr = (void *)nlh + min_len; u8 cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); int attrlen = nlh->nlmsg_len - min_len; struct nfnl_info info = { .net = net, .sk = nfnlnet->nfnl, .nlh = nlh, .nfmsg = nlmsg_data(nlh), .extack = &extack, }; /* Sanity-check NFTA_MAX_ATTR */ if (ss->cb[cb_id].attr_count > NFNL_MAX_ATTR_COUNT) { err = -ENOMEM; goto ack; } err = nla_parse_deprecated(cda, ss->cb[cb_id].attr_count, attr, attrlen, ss->cb[cb_id].policy, &extack); if (err < 0) goto ack; err = nc->call(skb, &info, (const struct nlattr **)cda); /* The lock was released to autoload some module, we * have to abort and start from scratch using the * original skb. */ if (err == -EAGAIN) { status |= NFNL_BATCH_REPLAY; goto done; } } ack: if (nlh->nlmsg_flags & NLM_F_ACK || err) { /* Errors are delivered once the full batch has been * processed, this avoids that the same error is * reported several times when replaying the batch. */ if (err == -ENOMEM || nfnl_err_add(&err_list, nlh, err, &extack) < 0) { /* We failed to enqueue an error, reset the * list of errors and send OOM to userspace * pointing to the batch header. 
*/ nfnl_err_reset(&err_list); netlink_ack(oskb, nlmsg_hdr(oskb), -ENOMEM, NULL); status |= NFNL_BATCH_FAILURE; goto done; } /* We don't stop processing the batch on errors, thus, * userspace gets all the errors that the batch * triggers. */ if (err) status |= NFNL_BATCH_FAILURE; } msglen = NLMSG_ALIGN(nlh->nlmsg_len); if (msglen > skb->len) msglen = skb->len; skb_pull(skb, msglen); } done: if (status & NFNL_BATCH_REPLAY) { ss->abort(net, oskb, NFNL_ABORT_AUTOLOAD); nfnl_err_reset(&err_list); consume_skb(skb); module_put(ss->owner); goto replay; } else if (status == NFNL_BATCH_DONE) { err = ss->commit(net, oskb); if (err == -EAGAIN) { status |= NFNL_BATCH_REPLAY; goto done; } else if (err) { ss->abort(net, oskb, NFNL_ABORT_NONE); netlink_ack(oskb, nlmsg_hdr(oskb), err, NULL); } else if (nlh->nlmsg_flags & NLM_F_ACK) { memset(&extack, 0, sizeof(extack)); nfnl_err_add(&err_list, nlh, 0, &extack); } } else { enum nfnl_abort_action abort_action; if (status & NFNL_BATCH_FAILURE) abort_action = NFNL_ABORT_NONE; else abort_action = NFNL_ABORT_VALIDATE; err = ss->abort(net, oskb, abort_action); if (err == -EAGAIN) { nfnl_err_reset(&err_list); consume_skb(skb); module_put(ss->owner); status |= NFNL_BATCH_FAILURE; goto replay_abort; } } nfnl_err_deliver(&err_list, oskb); consume_skb(skb); module_put(ss->owner); } static const struct nla_policy nfnl_batch_policy[NFNL_BATCH_MAX + 1] = { [NFNL_BATCH_GENID] = { .type = NLA_U32 }, }; static void nfnetlink_rcv_skb_batch(struct sk_buff *skb, struct nlmsghdr *nlh) { int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); struct nlattr *attr = (void *)nlh + min_len; struct nlattr *cda[NFNL_BATCH_MAX + 1]; int attrlen = nlh->nlmsg_len - min_len; struct nfgenmsg *nfgenmsg; int msglen, err; u32 gen_id = 0; u16 res_id; msglen = NLMSG_ALIGN(nlh->nlmsg_len); if (msglen > skb->len) msglen = skb->len; if (skb->len < NLMSG_HDRLEN + sizeof(struct nfgenmsg)) return; err = nla_parse_deprecated(cda, NFNL_BATCH_MAX, attr, attrlen, nfnl_batch_policy, NULL); if (err < 0) { netlink_ack(skb, nlh, err, NULL); return; } if (cda[NFNL_BATCH_GENID]) gen_id = ntohl(nla_get_be32(cda[NFNL_BATCH_GENID])); nfgenmsg = nlmsg_data(nlh); skb_pull(skb, msglen); /* Work around old nft using host byte order */ if (nfgenmsg->res_id == (__force __be16)NFNL_SUBSYS_NFTABLES) res_id = NFNL_SUBSYS_NFTABLES; else res_id = ntohs(nfgenmsg->res_id); nfnetlink_rcv_batch(skb, nlh, res_id, gen_id); } static void nfnetlink_rcv(struct sk_buff *skb) { struct nlmsghdr *nlh = nlmsg_hdr(skb); if (skb->len < NLMSG_HDRLEN || nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len) return; if (!netlink_net_capable(skb, CAP_NET_ADMIN)) { netlink_ack(skb, nlh, -EPERM, NULL); return; } if (nlh->nlmsg_type == NFNL_MSG_BATCH_BEGIN) nfnetlink_rcv_skb_batch(skb, nlh); else netlink_rcv_skb(skb, nfnetlink_rcv_msg); } static void nfnetlink_bind_event(struct net *net, unsigned int group) { #ifdef CONFIG_NF_CONNTRACK_EVENTS int type, group_bit; u8 v; /* All NFNLGRP_CONNTRACK_* group bits fit into u8. * The other groups are not relevant and can be ignored. */ if (group >= 8) return; type = nfnl_group2type[group]; switch (type) { case NFNL_SUBSYS_CTNETLINK: break; case NFNL_SUBSYS_CTNETLINK_EXP: break; default: return; } group_bit = (1 << group); spin_lock(&nfnl_grp_active_lock); v = READ_ONCE(nf_ctnetlink_has_listener); if ((v & group_bit) == 0) { v |= group_bit; /* read concurrently without nfnl_grp_active_lock held. 
*/ WRITE_ONCE(nf_ctnetlink_has_listener, v); } spin_unlock(&nfnl_grp_active_lock); #endif } static int nfnetlink_bind(struct net *net, int group) { const struct nfnetlink_subsystem *ss; int type; if (group <= NFNLGRP_NONE || group > NFNLGRP_MAX) return 0; type = nfnl_group2type[group]; rcu_read_lock(); ss = nfnetlink_get_subsys(type << 8); rcu_read_unlock(); if (!ss) request_module_nowait("nfnetlink-subsys-%d", type); nfnetlink_bind_event(net, group); return 0; } static void nfnetlink_unbind(struct net *net, int group) { #ifdef CONFIG_NF_CONNTRACK_EVENTS int type, group_bit; if (group <= NFNLGRP_NONE || group > NFNLGRP_MAX) return; type = nfnl_group2type[group]; switch (type) { case NFNL_SUBSYS_CTNETLINK: break; case NFNL_SUBSYS_CTNETLINK_EXP: break; default: return; } /* ctnetlink_has_listener is u8 */ if (group >= 8) return; group_bit = (1 << group); spin_lock(&nfnl_grp_active_lock); if (!nfnetlink_has_listeners(net, group)) { u8 v = READ_ONCE(nf_ctnetlink_has_listener); v &= ~group_bit; /* read concurrently without nfnl_grp_active_lock held. */ WRITE_ONCE(nf_ctnetlink_has_listener, v); } spin_unlock(&nfnl_grp_active_lock); #endif } static int __net_init nfnetlink_net_init(struct net *net) { struct nfnl_net *nfnlnet = nfnl_pernet(net); struct netlink_kernel_cfg cfg = { .groups = NFNLGRP_MAX, .input = nfnetlink_rcv, .bind = nfnetlink_bind, .unbind = nfnetlink_unbind, }; nfnlnet->nfnl = netlink_kernel_create(net, NETLINK_NETFILTER, &cfg); if (!nfnlnet->nfnl) return -ENOMEM; return 0; } static void __net_exit nfnetlink_net_exit_batch(struct list_head *net_exit_list) { struct nfnl_net *nfnlnet; struct net *net; list_for_each_entry(net, net_exit_list, exit_list) { nfnlnet = nfnl_pernet(net); netlink_kernel_release(nfnlnet->nfnl); } } static struct pernet_operations nfnetlink_net_ops = { .init = nfnetlink_net_init, .exit_batch = nfnetlink_net_exit_batch, .id = &nfnetlink_pernet_id, .size = sizeof(struct nfnl_net), }; static int __init nfnetlink_init(void) { int i; for (i = NFNLGRP_NONE + 1; i <= NFNLGRP_MAX; i++) BUG_ON(nfnl_group2type[i] == NFNL_SUBSYS_NONE); for (i=0; i<NFNL_SUBSYS_COUNT; i++) __mutex_init(&table[i].mutex, nfnl_lockdep_names[i], &nfnl_lockdep_keys[i]); return register_pernet_subsys(&nfnetlink_net_ops); } static void __exit nfnetlink_exit(void) { unregister_pernet_subsys(&nfnetlink_net_ops); } module_init(nfnetlink_init); module_exit(nfnetlink_exit); |
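Dispatch in nfnetlink_rcv_msg() relies on how nlmsg_type is encoded: the high byte selects the subsystem slot in table[], the low byte selects the callback within that subsystem. A small standalone sketch of that split follows; the macro bodies are equivalent to the uapi definitions, while the sample subsystem and message numbers are purely illustrative.

/* Standalone sketch of the nlmsg_type split used by nfnetlink dispatch. */
#include <stdio.h>
#include <stdint.h>

#define NFNL_SUBSYS_ID(x)	(((x) & 0xff00) >> 8)
#define NFNL_MSG_TYPE(x)	((x) & 0x00ff)

int main(void)
{
	/* hypothetical message: subsystem 10, message 0 */
	uint16_t type = (10 << 8) | 0;

	printf("subsys_id=%u cb_id=%u\n",
	       (unsigned int)NFNL_SUBSYS_ID(type),	/* -> table[subsys_id].subsys */
	       (unsigned int)NFNL_MSG_TYPE(type));	/* -> ss->cb[cb_id] */
	return 0;
}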
| 70 65 70 25 25 3 4 4 25 21 21 65 65 65 65 48 65 21 21 65 21 21 21 20 20 10 10 10 65 23 42 65 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 | // SPDX-License-Identifier: GPL-2.0-only /* * V9FS FID Management * * Copyright (C) 2007 by Latchesar Ionkov <lucho@ionkov.net> * Copyright (C) 2005, 2006 by Eric Van Hensbergen <ericvh@gmail.com> */ #include <linux/module.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/slab.h> #include <linux/sched.h> #include <net/9p/9p.h> #include <net/9p/client.h> #include "v9fs.h" #include "v9fs_vfs.h" #include "fid.h" static inline void __add_fid(struct dentry *dentry, struct p9_fid *fid) { hlist_add_head(&fid->dlist, (struct hlist_head *)&dentry->d_fsdata); } /** * v9fs_fid_add - add a fid to a dentry * @dentry: dentry that the fid is being added to * @pfid: fid to add, NULLed out * */ void v9fs_fid_add(struct dentry *dentry, struct p9_fid **pfid) { struct p9_fid *fid = *pfid; spin_lock(&dentry->d_lock); __add_fid(dentry, fid); spin_unlock(&dentry->d_lock); *pfid = NULL; } static bool v9fs_is_writeable(int mode) { if (mode & (P9_OWRITE|P9_ORDWR)) return true; else return false; } /** * v9fs_fid_find_inode - search for an open fid off of the inode list * @inode: return a fid pointing to a specific inode * @want_writeable: only consider fids which are writeable * @uid: return a fid belonging to the specified user * @any: ignore uid as a selection criteria * */ struct p9_fid *v9fs_fid_find_inode(struct inode *inode, bool want_writeable, kuid_t uid, bool any) { struct hlist_head *h; struct p9_fid *fid, *ret = NULL; p9_debug(P9_DEBUG_VFS, " inode: %p\n", inode); spin_lock(&inode->i_lock); h = (struct hlist_head *)&inode->i_private; hlist_for_each_entry(fid, h, ilist) { if (any || uid_eq(fid->uid, uid)) { if (want_writeable && !v9fs_is_writeable(fid->mode)) { p9_debug(P9_DEBUG_VFS, " mode: %x not writeable?\n", fid->mode); continue; } p9_fid_get(fid); ret = fid; break; } } spin_unlock(&inode->i_lock); return ret; } /** * v9fs_open_fid_add - add an open fid to an inode * @inode: inode that the fid is being added to * @pfid: fid to add, NULLed out * */ void v9fs_open_fid_add(struct inode *inode, struct p9_fid **pfid) { struct p9_fid *fid = *pfid; spin_lock(&inode->i_lock); hlist_add_head(&fid->ilist, (struct hlist_head *)&inode->i_private); spin_unlock(&inode->i_lock); *pfid = NULL; } /** * v9fs_fid_find - retrieve a fid that belongs to the specified uid * 
@dentry: dentry to look for fid in * @uid: return fid that belongs to the specified user * @any: if non-zero, return any fid associated with the dentry * */ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, kuid_t uid, int any) { struct p9_fid *fid, *ret; p9_debug(P9_DEBUG_VFS, " dentry: %pd (%p) uid %d any %d\n", dentry, dentry, from_kuid(&init_user_ns, uid), any); ret = NULL; /* we'll recheck under lock if there's anything to look in */ if (dentry->d_fsdata) { struct hlist_head *h = (struct hlist_head *)&dentry->d_fsdata; spin_lock(&dentry->d_lock); hlist_for_each_entry(fid, h, dlist) { if (any || uid_eq(fid->uid, uid)) { ret = fid; p9_fid_get(ret); break; } } spin_unlock(&dentry->d_lock); } if (!ret && dentry->d_inode) ret = v9fs_fid_find_inode(dentry->d_inode, false, uid, any); return ret; } /* * We need to hold v9ses->rename_sem as long as we hold references * to returned path array. Array element contain pointers to * dentry names. */ static int build_path_from_dentry(struct v9fs_session_info *v9ses, struct dentry *dentry, const unsigned char ***names) { int n = 0, i; const unsigned char **wnames; struct dentry *ds; for (ds = dentry; !IS_ROOT(ds); ds = ds->d_parent) n++; wnames = kmalloc_array(n, sizeof(char *), GFP_KERNEL); if (!wnames) goto err_out; for (ds = dentry, i = (n-1); i >= 0; i--, ds = ds->d_parent) wnames[i] = ds->d_name.name; *names = wnames; return n; err_out: return -ENOMEM; } static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry, kuid_t uid, int any) { struct dentry *ds; const unsigned char **wnames, *uname; int i, n, l, access; struct v9fs_session_info *v9ses; struct p9_fid *fid, *root_fid, *old_fid; v9ses = v9fs_dentry2v9ses(dentry); access = v9ses->flags & V9FS_ACCESS_MASK; fid = v9fs_fid_find(dentry, uid, any); if (fid) return fid; /* * we don't have a matching fid. To do a TWALK we need * parent fid. We need to prevent rename when we want to * look at the parent. */ down_read(&v9ses->rename_sem); ds = dentry->d_parent; fid = v9fs_fid_find(ds, uid, any); if (fid) { /* Found the parent fid do a lookup with that */ old_fid = fid; fid = p9_client_walk(old_fid, 1, &dentry->d_name.name, 1); p9_fid_put(old_fid); goto fid_out; } up_read(&v9ses->rename_sem); /* start from the root and try to do a lookup */ root_fid = v9fs_fid_find(dentry->d_sb->s_root, uid, any); if (!root_fid) { /* the user is not attached to the fs yet */ if (access == V9FS_ACCESS_SINGLE) return ERR_PTR(-EPERM); if (v9fs_proto_dotu(v9ses) || v9fs_proto_dotl(v9ses)) uname = NULL; else uname = v9ses->uname; fid = p9_client_attach(v9ses->clnt, NULL, uname, uid, v9ses->aname); if (IS_ERR(fid)) return fid; root_fid = p9_fid_get(fid); v9fs_fid_add(dentry->d_sb->s_root, &fid); } /* If we are root ourself just return that */ if (dentry->d_sb->s_root == dentry) return root_fid; /* * Do a multipath walk with attached root. 
* When walking parent we need to make sure we * don't have a parallel rename happening */ down_read(&v9ses->rename_sem); n = build_path_from_dentry(v9ses, dentry, &wnames); if (n < 0) { fid = ERR_PTR(n); goto err_out; } fid = root_fid; old_fid = root_fid; i = 0; while (i < n) { l = min(n - i, P9_MAXWELEM); /* * We need to hold rename lock when doing a multipath * walk to ensure none of the path components change */ fid = p9_client_walk(old_fid, l, &wnames[i], old_fid == root_fid /* clone */); /* non-cloning walk will return the same fid */ if (fid != old_fid) { p9_fid_put(old_fid); old_fid = fid; } if (IS_ERR(fid)) { kfree(wnames); goto err_out; } i += l; } kfree(wnames); fid_out: if (!IS_ERR(fid)) { spin_lock(&dentry->d_lock); if (d_unhashed(dentry)) { spin_unlock(&dentry->d_lock); p9_fid_put(fid); fid = ERR_PTR(-ENOENT); } else { __add_fid(dentry, fid); p9_fid_get(fid); spin_unlock(&dentry->d_lock); } } err_out: up_read(&v9ses->rename_sem); return fid; } /** * v9fs_fid_lookup - lookup for a fid, try to walk if not found * @dentry: dentry to look for fid in * * Look for a fid in the specified dentry for the current user. * If no fid is found, try to create one walking from a fid from the parent * dentry (if it has one), or the root dentry. If the user haven't accessed * the fs yet, attach now and walk from the root. */ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry) { kuid_t uid; int any, access; struct v9fs_session_info *v9ses; v9ses = v9fs_dentry2v9ses(dentry); access = v9ses->flags & V9FS_ACCESS_MASK; switch (access) { case V9FS_ACCESS_SINGLE: case V9FS_ACCESS_USER: case V9FS_ACCESS_CLIENT: uid = current_fsuid(); any = 0; break; case V9FS_ACCESS_ANY: uid = v9ses->uid; any = 1; break; default: uid = INVALID_UID; any = 0; break; } return v9fs_fid_lookup_with_uid(dentry, uid, any); } |
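The lookup path above consumes at most P9_MAXWELEM name components per p9_client_walk() call. Below is a standalone sketch of just that chunking loop, with an illustrative stand-in for the protocol limit and a printf in place of the actual walk.

/* Standalone sketch of the chunked walk in v9fs_fid_lookup_with_uid(). */
#include <stdio.h>

#define EXAMPLE_MAXWELEM 16	/* stand-in for P9_MAXWELEM */

static int min_int(int a, int b) { return a < b ? a : b; }

int main(void)
{
	int n = 37;	/* pretend the dentry is 37 components deep */
	int i = 0;

	while (i < n) {
		int l = min_int(n - i, EXAMPLE_MAXWELEM);

		/* one p9_client_walk(old_fid, l, &wnames[i], clone) call */
		printf("walk components [%d, %d)\n", i, i + l);
		i += l;
	}
	return 0;
}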
| 29 33 33 33 8 8 7 8 33 2 12 2 2 1 6 3 28 3 21 13 10 8 8 8 12 21 27 8 23 31 20 20 21 6 15 20 17 12 17 3 2 12 17 20 6 6 2 4 6 6 15 15 15 14 13 11 10 13 11 10 5 4 1 3 10 13 14 10 4 4 12 14 2 11 10 2 9 10 19 19 18 10 8 4 4 16 16 4 11 16 1 16 16 19 25 24 25 22 21 6 21 21 6 15 21 3 1 2 21 21 21 14 10 9 5 1 3 9 27 25 9 20 27 14 14 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 | // SPDX-License-Identifier: GPL-2.0 /* * fs/timerfd.c * * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org> * * * Thanks to Thomas Gleixner for code reviews and useful comments. 
* */ #include <linux/alarmtimer.h> #include <linux/file.h> #include <linux/poll.h> #include <linux/init.h> #include <linux/fs.h> #include <linux/sched.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/time.h> #include <linux/hrtimer.h> #include <linux/anon_inodes.h> #include <linux/timerfd.h> #include <linux/syscalls.h> #include <linux/compat.h> #include <linux/rcupdate.h> #include <linux/time_namespace.h> struct timerfd_ctx { union { struct hrtimer tmr; struct alarm alarm; } t; ktime_t tintv; ktime_t moffs; wait_queue_head_t wqh; u64 ticks; int clockid; short unsigned expired; short unsigned settime_flags; /* to show in fdinfo */ struct rcu_head rcu; struct list_head clist; spinlock_t cancel_lock; bool might_cancel; }; static LIST_HEAD(cancel_list); static DEFINE_SPINLOCK(cancel_lock); static inline bool isalarm(struct timerfd_ctx *ctx) { return ctx->clockid == CLOCK_REALTIME_ALARM || ctx->clockid == CLOCK_BOOTTIME_ALARM; } /* * This gets called when the timer event triggers. We set the "expired" * flag, but we do not re-arm the timer (in case it's necessary, * tintv != 0) until the timer is accessed. */ static void timerfd_triggered(struct timerfd_ctx *ctx) { unsigned long flags; spin_lock_irqsave(&ctx->wqh.lock, flags); ctx->expired = 1; ctx->ticks++; wake_up_locked_poll(&ctx->wqh, EPOLLIN); spin_unlock_irqrestore(&ctx->wqh.lock, flags); } static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) { struct timerfd_ctx *ctx = container_of(htmr, struct timerfd_ctx, t.tmr); timerfd_triggered(ctx); return HRTIMER_NORESTART; } static void timerfd_alarmproc(struct alarm *alarm, ktime_t now) { struct timerfd_ctx *ctx = container_of(alarm, struct timerfd_ctx, t.alarm); timerfd_triggered(ctx); } /* * Called when the clock was set to cancel the timers in the cancel * list. This will wake up processes waiting on these timers. The * wake-up requires ctx->ticks to be non zero, therefore we increment * it before calling wake_up_locked(). */ void timerfd_clock_was_set(void) { ktime_t moffs = ktime_mono_to_real(0); struct timerfd_ctx *ctx; unsigned long flags; rcu_read_lock(); list_for_each_entry_rcu(ctx, &cancel_list, clist) { if (!ctx->might_cancel) continue; spin_lock_irqsave(&ctx->wqh.lock, flags); if (ctx->moffs != moffs) { ctx->moffs = KTIME_MAX; ctx->ticks++; wake_up_locked_poll(&ctx->wqh, EPOLLIN); } spin_unlock_irqrestore(&ctx->wqh.lock, flags); } rcu_read_unlock(); } static void timerfd_resume_work(struct work_struct *work) { timerfd_clock_was_set(); } static DECLARE_WORK(timerfd_work, timerfd_resume_work); /* * Invoked from timekeeping_resume(). Defer the actual update to work so * timerfd_clock_was_set() runs in task context. 
*/ void timerfd_resume(void) { schedule_work(&timerfd_work); } static void __timerfd_remove_cancel(struct timerfd_ctx *ctx) { if (ctx->might_cancel) { ctx->might_cancel = false; spin_lock(&cancel_lock); list_del_rcu(&ctx->clist); spin_unlock(&cancel_lock); } } static void timerfd_remove_cancel(struct timerfd_ctx *ctx) { spin_lock(&ctx->cancel_lock); __timerfd_remove_cancel(ctx); spin_unlock(&ctx->cancel_lock); } static bool timerfd_canceled(struct timerfd_ctx *ctx) { if (!ctx->might_cancel || ctx->moffs != KTIME_MAX) return false; ctx->moffs = ktime_mono_to_real(0); return true; } static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags) { spin_lock(&ctx->cancel_lock); if ((ctx->clockid == CLOCK_REALTIME || ctx->clockid == CLOCK_REALTIME_ALARM) && (flags & TFD_TIMER_ABSTIME) && (flags & TFD_TIMER_CANCEL_ON_SET)) { if (!ctx->might_cancel) { ctx->might_cancel = true; spin_lock(&cancel_lock); list_add_rcu(&ctx->clist, &cancel_list); spin_unlock(&cancel_lock); } } else { __timerfd_remove_cancel(ctx); } spin_unlock(&ctx->cancel_lock); } static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) { ktime_t remaining; if (isalarm(ctx)) remaining = alarm_expires_remaining(&ctx->t.alarm); else remaining = hrtimer_expires_remaining_adjusted(&ctx->t.tmr); return remaining < 0 ? 0: remaining; } static int timerfd_setup(struct timerfd_ctx *ctx, int flags, const struct itimerspec64 *ktmr) { enum hrtimer_mode htmode; ktime_t texp; int clockid = ctx->clockid; htmode = (flags & TFD_TIMER_ABSTIME) ? HRTIMER_MODE_ABS: HRTIMER_MODE_REL; texp = timespec64_to_ktime(ktmr->it_value); ctx->expired = 0; ctx->ticks = 0; ctx->tintv = timespec64_to_ktime(ktmr->it_interval); if (isalarm(ctx)) { alarm_init(&ctx->t.alarm, ctx->clockid == CLOCK_REALTIME_ALARM ? ALARM_REALTIME : ALARM_BOOTTIME, timerfd_alarmproc); } else { hrtimer_setup(&ctx->t.tmr, timerfd_tmrproc, clockid, htmode); hrtimer_set_expires(&ctx->t.tmr, texp); } if (texp != 0) { if (flags & TFD_TIMER_ABSTIME) texp = timens_ktime_to_host(clockid, texp); if (isalarm(ctx)) { if (flags & TFD_TIMER_ABSTIME) alarm_start(&ctx->t.alarm, texp); else alarm_start_relative(&ctx->t.alarm, texp); } else { hrtimer_start(&ctx->t.tmr, texp, htmode); } if (timerfd_canceled(ctx)) return -ECANCELED; } ctx->settime_flags = flags & TFD_SETTIME_FLAGS; return 0; } static int timerfd_release(struct inode *inode, struct file *file) { struct timerfd_ctx *ctx = file->private_data; timerfd_remove_cancel(ctx); if (isalarm(ctx)) alarm_cancel(&ctx->t.alarm); else hrtimer_cancel(&ctx->t.tmr); kfree_rcu(ctx, rcu); return 0; } static __poll_t timerfd_poll(struct file *file, poll_table *wait) { struct timerfd_ctx *ctx = file->private_data; __poll_t events = 0; unsigned long flags; poll_wait(file, &ctx->wqh, wait); spin_lock_irqsave(&ctx->wqh.lock, flags); if (ctx->ticks) events |= EPOLLIN; spin_unlock_irqrestore(&ctx->wqh.lock, flags); return events; } static ssize_t timerfd_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *file = iocb->ki_filp; struct timerfd_ctx *ctx = file->private_data; ssize_t res; u64 ticks = 0; if (iov_iter_count(to) < sizeof(ticks)) return -EINVAL; spin_lock_irq(&ctx->wqh.lock); if (file->f_flags & O_NONBLOCK || iocb->ki_flags & IOCB_NOWAIT) res = -EAGAIN; else res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks); /* * If clock has changed, we do not care about the * ticks and we do not rearm the timer. Userspace must * reevaluate anyway. 
*/ if (timerfd_canceled(ctx)) { ctx->ticks = 0; ctx->expired = 0; res = -ECANCELED; } if (ctx->ticks) { ticks = ctx->ticks; if (ctx->expired && ctx->tintv) { /* * If tintv != 0, this is a periodic timer that * needs to be re-armed. We avoid doing it in the timer * callback to avoid DoS attacks specifying a very * short timer period. */ if (isalarm(ctx)) { ticks += alarm_forward_now( &ctx->t.alarm, ctx->tintv) - 1; alarm_restart(&ctx->t.alarm); } else { ticks += hrtimer_forward_now(&ctx->t.tmr, ctx->tintv) - 1; hrtimer_restart(&ctx->t.tmr); } } ctx->expired = 0; ctx->ticks = 0; } spin_unlock_irq(&ctx->wqh.lock); if (ticks) { res = copy_to_iter(&ticks, sizeof(ticks), to); if (!res) res = -EFAULT; } return res; } #ifdef CONFIG_PROC_FS static void timerfd_show(struct seq_file *m, struct file *file) { struct timerfd_ctx *ctx = file->private_data; struct timespec64 value, interval; spin_lock_irq(&ctx->wqh.lock); value = ktime_to_timespec64(timerfd_get_remaining(ctx)); interval = ktime_to_timespec64(ctx->tintv); spin_unlock_irq(&ctx->wqh.lock); seq_printf(m, "clockid: %d\n" "ticks: %llu\n" "settime flags: 0%o\n" "it_value: (%llu, %llu)\n" "it_interval: (%llu, %llu)\n", ctx->clockid, (unsigned long long)ctx->ticks, ctx->settime_flags, (unsigned long long)value.tv_sec, (unsigned long long)value.tv_nsec, (unsigned long long)interval.tv_sec, (unsigned long long)interval.tv_nsec); } #else #define timerfd_show NULL #endif #ifdef CONFIG_CHECKPOINT_RESTORE static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct timerfd_ctx *ctx = file->private_data; int ret = 0; switch (cmd) { case TFD_IOC_SET_TICKS: { u64 ticks; if (copy_from_user(&ticks, (u64 __user *)arg, sizeof(ticks))) return -EFAULT; if (!ticks) return -EINVAL; spin_lock_irq(&ctx->wqh.lock); if (!timerfd_canceled(ctx)) { ctx->ticks = ticks; wake_up_locked_poll(&ctx->wqh, EPOLLIN); } else ret = -ECANCELED; spin_unlock_irq(&ctx->wqh.lock); break; } default: ret = -ENOTTY; break; } return ret; } #else #define timerfd_ioctl NULL #endif static const struct file_operations timerfd_fops = { .release = timerfd_release, .poll = timerfd_poll, .read_iter = timerfd_read_iter, .llseek = noop_llseek, .show_fdinfo = timerfd_show, .unlocked_ioctl = timerfd_ioctl, }; SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) { int ufd; struct timerfd_ctx *ctx; struct file *file; /* Check the TFD_* constants for consistency. */ BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC); BUILD_BUG_ON(TFD_NONBLOCK != O_NONBLOCK); if ((flags & ~TFD_CREATE_FLAGS) || (clockid != CLOCK_MONOTONIC && clockid != CLOCK_REALTIME && clockid != CLOCK_REALTIME_ALARM && clockid != CLOCK_BOOTTIME && clockid != CLOCK_BOOTTIME_ALARM)) return -EINVAL; if ((clockid == CLOCK_REALTIME_ALARM || clockid == CLOCK_BOOTTIME_ALARM) && !capable(CAP_WAKE_ALARM)) return -EPERM; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; init_waitqueue_head(&ctx->wqh); spin_lock_init(&ctx->cancel_lock); ctx->clockid = clockid; if (isalarm(ctx)) alarm_init(&ctx->t.alarm, ctx->clockid == CLOCK_REALTIME_ALARM ? 
ALARM_REALTIME : ALARM_BOOTTIME, timerfd_alarmproc); else hrtimer_setup(&ctx->t.tmr, timerfd_tmrproc, clockid, HRTIMER_MODE_ABS); ctx->moffs = ktime_mono_to_real(0); ufd = get_unused_fd_flags(flags & TFD_SHARED_FCNTL_FLAGS); if (ufd < 0) { kfree(ctx); return ufd; } file = anon_inode_getfile_fmode("[timerfd]", &timerfd_fops, ctx, O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS), FMODE_NOWAIT); if (IS_ERR(file)) { put_unused_fd(ufd); kfree(ctx); return PTR_ERR(file); } fd_install(ufd, file); return ufd; } static int do_timerfd_settime(int ufd, int flags, const struct itimerspec64 *new, struct itimerspec64 *old) { struct timerfd_ctx *ctx; int ret; if ((flags & ~TFD_SETTIME_FLAGS) || !itimerspec64_valid(new)) return -EINVAL; CLASS(fd, f)(ufd); if (fd_empty(f)) return -EBADF; if (fd_file(f)->f_op != &timerfd_fops) return -EINVAL; ctx = fd_file(f)->private_data; if (isalarm(ctx) && !capable(CAP_WAKE_ALARM)) return -EPERM; timerfd_setup_cancel(ctx, flags); /* * We need to stop the existing timer before reprogramming * it to the new values. */ for (;;) { spin_lock_irq(&ctx->wqh.lock); if (isalarm(ctx)) { if (alarm_try_to_cancel(&ctx->t.alarm) >= 0) break; } else { if (hrtimer_try_to_cancel(&ctx->t.tmr) >= 0) break; } spin_unlock_irq(&ctx->wqh.lock); if (isalarm(ctx)) hrtimer_cancel_wait_running(&ctx->t.alarm.timer); else hrtimer_cancel_wait_running(&ctx->t.tmr); } /* * If the timer is expired and it's periodic, we need to advance it * because the caller may want to know the previous expiration time. * We do not update "ticks" and "expired" since the timer will be * re-programmed again in the following timerfd_setup() call. */ if (ctx->expired && ctx->tintv) { if (isalarm(ctx)) alarm_forward_now(&ctx->t.alarm, ctx->tintv); else hrtimer_forward_now(&ctx->t.tmr, ctx->tintv); } old->it_value = ktime_to_timespec64(timerfd_get_remaining(ctx)); old->it_interval = ktime_to_timespec64(ctx->tintv); /* * Re-program the timer to the new value ... */ ret = timerfd_setup(ctx, flags, new); spin_unlock_irq(&ctx->wqh.lock); return ret; } static int do_timerfd_gettime(int ufd, struct itimerspec64 *t) { struct timerfd_ctx *ctx; CLASS(fd, f)(ufd); if (fd_empty(f)) return -EBADF; if (fd_file(f)->f_op != &timerfd_fops) return -EINVAL; ctx = fd_file(f)->private_data; spin_lock_irq(&ctx->wqh.lock); if (ctx->expired && ctx->tintv) { ctx->expired = 0; if (isalarm(ctx)) { ctx->ticks += alarm_forward_now( &ctx->t.alarm, ctx->tintv) - 1; alarm_restart(&ctx->t.alarm); } else { ctx->ticks += hrtimer_forward_now(&ctx->t.tmr, ctx->tintv) - 1; hrtimer_restart(&ctx->t.tmr); } } t->it_value = ktime_to_timespec64(timerfd_get_remaining(ctx)); t->it_interval = ktime_to_timespec64(ctx->tintv); spin_unlock_irq(&ctx->wqh.lock); return 0; } SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, const struct __kernel_itimerspec __user *, utmr, struct __kernel_itimerspec __user *, otmr) { struct itimerspec64 new, old; int ret; if (get_itimerspec64(&new, utmr)) return -EFAULT; ret = do_timerfd_settime(ufd, flags, &new, &old); if (ret) return ret; if (otmr && put_itimerspec64(&old, otmr)) return -EFAULT; return ret; } SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct __kernel_itimerspec __user *, otmr) { struct itimerspec64 kotmr; int ret = do_timerfd_gettime(ufd, &kotmr); if (ret) return ret; return put_itimerspec64(&kotmr, otmr) ? 
-EFAULT : 0; } #ifdef CONFIG_COMPAT_32BIT_TIME SYSCALL_DEFINE4(timerfd_settime32, int, ufd, int, flags, const struct old_itimerspec32 __user *, utmr, struct old_itimerspec32 __user *, otmr) { struct itimerspec64 new, old; int ret; if (get_old_itimerspec32(&new, utmr)) return -EFAULT; ret = do_timerfd_settime(ufd, flags, &new, &old); if (ret) return ret; if (otmr && put_old_itimerspec32(&old, otmr)) return -EFAULT; return ret; } SYSCALL_DEFINE2(timerfd_gettime32, int, ufd, struct old_itimerspec32 __user *, otmr) { struct itimerspec64 kotmr; int ret = do_timerfd_gettime(ufd, &kotmr); if (ret) return ret; return put_old_itimerspec32(&kotmr, otmr) ? -EFAULT : 0; } #endif |
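From userspace, the syscalls implemented above are reached through the timerfd API. A minimal usage example (error handling abbreviated) that creates a periodic CLOCK_MONOTONIC timer and reads the expiration counter maintained by timerfd_read_iter():

/* Minimal userspace example of the timerfd API implemented above. */
#include <sys/timerfd.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	struct itimerspec its = {
		.it_value    = { .tv_sec = 1, .tv_nsec = 0 },	/* first expiry after 1s */
		.it_interval = { .tv_sec = 1, .tv_nsec = 0 },	/* then every 1s */
	};
	uint64_t expirations;
	int i, fd = timerfd_create(CLOCK_MONOTONIC, 0);

	if (fd < 0 || timerfd_settime(fd, 0, &its, NULL) < 0)
		return 1;

	/* Each read() returns the number of expirations since the last read. */
	for (i = 0; i < 3; i++) {
		if (read(fd, &expirations, sizeof(expirations)) == sizeof(expirations))
			printf("expired %llu time(s)\n", (unsigned long long)expirations);
	}
	close(fd);
	return 0;
}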
| 5 1 1 5 1 1 1 1 4 5 5 5 5 5 5 5 5 5 5 5 5 5 4 23 8 22 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 | // SPDX-License-Identifier: GPL-2.0-or-later /* * * Copyright Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) * Copyright Darryl Miles G7LED (dlm@g7led.demon.co.uk) */ #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/kernel.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/slab.h> #include <net/ax25.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <net/sock.h> #include <linux/uaccess.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> #include <net/netrom.h> /* * This is where all NET/ROM frames pass, except for IP-over-NET/ROM which * cannot be fragmented in this manner. */ void nr_output(struct sock *sk, struct sk_buff *skb) { struct sk_buff *skbn; unsigned char transport[NR_TRANSPORT_LEN]; int err, frontlen, len; if (skb->len - NR_TRANSPORT_LEN > NR_MAX_PACKET_SIZE) { /* Save a copy of the Transport Header */ skb_copy_from_linear_data(skb, transport, NR_TRANSPORT_LEN); skb_pull(skb, NR_TRANSPORT_LEN); frontlen = skb_headroom(skb); while (skb->len > 0) { if ((skbn = sock_alloc_send_skb(sk, frontlen + NR_MAX_PACKET_SIZE, 0, &err)) == NULL) return; skb_reserve(skbn, frontlen); len = (NR_MAX_PACKET_SIZE > skb->len) ? skb->len : NR_MAX_PACKET_SIZE; /* Copy the user data */ skb_copy_from_linear_data(skb, skb_put(skbn, len), len); skb_pull(skb, len); /* Duplicate the Transport Header */ skb_push(skbn, NR_TRANSPORT_LEN); skb_copy_to_linear_data(skbn, transport, NR_TRANSPORT_LEN); if (skb->len > 0) skbn->data[4] |= NR_MORE_FLAG; skb_queue_tail(&sk->sk_write_queue, skbn); /* Throw it on the queue */ } kfree_skb(skb); } else { skb_queue_tail(&sk->sk_write_queue, skb); /* Throw it on the queue */ } nr_kick(sk); } /* * This procedure is passed a buffer descriptor for an iframe. It builds * the rest of the control part of the frame and then writes it out. 
*/ static void nr_send_iframe(struct sock *sk, struct sk_buff *skb) { struct nr_sock *nr = nr_sk(sk); if (skb == NULL) return; skb->data[2] = nr->vs; skb->data[3] = nr->vr; if (nr->condition & NR_COND_OWN_RX_BUSY) skb->data[4] |= NR_CHOKE_FLAG; nr_start_idletimer(sk); nr_transmit_buffer(sk, skb); } void nr_send_nak_frame(struct sock *sk) { struct sk_buff *skb, *skbn; struct nr_sock *nr = nr_sk(sk); if ((skb = skb_peek(&nr->ack_queue)) == NULL) return; if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL) return; skbn->data[2] = nr->va; skbn->data[3] = nr->vr; if (nr->condition & NR_COND_OWN_RX_BUSY) skbn->data[4] |= NR_CHOKE_FLAG; nr_transmit_buffer(sk, skbn); nr->condition &= ~NR_COND_ACK_PENDING; nr->vl = nr->vr; nr_stop_t1timer(sk); } void nr_kick(struct sock *sk) { struct nr_sock *nr = nr_sk(sk); struct sk_buff *skb, *skbn; unsigned short start, end; if (nr->state != NR_STATE_3) return; if (nr->condition & NR_COND_PEER_RX_BUSY) return; if (!skb_peek(&sk->sk_write_queue)) return; start = (skb_peek(&nr->ack_queue) == NULL) ? nr->va : nr->vs; end = (nr->va + nr->window) % NR_MODULUS; if (start == end) return; nr->vs = start; /* * Transmit data until either we're out of data to send or * the window is full. */ /* * Dequeue the frame and copy it. */ skb = skb_dequeue(&sk->sk_write_queue); do { if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL) { skb_queue_head(&sk->sk_write_queue, skb); break; } skb_set_owner_w(skbn, sk); /* * Transmit the frame copy. */ nr_send_iframe(sk, skbn); nr->vs = (nr->vs + 1) % NR_MODULUS; /* * Requeue the original data frame. */ skb_queue_tail(&nr->ack_queue, skb); } while (nr->vs != end && (skb = skb_dequeue(&sk->sk_write_queue)) != NULL); nr->vl = nr->vr; nr->condition &= ~NR_COND_ACK_PENDING; if (!nr_t1timer_running(sk)) nr_start_t1timer(sk); } void nr_transmit_buffer(struct sock *sk, struct sk_buff *skb) { struct nr_sock *nr = nr_sk(sk); unsigned char *dptr; /* * Add the protocol byte and network header. */ dptr = skb_push(skb, NR_NETWORK_LEN); memcpy(dptr, &nr->source_addr, AX25_ADDR_LEN); dptr[6] &= ~AX25_CBIT; dptr[6] &= ~AX25_EBIT; dptr[6] |= AX25_SSSID_SPARE; dptr += AX25_ADDR_LEN; memcpy(dptr, &nr->dest_addr, AX25_ADDR_LEN); dptr[6] &= ~AX25_CBIT; dptr[6] |= AX25_EBIT; dptr[6] |= AX25_SSSID_SPARE; dptr += AX25_ADDR_LEN; *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser); if (!nr_route_frame(skb, NULL)) { kfree_skb(skb); nr_disconnect(sk, ENETUNREACH); } } /* * The following routines are taken from page 170 of the 7th ARRL Computer * Networking Conference paper, as is the whole state machine. */ void nr_establish_data_link(struct sock *sk) { struct nr_sock *nr = nr_sk(sk); nr->condition = 0x00; nr->n2count = 0; nr_write_internal(sk, NR_CONNREQ); nr_stop_t2timer(sk); nr_stop_t4timer(sk); nr_stop_idletimer(sk); nr_start_t1timer(sk); } /* * Never send a NAK when we are CHOKEd. */ void nr_enquiry_response(struct sock *sk) { struct nr_sock *nr = nr_sk(sk); int frametype = NR_INFOACK; if (nr->condition & NR_COND_OWN_RX_BUSY) { frametype |= NR_CHOKE_FLAG; } else { if (skb_peek(&nr->reseq_queue) != NULL) frametype |= NR_NAK_FLAG; } nr_write_internal(sk, frametype); nr->vl = nr->vr; nr->condition &= ~NR_COND_ACK_PENDING; } void nr_check_iframes_acked(struct sock *sk, unsigned short nr) { struct nr_sock *nrom = nr_sk(sk); if (nrom->vs == nr) { nr_frames_acked(sk, nr); nr_stop_t1timer(sk); nrom->n2count = 0; } else { if (nrom->va != nr) { nr_frames_acked(sk, nr); nr_start_t1timer(sk); } } } |
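nr_output() above fragments an oversized payload into NR_MAX_PACKET_SIZE chunks, copying the saved transport header onto each fragment and setting NR_MORE_FLAG on every fragment except the last. A standalone sketch of that pattern follows; the sizes and flag value are illustrative stand-ins, not the NET/ROM protocol constants.

/* Standalone sketch of the fragmentation pattern used by nr_output(). */
#include <stdio.h>
#include <string.h>

#define HDR_LEN		5	/* stand-in for NR_TRANSPORT_LEN */
#define MAX_PAYLOAD	8	/* stand-in for NR_MAX_PACKET_SIZE */
#define MORE_FLAG	0x20	/* stand-in for NR_MORE_FLAG */

int main(void)
{
	unsigned char hdr[HDR_LEN] = { 1, 2, 3, 4, 0 };
	const char *data = "a payload longer than one fragment";
	size_t len = strlen(data), off = 0;

	while (off < len) {
		unsigned char frag[HDR_LEN + MAX_PAYLOAD];
		size_t chunk = (len - off > MAX_PAYLOAD) ? MAX_PAYLOAD : len - off;

		memcpy(frag, hdr, HDR_LEN);			/* duplicate transport header */
		memcpy(frag + HDR_LEN, data + off, chunk);	/* copy user data */
		off += chunk;
		if (off < len)
			frag[4] |= MORE_FLAG;			/* more fragments follow */

		printf("fragment of %zu byte(s), more=%d\n", chunk, off < len);
	}
	return 0;
}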
/* * Copyright (c) 2003 Patrick McHardy, <kaber@trash.net> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * 2003-10-17 - Ported from altq */ /* * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved. * * Permission to use, copy, modify, and distribute this software and * its documentation is hereby granted (including for commercial or * for-profit use), provided that both the copyright notice and this * permission notice appear in all copies of the software, derivative * works, or modified versions, and any portions thereof. * * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF * WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON PROVIDES THIS * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * * Carnegie Mellon encourages (but does not require) users of this * software to return any improvements or extensions that they make, * and to grant Carnegie Mellon the rights to redistribute these * changes without encumbrance. */ /* * H-FSC is described in Proceedings of SIGCOMM'97, * "A Hierarchical Fair Service Curve Algorithm for Link-Sharing, * Real-Time and Priority Service" * by Ion Stoica, Hui Zhang, and T. S. Eugene Ng. * * Oleg Cherevko <olwi@aq.ml.com.ua> added the upperlimit for link-sharing. * when a class has an upperlimit, the fit-time is computed from the * upperlimit service curve. the link-sharing scheduler does not schedule * a class whose fit-time exceeds the current time. */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/types.h> #include <linux/errno.h> #include <linux/compiler.h> #include <linux/spinlock.h> #include <linux/skbuff.h> #include <linux/string.h> #include <linux/slab.h> #include <linux/list.h> #include <linux/rbtree.h> #include <linux/init.h> #include <linux/rtnetlink.h> #include <linux/pkt_sched.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> #include <asm/div64.h> /* * kernel internal service curve representation: * coordinates are given by 64 bit unsigned integers.
* x-axis: unit is clock count. * y-axis: unit is byte. * * The service curve parameters are converted to the internal * representation. The slope values are scaled to avoid overflow. * the inverse slope values as well as the y-projection of the 1st * segment are kept in order to avoid 64-bit divide operations * that are expensive on 32-bit architectures. */ struct internal_sc { u64 sm1; /* scaled slope of the 1st segment */ u64 ism1; /* scaled inverse-slope of the 1st segment */ u64 dx; /* the x-projection of the 1st segment */ u64 dy; /* the y-projection of the 1st segment */ u64 sm2; /* scaled slope of the 2nd segment */ u64 ism2; /* scaled inverse-slope of the 2nd segment */ }; /* runtime service curve */ struct runtime_sc { u64 x; /* current starting position on x-axis */ u64 y; /* current starting position on y-axis */ u64 sm1; /* scaled slope of the 1st segment */ u64 ism1; /* scaled inverse-slope of the 1st segment */ u64 dx; /* the x-projection of the 1st segment */ u64 dy; /* the y-projection of the 1st segment */ u64 sm2; /* scaled slope of the 2nd segment */ u64 ism2; /* scaled inverse-slope of the 2nd segment */ }; enum hfsc_class_flags { HFSC_RSC = 0x1, HFSC_FSC = 0x2, HFSC_USC = 0x4 }; struct hfsc_class { struct Qdisc_class_common cl_common; struct gnet_stats_basic_sync bstats; struct gnet_stats_queue qstats; struct net_rate_estimator __rcu *rate_est; struct tcf_proto __rcu *filter_list; /* filter list */ struct tcf_block *block; unsigned int level; /* class level in hierarchy */ struct hfsc_sched *sched; /* scheduler data */ struct hfsc_class *cl_parent; /* parent class */ struct list_head siblings; /* sibling classes */ struct list_head children; /* child classes */ struct Qdisc *qdisc; /* leaf qdisc */ struct rb_node el_node; /* qdisc's eligible tree member */ struct rb_root vt_tree; /* active children sorted by cl_vt */ struct rb_node vt_node; /* parent's vt_tree member */ struct rb_root cf_tree; /* active children sorted by cl_f */ struct rb_node cf_node; /* parent's cf_heap member */ u64 cl_total; /* total work in bytes */ u64 cl_cumul; /* cumulative work in bytes done by real-time criteria */ u64 cl_d; /* deadline*/ u64 cl_e; /* eligible time */ u64 cl_vt; /* virtual time */ u64 cl_f; /* time when this class will fit for link-sharing, max(myf, cfmin) */ u64 cl_myf; /* my fit-time (calculated from this class's own upperlimit curve) */ u64 cl_cfmin; /* earliest children's fit-time (used with cl_myf to obtain cl_f) */ u64 cl_cvtmin; /* minimal virtual time among the children fit for link-sharing (monotonic within a period) */ u64 cl_vtadj; /* intra-period cumulative vt adjustment */ u64 cl_cvtoff; /* largest virtual time seen among the children */ struct internal_sc cl_rsc; /* internal real-time service curve */ struct internal_sc cl_fsc; /* internal fair service curve */ struct internal_sc cl_usc; /* internal upperlimit service curve */ struct runtime_sc cl_deadline; /* deadline curve */ struct runtime_sc cl_eligible; /* eligible curve */ struct runtime_sc cl_virtual; /* virtual curve */ struct runtime_sc cl_ulimit; /* upperlimit curve */ u8 cl_flags; /* which curves are valid */ u32 cl_vtperiod; /* vt period sequence number */ u32 cl_parentperiod;/* parent's vt period sequence number*/ u32 cl_nactive; /* number of active children */ }; struct hfsc_sched { u16 defcls; /* default class id */ struct hfsc_class root; /* root class */ struct Qdisc_class_hash clhash; /* class hash */ struct rb_root eligible; /* eligible tree */ struct qdisc_watchdog watchdog; /* watchdog 
timer */ }; #define HT_INFINITY 0xffffffffffffffffULL /* infinite time value */ static bool cl_in_el_or_vttree(struct hfsc_class *cl) { return ((cl->cl_flags & HFSC_FSC) && cl->cl_nactive) || ((cl->cl_flags & HFSC_RSC) && !RB_EMPTY_NODE(&cl->el_node)); } /* * eligible tree holds backlogged classes being sorted by their eligible times. * there is one eligible tree per hfsc instance. */ static void eltree_insert(struct hfsc_class *cl) { struct rb_node **p = &cl->sched->eligible.rb_node; struct rb_node *parent = NULL; struct hfsc_class *cl1; while (*p != NULL) { parent = *p; cl1 = rb_entry(parent, struct hfsc_class, el_node); if (cl->cl_e >= cl1->cl_e) p = &parent->rb_right; else p = &parent->rb_left; } rb_link_node(&cl->el_node, parent, p); rb_insert_color(&cl->el_node, &cl->sched->eligible); } static inline void eltree_remove(struct hfsc_class *cl) { if (!RB_EMPTY_NODE(&cl->el_node)) { rb_erase(&cl->el_node, &cl->sched->eligible); RB_CLEAR_NODE(&cl->el_node); } } static inline void eltree_update(struct hfsc_class *cl) { eltree_remove(cl); eltree_insert(cl); } /* find the class with the minimum deadline among the eligible classes */ static inline struct hfsc_class * eltree_get_mindl(struct hfsc_sched *q, u64 cur_time) { struct hfsc_class *p, *cl = NULL; struct rb_node *n; for (n = rb_first(&q->eligible); n != NULL; n = rb_next(n)) { p = rb_entry(n, struct hfsc_class, el_node); if (p->cl_e > cur_time) break; if (cl == NULL || p->cl_d < cl->cl_d) cl = p; } return cl; } /* find the class with minimum eligible time among the eligible classes */ static inline struct hfsc_class * eltree_get_minel(struct hfsc_sched *q) { struct rb_node *n; n = rb_first(&q->eligible); if (n == NULL) return NULL; return rb_entry(n, struct hfsc_class, el_node); } /* * vttree holds holds backlogged child classes being sorted by their virtual * time. each intermediate class has one vttree. */ static void vttree_insert(struct hfsc_class *cl) { struct rb_node **p = &cl->cl_parent->vt_tree.rb_node; struct rb_node *parent = NULL; struct hfsc_class *cl1; while (*p != NULL) { parent = *p; cl1 = rb_entry(parent, struct hfsc_class, vt_node); if (cl->cl_vt >= cl1->cl_vt) p = &parent->rb_right; else p = &parent->rb_left; } rb_link_node(&cl->vt_node, parent, p); rb_insert_color(&cl->vt_node, &cl->cl_parent->vt_tree); } static inline void vttree_remove(struct hfsc_class *cl) { rb_erase(&cl->vt_node, &cl->cl_parent->vt_tree); } static inline void vttree_update(struct hfsc_class *cl) { vttree_remove(cl); vttree_insert(cl); } static inline struct hfsc_class * vttree_firstfit(struct hfsc_class *cl, u64 cur_time) { struct hfsc_class *p; struct rb_node *n; for (n = rb_first(&cl->vt_tree); n != NULL; n = rb_next(n)) { p = rb_entry(n, struct hfsc_class, vt_node); if (p->cl_f <= cur_time) return p; } return NULL; } /* * get the leaf class with the minimum vt in the hierarchy */ static struct hfsc_class * vttree_get_minvt(struct hfsc_class *cl, u64 cur_time) { /* if root-class's cfmin is bigger than cur_time nothing to do */ if (cl->cl_cfmin > cur_time) return NULL; while (cl->level > 0) { cl = vttree_firstfit(cl, cur_time); if (cl == NULL) return NULL; /* * update parent's cl_cvtmin. 
*/ if (cl->cl_parent->cl_cvtmin < cl->cl_vt) cl->cl_parent->cl_cvtmin = cl->cl_vt; } return cl; } static void cftree_insert(struct hfsc_class *cl) { struct rb_node **p = &cl->cl_parent->cf_tree.rb_node; struct rb_node *parent = NULL; struct hfsc_class *cl1; while (*p != NULL) { parent = *p; cl1 = rb_entry(parent, struct hfsc_class, cf_node); if (cl->cl_f >= cl1->cl_f) p = &parent->rb_right; else p = &parent->rb_left; } rb_link_node(&cl->cf_node, parent, p); rb_insert_color(&cl->cf_node, &cl->cl_parent->cf_tree); } static inline void cftree_remove(struct hfsc_class *cl) { rb_erase(&cl->cf_node, &cl->cl_parent->cf_tree); } static inline void cftree_update(struct hfsc_class *cl) { cftree_remove(cl); cftree_insert(cl); } /* * service curve support functions * * external service curve parameters * m: bps * d: us * internal service curve parameters * sm: (bytes/psched_us) << SM_SHIFT * ism: (psched_us/byte) << ISM_SHIFT * dx: psched_us * * The clock source resolution with ktime and PSCHED_SHIFT 10 is 1.024us. * * sm and ism are scaled in order to keep effective digits. * SM_SHIFT and ISM_SHIFT are selected to keep at least 4 effective * digits in decimal using the following table. * * bits/sec 100Kbps 1Mbps 10Mbps 100Mbps 1Gbps * ------------+------------------------------------------------------- * bytes/1.024us 12.8e-3 128e-3 1280e-3 12800e-3 128000e-3 * * 1.024us/byte 78.125 7.8125 0.78125 0.078125 0.0078125 * * So, for PSCHED_SHIFT 10 we need: SM_SHIFT 20, ISM_SHIFT 18. */ #define SM_SHIFT (30 - PSCHED_SHIFT) #define ISM_SHIFT (8 + PSCHED_SHIFT) #define SM_MASK ((1ULL << SM_SHIFT) - 1) #define ISM_MASK ((1ULL << ISM_SHIFT) - 1) static inline u64 seg_x2y(u64 x, u64 sm) { u64 y; /* * compute * y = x * sm >> SM_SHIFT * but divide it for the upper and lower bits to avoid overflow */ y = (x >> SM_SHIFT) * sm + (((x & SM_MASK) * sm) >> SM_SHIFT); return y; } static inline u64 seg_y2x(u64 y, u64 ism) { u64 x; if (y == 0) x = 0; else if (ism == HT_INFINITY) x = HT_INFINITY; else { x = (y >> ISM_SHIFT) * ism + (((y & ISM_MASK) * ism) >> ISM_SHIFT); } return x; } /* Convert m (bps) into sm (bytes/psched us) */ static u64 m2sm(u32 m) { u64 sm; sm = ((u64)m << SM_SHIFT); sm += PSCHED_TICKS_PER_SEC - 1; do_div(sm, PSCHED_TICKS_PER_SEC); return sm; } /* convert m (bps) into ism (psched us/byte) */ static u64 m2ism(u32 m) { u64 ism; if (m == 0) ism = HT_INFINITY; else { ism = ((u64)PSCHED_TICKS_PER_SEC << ISM_SHIFT); ism += m - 1; do_div(ism, m); } return ism; } /* convert d (us) into dx (psched us) */ static u64 d2dx(u32 d) { u64 dx; dx = ((u64)d * PSCHED_TICKS_PER_SEC); dx += USEC_PER_SEC - 1; do_div(dx, USEC_PER_SEC); return dx; } /* convert sm (bytes/psched us) into m (bps) */ static u32 sm2m(u64 sm) { u64 m; m = (sm * PSCHED_TICKS_PER_SEC) >> SM_SHIFT; return (u32)m; } /* convert dx (psched us) into d (us) */ static u32 dx2d(u64 dx) { u64 d; d = dx * USEC_PER_SEC; do_div(d, PSCHED_TICKS_PER_SEC); return (u32)d; } static void sc2isc(struct tc_service_curve *sc, struct internal_sc *isc) { isc->sm1 = m2sm(sc->m1); isc->ism1 = m2ism(sc->m1); isc->dx = d2dx(sc->d); isc->dy = seg_x2y(isc->dx, isc->sm1); isc->sm2 = m2sm(sc->m2); isc->ism2 = m2ism(sc->m2); } /* * initialize the runtime service curve with the given internal * service curve starting at (x, y). 
*/ static void rtsc_init(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y) { rtsc->x = x; rtsc->y = y; rtsc->sm1 = isc->sm1; rtsc->ism1 = isc->ism1; rtsc->dx = isc->dx; rtsc->dy = isc->dy; rtsc->sm2 = isc->sm2; rtsc->ism2 = isc->ism2; } /* * calculate the y-projection of the runtime service curve by the * given x-projection value */ static u64 rtsc_y2x(struct runtime_sc *rtsc, u64 y) { u64 x; if (y < rtsc->y) x = rtsc->x; else if (y <= rtsc->y + rtsc->dy) { /* x belongs to the 1st segment */ if (rtsc->dy == 0) x = rtsc->x + rtsc->dx; else x = rtsc->x + seg_y2x(y - rtsc->y, rtsc->ism1); } else { /* x belongs to the 2nd segment */ x = rtsc->x + rtsc->dx + seg_y2x(y - rtsc->y - rtsc->dy, rtsc->ism2); } return x; } static u64 rtsc_x2y(struct runtime_sc *rtsc, u64 x) { u64 y; if (x <= rtsc->x) y = rtsc->y; else if (x <= rtsc->x + rtsc->dx) /* y belongs to the 1st segment */ y = rtsc->y + seg_x2y(x - rtsc->x, rtsc->sm1); else /* y belongs to the 2nd segment */ y = rtsc->y + rtsc->dy + seg_x2y(x - rtsc->x - rtsc->dx, rtsc->sm2); return y; } /* * update the runtime service curve by taking the minimum of the current * runtime service curve and the service curve starting at (x, y). */ static void rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y) { u64 y1, y2, dx, dy; u32 dsm; if (isc->sm1 <= isc->sm2) { /* service curve is convex */ y1 = rtsc_x2y(rtsc, x); if (y1 < y) /* the current rtsc is smaller */ return; rtsc->x = x; rtsc->y = y; return; } /* * service curve is concave * compute the two y values of the current rtsc * y1: at x * y2: at (x + dx) */ y1 = rtsc_x2y(rtsc, x); if (y1 <= y) { /* rtsc is below isc, no change to rtsc */ return; } y2 = rtsc_x2y(rtsc, x + isc->dx); if (y2 >= y + isc->dy) { /* rtsc is above isc, replace rtsc by isc */ rtsc->x = x; rtsc->y = y; rtsc->dx = isc->dx; rtsc->dy = isc->dy; return; } /* * the two curves intersect * compute the offsets (dx, dy) using the reverse * function of seg_x2y() * seg_x2y(dx, sm1) == seg_x2y(dx, sm2) + (y1 - y) */ dx = (y1 - y) << SM_SHIFT; dsm = isc->sm1 - isc->sm2; do_div(dx, dsm); /* * check if (x, y1) belongs to the 1st segment of rtsc. * if so, add the offset. */ if (rtsc->x + rtsc->dx > x) dx += rtsc->x + rtsc->dx - x; dy = seg_x2y(dx, isc->sm1); rtsc->x = x; rtsc->y = y; rtsc->dx = dx; rtsc->dy = dy; } static void init_ed(struct hfsc_class *cl, unsigned int next_len) { u64 cur_time = psched_get_time(); /* update the deadline curve */ rtsc_min(&cl->cl_deadline, &cl->cl_rsc, cur_time, cl->cl_cumul); /* * update the eligible curve. * for concave, it is equal to the deadline curve. * for convex, it is a linear curve with slope m2. 
*/ cl->cl_eligible = cl->cl_deadline; if (cl->cl_rsc.sm1 <= cl->cl_rsc.sm2) { cl->cl_eligible.dx = 0; cl->cl_eligible.dy = 0; } /* compute e and d */ cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul); cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len); eltree_insert(cl); } static void update_ed(struct hfsc_class *cl, unsigned int next_len) { cl->cl_e = rtsc_y2x(&cl->cl_eligible, cl->cl_cumul); cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len); eltree_update(cl); } static inline void update_d(struct hfsc_class *cl, unsigned int next_len) { cl->cl_d = rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len); } static inline void update_cfmin(struct hfsc_class *cl) { struct rb_node *n = rb_first(&cl->cf_tree); struct hfsc_class *p; if (n == NULL) { cl->cl_cfmin = 0; return; } p = rb_entry(n, struct hfsc_class, cf_node); cl->cl_cfmin = p->cl_f; } static void init_vf(struct hfsc_class *cl, unsigned int len) { struct hfsc_class *max_cl; struct rb_node *n; u64 vt, f, cur_time; int go_active; cur_time = 0; go_active = 1; for (; cl->cl_parent != NULL; cl = cl->cl_parent) { if (go_active && cl->cl_nactive++ == 0) go_active = 1; else go_active = 0; if (go_active) { n = rb_last(&cl->cl_parent->vt_tree); if (n != NULL) { max_cl = rb_entry(n, struct hfsc_class, vt_node); /* * set vt to the average of the min and max * classes. if the parent's period didn't * change, don't decrease vt of the class. */ vt = max_cl->cl_vt; if (cl->cl_parent->cl_cvtmin != 0) vt = (cl->cl_parent->cl_cvtmin + vt)/2; if (cl->cl_parent->cl_vtperiod != cl->cl_parentperiod || vt > cl->cl_vt) cl->cl_vt = vt; } else { /* * first child for a new parent backlog period. * initialize cl_vt to the highest value seen * among the siblings. this is analogous to * what cur_time would provide in realtime case. */ cl->cl_vt = cl->cl_parent->cl_cvtoff; cl->cl_parent->cl_cvtmin = 0; } /* update the virtual curve */ rtsc_min(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total); cl->cl_vtadj = 0; cl->cl_vtperiod++; /* increment vt period */ cl->cl_parentperiod = cl->cl_parent->cl_vtperiod; if (cl->cl_parent->cl_nactive == 0) cl->cl_parentperiod++; cl->cl_f = 0; vttree_insert(cl); cftree_insert(cl); if (cl->cl_flags & HFSC_USC) { /* class has upper limit curve */ if (cur_time == 0) cur_time = psched_get_time(); /* update the ulimit curve */ rtsc_min(&cl->cl_ulimit, &cl->cl_usc, cur_time, cl->cl_total); /* compute myf */ cl->cl_myf = rtsc_y2x(&cl->cl_ulimit, cl->cl_total); } } f = max(cl->cl_myf, cl->cl_cfmin); if (f != cl->cl_f) { cl->cl_f = f; cftree_update(cl); } update_cfmin(cl->cl_parent); } } static void update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time) { u64 f; /* , myf_bound, delta; */ int go_passive = 0; if (cl->qdisc->q.qlen == 0 && cl->cl_flags & HFSC_FSC) go_passive = 1; for (; cl->cl_parent != NULL; cl = cl->cl_parent) { cl->cl_total += len; if (!(cl->cl_flags & HFSC_FSC) || cl->cl_nactive == 0) continue; if (go_passive && --cl->cl_nactive == 0) go_passive = 1; else go_passive = 0; /* update vt */ cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total) + cl->cl_vtadj; /* * if vt of the class is smaller than cvtmin, * the class was skipped in the past due to non-fit. * if so, we need to adjust vtadj. 
*/ if (cl->cl_vt < cl->cl_parent->cl_cvtmin) { cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt; cl->cl_vt = cl->cl_parent->cl_cvtmin; } if (go_passive) { /* no more active child, going passive */ /* update cvtoff of the parent class */ if (cl->cl_vt > cl->cl_parent->cl_cvtoff) cl->cl_parent->cl_cvtoff = cl->cl_vt; /* remove this class from the vt tree */ vttree_remove(cl); cftree_remove(cl); update_cfmin(cl->cl_parent); continue; } /* update the vt tree */ vttree_update(cl); /* update f */ if (cl->cl_flags & HFSC_USC) { cl->cl_myf = rtsc_y2x(&cl->cl_ulimit, cl->cl_total); #if 0 cl->cl_myf = cl->cl_myfadj + rtsc_y2x(&cl->cl_ulimit, cl->cl_total); /* * This code causes classes to stay way under their * limit when multiple classes are used at gigabit * speed. needs investigation. -kaber */ /* * if myf lags behind by more than one clock tick * from the current time, adjust myfadj to prevent * a rate-limited class from going greedy. * in a steady state under rate-limiting, myf * fluctuates within one clock tick. */ myf_bound = cur_time - PSCHED_JIFFIE2US(1); if (cl->cl_myf < myf_bound) { delta = cur_time - cl->cl_myf; cl->cl_myfadj += delta; cl->cl_myf += delta; } #endif } f = max(cl->cl_myf, cl->cl_cfmin); if (f != cl->cl_f) { cl->cl_f = f; cftree_update(cl); update_cfmin(cl->cl_parent); } } } static void hfsc_adjust_levels(struct hfsc_class *cl) { struct hfsc_class *p; unsigned int level; do { level = 0; list_for_each_entry(p, &cl->children, siblings) { if (p->level >= level) level = p->level + 1; } cl->level = level; } while ((cl = cl->cl_parent) != NULL); } static inline struct hfsc_class * hfsc_find_class(u32 classid, struct Qdisc *sch) { struct hfsc_sched *q = qdisc_priv(sch); struct Qdisc_class_common *clc; clc = qdisc_class_find(&q->clhash, classid); if (clc == NULL) return NULL; return container_of(clc, struct hfsc_class, cl_common); } static void hfsc_change_rsc(struct hfsc_class *cl, struct tc_service_curve *rsc, u64 cur_time) { sc2isc(rsc, &cl->cl_rsc); rtsc_init(&cl->cl_deadline, &cl->cl_rsc, cur_time, cl->cl_cumul); cl->cl_eligible = cl->cl_deadline; if (cl->cl_rsc.sm1 <= cl->cl_rsc.sm2) { cl->cl_eligible.dx = 0; cl->cl_eligible.dy = 0; } cl->cl_flags |= HFSC_RSC; } static void hfsc_change_fsc(struct hfsc_class *cl, struct tc_service_curve *fsc) { sc2isc(fsc, &cl->cl_fsc); rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total); cl->cl_flags |= HFSC_FSC; } static void hfsc_change_usc(struct hfsc_class *cl, struct tc_service_curve *usc, u64 cur_time) { sc2isc(usc, &cl->cl_usc); rtsc_init(&cl->cl_ulimit, &cl->cl_usc, cur_time, cl->cl_total); cl->cl_flags |= HFSC_USC; } static void hfsc_upgrade_rt(struct hfsc_class *cl) { cl->cl_fsc = cl->cl_rsc; rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total); cl->cl_flags |= HFSC_FSC; } static const struct nla_policy hfsc_policy[TCA_HFSC_MAX + 1] = { [TCA_HFSC_RSC] = { .len = sizeof(struct tc_service_curve) }, [TCA_HFSC_FSC] = { .len = sizeof(struct tc_service_curve) }, [TCA_HFSC_USC] = { .len = sizeof(struct tc_service_curve) }, }; static int hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca, unsigned long *arg, struct netlink_ext_ack *extack) { struct hfsc_sched *q = qdisc_priv(sch); struct hfsc_class *cl = (struct hfsc_class *)*arg; struct hfsc_class *parent = NULL; struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_HFSC_MAX + 1]; struct tc_service_curve *rsc = NULL, *fsc = NULL, *usc = NULL; u64 cur_time; int err; if (opt == NULL) return -EINVAL; err = 
nla_parse_nested_deprecated(tb, TCA_HFSC_MAX, opt, hfsc_policy, NULL); if (err < 0) return err; if (tb[TCA_HFSC_RSC]) { rsc = nla_data(tb[TCA_HFSC_RSC]); if (rsc->m1 == 0 && rsc->m2 == 0) rsc = NULL; } if (tb[TCA_HFSC_FSC]) { fsc = nla_data(tb[TCA_HFSC_FSC]); if (fsc->m1 == 0 && fsc->m2 == 0) fsc = NULL; } if (tb[TCA_HFSC_USC]) { usc = nla_data(tb[TCA_HFSC_USC]); if (usc->m1 == 0 && usc->m2 == 0) usc = NULL; } if (cl != NULL) { int old_flags; int len = 0; if (parentid) { if (cl->cl_parent && cl->cl_parent->cl_common.classid != parentid) return -EINVAL; if (cl->cl_parent == NULL && parentid != TC_H_ROOT) return -EINVAL; } cur_time = psched_get_time(); if (tca[TCA_RATE]) { err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est, NULL, true, tca[TCA_RATE]); if (err) return err; } sch_tree_lock(sch); old_flags = cl->cl_flags; if (rsc != NULL) hfsc_change_rsc(cl, rsc, cur_time); if (fsc != NULL) hfsc_change_fsc(cl, fsc); if (usc != NULL) hfsc_change_usc(cl, usc, cur_time); if (cl->qdisc->q.qlen != 0) len = qdisc_peek_len(cl->qdisc); /* Check queue length again since some qdisc implementations * (e.g., netem/codel) might empty the queue during the peek * operation. */ if (cl->qdisc->q.qlen != 0) { if (cl->cl_flags & HFSC_RSC) { if (old_flags & HFSC_RSC) update_ed(cl, len); else init_ed(cl, len); } if (cl->cl_flags & HFSC_FSC) { if (old_flags & HFSC_FSC) update_vf(cl, 0, cur_time); else init_vf(cl, len); } } sch_tree_unlock(sch); return 0; } if (parentid == TC_H_ROOT) return -EEXIST; parent = &q->root; if (parentid) { parent = hfsc_find_class(parentid, sch); if (parent == NULL) return -ENOENT; } if (classid == 0 || TC_H_MAJ(classid ^ sch->handle) != 0) return -EINVAL; if (hfsc_find_class(classid, sch)) return -EEXIST; if (rsc == NULL && fsc == NULL) return -EINVAL; cl = kzalloc(sizeof(struct hfsc_class), GFP_KERNEL); if (cl == NULL) return -ENOBUFS; RB_CLEAR_NODE(&cl->el_node); err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack); if (err) { kfree(cl); return err; } if (tca[TCA_RATE]) { err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est, NULL, true, tca[TCA_RATE]); if (err) { tcf_block_put(cl->block); kfree(cl); return err; } } if (rsc != NULL) hfsc_change_rsc(cl, rsc, 0); if (fsc != NULL) hfsc_change_fsc(cl, fsc); if (usc != NULL) hfsc_change_usc(cl, usc, 0); cl->cl_common.classid = classid; cl->sched = q; cl->cl_parent = parent; cl->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid, NULL); if (cl->qdisc == NULL) cl->qdisc = &noop_qdisc; else qdisc_hash_add(cl->qdisc, true); INIT_LIST_HEAD(&cl->children); cl->vt_tree = RB_ROOT; cl->cf_tree = RB_ROOT; sch_tree_lock(sch); /* Check if the inner class is a misconfigured 'rt' */ if (!(parent->cl_flags & HFSC_FSC) && parent != &q->root) { NL_SET_ERR_MSG(extack, "Forced curve change on parent 'rt' to 'sc'"); hfsc_upgrade_rt(parent); } qdisc_class_hash_insert(&q->clhash, &cl->cl_common); list_add_tail(&cl->siblings, &parent->children); if (parent->level == 0) qdisc_purge_queue(parent->qdisc); hfsc_adjust_levels(parent); sch_tree_unlock(sch); qdisc_class_hash_grow(sch, &q->clhash); *arg = (unsigned long)cl; return 0; } static void hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl) { struct hfsc_sched *q = qdisc_priv(sch); tcf_block_put(cl->block); qdisc_put(cl->qdisc); gen_kill_estimator(&cl->rate_est); if (cl != &q->root) kfree(cl); } static int hfsc_delete_class(struct Qdisc *sch, unsigned long arg, struct netlink_ext_ack *extack) { struct hfsc_sched *q = qdisc_priv(sch); struct hfsc_class *cl = (struct 
hfsc_class *)arg; if (cl->level > 0 || qdisc_class_in_use(&cl->cl_common) || cl == &q->root) { NL_SET_ERR_MSG(extack, "HFSC class in use"); return -EBUSY; } sch_tree_lock(sch); list_del(&cl->siblings); hfsc_adjust_levels(cl->cl_parent); qdisc_purge_queue(cl->qdisc); qdisc_class_hash_remove(&q->clhash, &cl->cl_common); sch_tree_unlock(sch); hfsc_destroy_class(sch, cl); return 0; } static struct hfsc_class * hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) { struct hfsc_sched *q = qdisc_priv(sch); struct hfsc_class *head, *cl; struct tcf_result res; struct tcf_proto *tcf; int result; if (TC_H_MAJ(skb->priority ^ sch->handle) == 0 && (cl = hfsc_find_class(skb->priority, sch)) != NULL) if (cl->level == 0) return cl; *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; head = &q->root; tcf = rcu_dereference_bh(q->root.filter_list); while (tcf && (result = tcf_classify(skb, NULL, tcf, &res, false)) >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_QUEUED: case TC_ACT_STOLEN: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; fallthrough; case TC_ACT_SHOT: return NULL; } #endif cl = (struct hfsc_class *)res.class; if (!cl) { cl = hfsc_find_class(res.classid, sch); if (!cl) break; /* filter selected invalid classid */ if (cl->level >= head->level) break; /* filter may only point downwards */ } if (cl->level == 0) return cl; /* hit leaf class */ /* apply inner filter chain */ tcf = rcu_dereference_bh(cl->filter_list); head = cl; } /* classification failed, try default class */ cl = hfsc_find_class(TC_H_MAKE(TC_H_MAJ(sch->handle), READ_ONCE(q->defcls)), sch); if (cl == NULL || cl->level > 0) return NULL; return cl; } static int hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, struct Qdisc **old, struct netlink_ext_ack *extack) { struct hfsc_class *cl = (struct hfsc_class *)arg; if (cl->level > 0) return -EINVAL; if (new == NULL) { new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, cl->cl_common.classid, NULL); if (new == NULL) new = &noop_qdisc; } *old = qdisc_replace(sch, new, &cl->qdisc); return 0; } static struct Qdisc * hfsc_class_leaf(struct Qdisc *sch, unsigned long arg) { struct hfsc_class *cl = (struct hfsc_class *)arg; if (cl->level == 0) return cl->qdisc; return NULL; } static void hfsc_qlen_notify(struct Qdisc *sch, unsigned long arg) { struct hfsc_class *cl = (struct hfsc_class *)arg; /* vttree is now handled in update_vf() so that update_vf(cl, 0, 0) * needs to be called explicitly to remove a class from vttree. 
*/ if (cl->cl_nactive) update_vf(cl, 0, 0); if (cl->cl_flags & HFSC_RSC) eltree_remove(cl); } static unsigned long hfsc_search_class(struct Qdisc *sch, u32 classid) { return (unsigned long)hfsc_find_class(classid, sch); } static unsigned long hfsc_bind_tcf(struct Qdisc *sch, unsigned long parent, u32 classid) { struct hfsc_class *p = (struct hfsc_class *)parent; struct hfsc_class *cl = hfsc_find_class(classid, sch); if (cl != NULL) { if (p != NULL && p->level <= cl->level) return 0; qdisc_class_get(&cl->cl_common); } return (unsigned long)cl; } static void hfsc_unbind_tcf(struct Qdisc *sch, unsigned long arg) { struct hfsc_class *cl = (struct hfsc_class *)arg; qdisc_class_put(&cl->cl_common); } static struct tcf_block *hfsc_tcf_block(struct Qdisc *sch, unsigned long arg, struct netlink_ext_ack *extack) { struct hfsc_sched *q = qdisc_priv(sch); struct hfsc_class *cl = (struct hfsc_class *)arg; if (cl == NULL) cl = &q->root; return cl->block; } static int hfsc_dump_sc(struct sk_buff *skb, int attr, struct internal_sc *sc) { struct tc_service_curve tsc; tsc.m1 = sm2m(sc->sm1); tsc.d = dx2d(sc->dx); tsc.m2 = sm2m(sc->sm2); if (nla_put(skb, attr, sizeof(tsc), &tsc)) goto nla_put_failure; return skb->len; nla_put_failure: return -1; } static int hfsc_dump_curves(struct sk_buff *skb, struct hfsc_class *cl) { if ((cl->cl_flags & HFSC_RSC) && (hfsc_dump_sc(skb, TCA_HFSC_RSC, &cl->cl_rsc) < 0)) goto nla_put_failure; if ((cl->cl_flags & HFSC_FSC) && (hfsc_dump_sc(skb, TCA_HFSC_FSC, &cl->cl_fsc) < 0)) goto nla_put_failure; if ((cl->cl_flags & HFSC_USC) && (hfsc_dump_sc(skb, TCA_HFSC_USC, &cl->cl_usc) < 0)) goto nla_put_failure; return skb->len; nla_put_failure: return -1; } static int hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, struct tcmsg *tcm) { struct hfsc_class *cl = (struct hfsc_class *)arg; struct nlattr *nest; tcm->tcm_parent = cl->cl_parent ? 
cl->cl_parent->cl_common.classid : TC_H_ROOT; tcm->tcm_handle = cl->cl_common.classid; if (cl->level == 0) tcm->tcm_info = cl->qdisc->handle; nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; if (hfsc_dump_curves(skb, cl) < 0) goto nla_put_failure; return nla_nest_end(skb, nest); nla_put_failure: nla_nest_cancel(skb, nest); return -EMSGSIZE; } static int hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) { struct hfsc_class *cl = (struct hfsc_class *)arg; struct tc_hfsc_stats xstats; __u32 qlen; qdisc_qstats_qlen_backlog(cl->qdisc, &qlen, &cl->qstats.backlog); xstats.level = cl->level; xstats.period = cl->cl_vtperiod; xstats.work = cl->cl_total; xstats.rtwork = cl->cl_cumul; if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 || gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0) return -1; return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); } static void hfsc_walk(struct Qdisc *sch, struct qdisc_walker *arg) { struct hfsc_sched *q = qdisc_priv(sch); struct hfsc_class *cl; unsigned int i; if (arg->stop) return; for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) { if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg)) return; } } } static void hfsc_schedule_watchdog(struct Qdisc *sch) { struct hfsc_sched *q = qdisc_priv(sch); struct hfsc_class *cl; u64 next_time = 0; cl = eltree_get_minel(q); if (cl) next_time = cl->cl_e; if (q->root.cl_cfmin != 0) { if (next_time == 0 || next_time > q->root.cl_cfmin) next_time = q->root.cl_cfmin; } if (next_time) qdisc_watchdog_schedule(&q->watchdog, next_time); } static int hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct hfsc_sched *q = qdisc_priv(sch); struct tc_hfsc_qopt *qopt; int err; qdisc_watchdog_init(&q->watchdog, sch); if (!opt || nla_len(opt) < sizeof(*qopt)) return -EINVAL; qopt = nla_data(opt); q->defcls = qopt->defcls; err = qdisc_class_hash_init(&q->clhash); if (err < 0) return err; q->eligible = RB_ROOT; err = tcf_block_get(&q->root.block, &q->root.filter_list, sch, extack); if (err) return err; gnet_stats_basic_sync_init(&q->root.bstats); q->root.cl_common.classid = sch->handle; q->root.sched = q; q->root.qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle, NULL); if (q->root.qdisc == NULL) q->root.qdisc = &noop_qdisc; else qdisc_hash_add(q->root.qdisc, true); INIT_LIST_HEAD(&q->root.children); q->root.vt_tree = RB_ROOT; q->root.cf_tree = RB_ROOT; qdisc_class_hash_insert(&q->clhash, &q->root.cl_common); qdisc_class_hash_grow(sch, &q->clhash); return 0; } static int hfsc_change_qdisc(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct hfsc_sched *q = qdisc_priv(sch); struct tc_hfsc_qopt *qopt; if (nla_len(opt) < sizeof(*qopt)) return -EINVAL; qopt = nla_data(opt); WRITE_ONCE(q->defcls, qopt->defcls); return 0; } static void hfsc_reset_class(struct hfsc_class *cl) { cl->cl_total = 0; cl->cl_cumul = 0; cl->cl_d = 0; cl->cl_e = 0; cl->cl_vt = 0; cl->cl_vtadj = 0; cl->cl_cvtmin = 0; cl->cl_cvtoff = 0; cl->cl_vtperiod = 0; cl->cl_parentperiod = 0; cl->cl_f = 0; cl->cl_myf = 0; cl->cl_cfmin = 0; cl->cl_nactive = 0; cl->vt_tree = RB_ROOT; cl->cf_tree = RB_ROOT; qdisc_reset(cl->qdisc); if (cl->cl_flags & HFSC_RSC) rtsc_init(&cl->cl_deadline, &cl->cl_rsc, 0, 0); if (cl->cl_flags & HFSC_FSC) rtsc_init(&cl->cl_virtual, &cl->cl_fsc, 0, 0); if (cl->cl_flags & HFSC_USC) 
rtsc_init(&cl->cl_ulimit, &cl->cl_usc, 0, 0); } static void hfsc_reset_qdisc(struct Qdisc *sch) { struct hfsc_sched *q = qdisc_priv(sch); struct hfsc_class *cl; unsigned int i; for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) hfsc_reset_class(cl); } q->eligible = RB_ROOT; qdisc_watchdog_cancel(&q->watchdog); } static void hfsc_destroy_qdisc(struct Qdisc *sch) { struct hfsc_sched *q = qdisc_priv(sch); struct hlist_node *next; struct hfsc_class *cl; unsigned int i; for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) { tcf_block_put(cl->block); cl->block = NULL; } } for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], cl_common.hnode) hfsc_destroy_class(sch, cl); } qdisc_class_hash_destroy(&q->clhash); qdisc_watchdog_cancel(&q->watchdog); } static int hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb) { struct hfsc_sched *q = qdisc_priv(sch); unsigned char *b = skb_tail_pointer(skb); struct tc_hfsc_qopt qopt; qopt.defcls = READ_ONCE(q->defcls); if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt)) goto nla_put_failure; return skb->len; nla_put_failure: nlmsg_trim(skb, b); return -1; } static int hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { unsigned int len = qdisc_pkt_len(skb); struct hfsc_class *cl; int err; bool first; cl = hfsc_classify(skb, sch, &err); if (cl == NULL) { if (err & __NET_XMIT_BYPASS) qdisc_qstats_drop(sch); __qdisc_drop(skb, to_free); return err; } first = !cl->qdisc->q.qlen; err = qdisc_enqueue(skb, cl->qdisc, to_free); if (unlikely(err != NET_XMIT_SUCCESS)) { if (net_xmit_drop_count(err)) { cl->qstats.drops++; qdisc_qstats_drop(sch); } return err; } sch->qstats.backlog += len; sch->q.qlen++; if (first && !cl_in_el_or_vttree(cl)) { if (cl->cl_flags & HFSC_RSC) init_ed(cl, len); if (cl->cl_flags & HFSC_FSC) init_vf(cl, len); /* * If this is the first packet, isolate the head so an eventual * head drop before the first dequeue operation has no chance * to invalidate the deadline. */ if (cl->cl_flags & HFSC_RSC) cl->qdisc->ops->peek(cl->qdisc); } return NET_XMIT_SUCCESS; } static struct sk_buff * hfsc_dequeue(struct Qdisc *sch) { struct hfsc_sched *q = qdisc_priv(sch); struct hfsc_class *cl; struct sk_buff *skb; u64 cur_time; unsigned int next_len; int realtime = 0; if (sch->q.qlen == 0) return NULL; cur_time = psched_get_time(); /* * if there are eligible classes, use real-time criteria. * find the class with the minimum deadline among * the eligible classes. */ cl = eltree_get_mindl(q, cur_time); if (cl) { realtime = 1; } else { /* * use link-sharing criteria * get the class with the minimum vt in the hierarchy */ cl = vttree_get_minvt(&q->root, cur_time); if (cl == NULL) { qdisc_qstats_overlimit(sch); hfsc_schedule_watchdog(sch); return NULL; } } skb = qdisc_dequeue_peeked(cl->qdisc); if (skb == NULL) { qdisc_warn_nonwc("HFSC", cl->qdisc); return NULL; } bstats_update(&cl->bstats, skb); update_vf(cl, qdisc_pkt_len(skb), cur_time); if (realtime) cl->cl_cumul += qdisc_pkt_len(skb); if (cl->cl_flags & HFSC_RSC) { if (cl->qdisc->q.qlen != 0) { /* update ed */ next_len = qdisc_peek_len(cl->qdisc); /* Check queue length again since some qdisc implementations * (e.g., netem/codel) might empty the queue during the peek * operation. 
*/ if (cl->qdisc->q.qlen != 0) { if (realtime) update_ed(cl, next_len); else update_d(cl, next_len); } } else { /* the class becomes passive */ eltree_remove(cl); } } qdisc_bstats_update(sch, skb); qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; return skb; } static const struct Qdisc_class_ops hfsc_class_ops = { .change = hfsc_change_class, .delete = hfsc_delete_class, .graft = hfsc_graft_class, .leaf = hfsc_class_leaf, .qlen_notify = hfsc_qlen_notify, .find = hfsc_search_class, .bind_tcf = hfsc_bind_tcf, .unbind_tcf = hfsc_unbind_tcf, .tcf_block = hfsc_tcf_block, .dump = hfsc_dump_class, .dump_stats = hfsc_dump_class_stats, .walk = hfsc_walk }; static struct Qdisc_ops hfsc_qdisc_ops __read_mostly = { .id = "hfsc", .init = hfsc_init_qdisc, .change = hfsc_change_qdisc, .reset = hfsc_reset_qdisc, .destroy = hfsc_destroy_qdisc, .dump = hfsc_dump_qdisc, .enqueue = hfsc_enqueue, .dequeue = hfsc_dequeue, .peek = qdisc_peek_dequeued, .cl_ops = &hfsc_class_ops, .priv_size = sizeof(struct hfsc_sched), .owner = THIS_MODULE }; MODULE_ALIAS_NET_SCH("hfsc"); static int __init hfsc_init(void) { return register_qdisc(&hfsc_qdisc_ops); } static void __exit hfsc_cleanup(void) { unregister_qdisc(&hfsc_qdisc_ops); } MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Hierarchical Fair Service Curve scheduler"); module_init(hfsc_init); module_exit(hfsc_cleanup); |
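/*
 * Illustrative sketch (not part of the kernel sources): the fixed-point
 * scaling behind m2sm() and seg_x2y() in sch_hfsc.c above, redone in
 * userspace with ordinary 64-bit division so the arithmetic can be checked
 * in isolation.  PSCHED_SHIFT is assumed to be 10 here (giving the SM_SHIFT
 * value quoted in the service-curve comment block), and the rate is treated
 * as bytes per second purely for the purpose of the example.
 */
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

#define EX_PSCHED_SHIFT		10
#define EX_TICKS_PER_SEC	(1000000000ULL >> EX_PSCHED_SHIFT) /* ~1.024us ticks */
#define EX_SM_SHIFT		(30 - EX_PSCHED_SHIFT)		   /* 20 */
#define EX_SM_MASK		((1ULL << EX_SM_SHIFT) - 1)

/* rate per second -> scaled slope per psched tick (mirrors m2sm()) */
static uint64_t ex_m2sm(uint64_t m)
{
	return ((m << EX_SM_SHIFT) + EX_TICKS_PER_SEC - 1) / EX_TICKS_PER_SEC;
}

/*
 * y = x * sm >> SM_SHIFT, computed as a split multiply so the intermediate
 * product cannot overflow 64 bits (mirrors seg_x2y()).
 */
static uint64_t ex_seg_x2y(uint64_t x, uint64_t sm)
{
	return (x >> EX_SM_SHIFT) * sm +
	       (((x & EX_SM_MASK) * sm) >> EX_SM_SHIFT);
}

int main(void)
{
	uint64_t rate = 1250000;	/* 1.25 MB/s, roughly 10 Mbit/s */
	uint64_t sm = ex_m2sm(rate);

	/* Over one second worth of ticks the curve admits ~rate bytes. */
	printf("sm = %" PRIu64 ", bytes after 1s = %" PRIu64 "\n",
	       sm, ex_seg_x2y(EX_TICKS_PER_SEC, sm));
	return 0;
}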
/* SPDX-License-Identifier: GPL-2.0-only */ /* * vivid-core.h - core datastructures * * Copyright 2014 Cisco Systems, Inc. and/or its affiliates. All rights reserved. */ #ifndef _VIVID_CORE_H_ #define _VIVID_CORE_H_ #include <linux/fb.h> #include <linux/workqueue.h> #include <media/cec.h> #include <media/videobuf2-v4l2.h> #include <media/v4l2-device.h> #include <media/v4l2-dev.h> #include <media/v4l2-ctrls.h> #include <media/tpg/v4l2-tpg.h> #include "vivid-rds-gen.h" #include "vivid-vbi-gen.h" #define dprintk(dev, level, fmt, arg...)
\ v4l2_dbg(level, vivid_debug, &dev->v4l2_dev, fmt, ## arg) /* The maximum number of inputs */ #define MAX_INPUTS 16 /* The maximum number of outputs */ #define MAX_OUTPUTS 16 /* The maximum number of video capture buffers */ #define MAX_VID_CAP_BUFFERS 64 /* The maximum up or down scaling factor is 4 */ #define MAX_ZOOM 4 /* The maximum image width/height are set to 4K DMT */ #define MAX_WIDTH 4096 #define MAX_HEIGHT 2160 /* The minimum image width/height */ #define MIN_WIDTH 16 #define MIN_HEIGHT MIN_WIDTH /* Pixel Array control divider */ #define PIXEL_ARRAY_DIV MIN_WIDTH /* The data_offset of plane 0 for the multiplanar formats */ #define PLANE0_DATA_OFFSET 128 /* The supported TV frequency range in MHz */ #define MIN_TV_FREQ (44U * 16U) #define MAX_TV_FREQ (958U * 16U) /* The number of samples returned in every SDR buffer */ #define SDR_CAP_SAMPLES_PER_BUF 0x4000 /* used by the threads to know when to resync internal counters */ #define JIFFIES_PER_DAY (3600U * 24U * HZ) #define JIFFIES_RESYNC (JIFFIES_PER_DAY * (0xf0000000U / JIFFIES_PER_DAY)) /* * Maximum number of HDMI inputs allowed by vivid, due to limitations * of the Physical Address in the EDID and used by CEC we stop at 15 * inputs and outputs. */ #define MAX_HDMI_INPUTS 15 #define MAX_HDMI_OUTPUTS 15 /* Maximum number of S-Video inputs allowed by vivid */ #define MAX_SVID_INPUTS 16 /* The maximum number of items in a menu control */ #define MAX_MENU_ITEMS BITS_PER_LONG_LONG /* Number of fixed menu items in the 'Connected To' menu controls */ #define FIXED_MENU_ITEMS 2 /* The maximum number of vivid devices */ #define VIVID_MAX_DEVS CONFIG_VIDEO_VIVID_MAX_DEVS extern const struct v4l2_rect vivid_min_rect; extern const struct v4l2_rect vivid_max_rect; extern unsigned vivid_debug; /* * NULL-terminated string array for the HDMI 'Connected To' menu controls * with the list of possible HDMI outputs. * * The first two items are fixed ("TPG" and "None"). */ extern char *vivid_ctrl_hdmi_to_output_strings[1 + MAX_MENU_ITEMS]; /* Menu control skip mask of all HDMI outputs that are in use */ extern u64 hdmi_to_output_menu_skip_mask; /* * Bitmask of which vivid instances need to update any connected * HDMI outputs. */ extern u64 hdmi_input_update_outputs_mask; /* * Spinlock for access to hdmi_to_output_menu_skip_mask and * hdmi_input_update_outputs_mask. */ extern spinlock_t hdmi_output_skip_mask_lock; /* * Workqueue that updates the menu controls whenever the HDMI menu skip mask * changes. */ extern struct workqueue_struct *update_hdmi_ctrls_workqueue; /* * The HDMI menu control value (index in the menu list) maps to an HDMI * output that is part of the given vivid_dev instance and has the given * output index (as returned by VIDIOC_G_OUTPUT). * * NULL/0 if not available. */ extern struct vivid_dev *vivid_ctrl_hdmi_to_output_instance[MAX_MENU_ITEMS]; extern unsigned int vivid_ctrl_hdmi_to_output_index[MAX_MENU_ITEMS]; /* * NULL-terminated string array for the S-Video 'Connected To' menu controls * with the list of possible S-Video outputs. * * The first two items are fixed ("TPG" and "None"). */ extern char *vivid_ctrl_svid_to_output_strings[1 + MAX_MENU_ITEMS]; /* Menu control skip mask of all S-Video outputs that are in use */ extern u64 svid_to_output_menu_skip_mask; /* Spinlock for access to svid_to_output_menu_skip_mask */ extern spinlock_t svid_output_skip_mask_lock; /* * Workqueue that updates the menu controls whenever the S-Video menu skip mask * changes. 
*/ extern struct workqueue_struct *update_svid_ctrls_workqueue; /* * The S-Video menu control value (index in the menu list) maps to an S-Video * output that is part of the given vivid_dev instance and has the given * output index (as returned by VIDIOC_G_OUTPUT). * * NULL/0 if not available. */ extern struct vivid_dev *vivid_ctrl_svid_to_output_instance[MAX_MENU_ITEMS]; extern unsigned int vivid_ctrl_svid_to_output_index[MAX_MENU_ITEMS]; extern struct vivid_dev *vivid_devs[VIVID_MAX_DEVS]; extern unsigned int n_devs; struct vivid_fmt { u32 fourcc; /* v4l2 format id */ enum tgp_color_enc color_enc; bool can_do_overlay; u8 vdownsampling[TPG_MAX_PLANES]; u32 alpha_mask; u8 planes; u8 buffers; u32 data_offset[TPG_MAX_PLANES]; u32 bit_depth[TPG_MAX_PLANES]; }; extern struct vivid_fmt vivid_formats[]; /* buffer for one video frame */ struct vivid_buffer { /* common v4l buffer stuff -- must be first */ struct vb2_v4l2_buffer vb; struct list_head list; }; enum vivid_input { WEBCAM, TV, SVID, HDMI, }; enum vivid_signal_mode { CURRENT_DV_TIMINGS, CURRENT_STD = CURRENT_DV_TIMINGS, NO_SIGNAL, NO_LOCK, OUT_OF_RANGE, SELECTED_DV_TIMINGS, SELECTED_STD = SELECTED_DV_TIMINGS, CYCLE_DV_TIMINGS, CYCLE_STD = CYCLE_DV_TIMINGS, CUSTOM_DV_TIMINGS, }; enum vivid_colorspace { VIVID_CS_170M, VIVID_CS_709, VIVID_CS_SRGB, VIVID_CS_OPRGB, VIVID_CS_2020, VIVID_CS_DCI_P3, VIVID_CS_240M, VIVID_CS_SYS_M, VIVID_CS_SYS_BG, }; #define VIVID_INVALID_SIGNAL(mode) \ ((mode) == NO_SIGNAL || (mode) == NO_LOCK || (mode) == OUT_OF_RANGE) struct vivid_cec_xfer { struct cec_adapter *adap; u8 msg[CEC_MAX_MSG_SIZE]; u32 len; u32 sft; }; struct vivid_dev { u8 inst; struct v4l2_device v4l2_dev; #ifdef CONFIG_MEDIA_CONTROLLER struct media_device mdev; struct media_pad vid_cap_pad; struct media_pad vid_out_pad; struct media_pad vbi_cap_pad; struct media_pad vbi_out_pad; struct media_pad sdr_cap_pad; struct media_pad meta_cap_pad; struct media_pad meta_out_pad; struct media_pad touch_cap_pad; #endif struct v4l2_ctrl_handler ctrl_hdl_user_gen; struct v4l2_ctrl_handler ctrl_hdl_user_vid; struct v4l2_ctrl_handler ctrl_hdl_user_aud; struct v4l2_ctrl_handler ctrl_hdl_streaming; struct v4l2_ctrl_handler ctrl_hdl_sdtv_cap; struct v4l2_ctrl_handler ctrl_hdl_loop_cap; struct v4l2_ctrl_handler ctrl_hdl_fb; struct video_device vid_cap_dev; struct v4l2_ctrl_handler ctrl_hdl_vid_cap; struct video_device vid_out_dev; struct v4l2_ctrl_handler ctrl_hdl_vid_out; struct video_device vbi_cap_dev; struct v4l2_ctrl_handler ctrl_hdl_vbi_cap; struct video_device vbi_out_dev; struct v4l2_ctrl_handler ctrl_hdl_vbi_out; struct video_device radio_rx_dev; struct v4l2_ctrl_handler ctrl_hdl_radio_rx; struct video_device radio_tx_dev; struct v4l2_ctrl_handler ctrl_hdl_radio_tx; struct video_device sdr_cap_dev; struct v4l2_ctrl_handler ctrl_hdl_sdr_cap; struct video_device meta_cap_dev; struct v4l2_ctrl_handler ctrl_hdl_meta_cap; struct video_device meta_out_dev; struct v4l2_ctrl_handler ctrl_hdl_meta_out; struct video_device touch_cap_dev; struct v4l2_ctrl_handler ctrl_hdl_touch_cap; spinlock_t slock; struct mutex mutex; struct work_struct update_hdmi_ctrl_work; struct work_struct update_svid_ctrl_work; /* capabilities */ u32 vid_cap_caps; u32 vid_out_caps; u32 vbi_cap_caps; u32 vbi_out_caps; u32 sdr_cap_caps; u32 radio_rx_caps; u32 radio_tx_caps; u32 meta_cap_caps; u32 meta_out_caps; u32 touch_cap_caps; /* supported features */ bool multiplanar; u8 num_inputs; u8 num_hdmi_inputs; u8 num_svid_inputs; u8 input_type[MAX_INPUTS]; u8 input_name_counter[MAX_INPUTS]; u8 
num_outputs; u8 num_hdmi_outputs; u8 output_type[MAX_OUTPUTS]; u8 output_name_counter[MAX_OUTPUTS]; bool has_audio_inputs; bool has_audio_outputs; bool has_vid_cap; bool has_vid_out; bool has_vbi_cap; bool has_raw_vbi_cap; bool has_sliced_vbi_cap; bool has_vbi_out; bool has_raw_vbi_out; bool has_sliced_vbi_out; bool has_radio_rx; bool has_radio_tx; bool has_sdr_cap; bool has_fb; bool has_meta_cap; bool has_meta_out; bool has_tv_tuner; bool has_touch_cap; /* Output index (0-MAX_OUTPUTS) to vivid instance of connected input */ struct vivid_dev *output_to_input_instance[MAX_OUTPUTS]; /* Output index (0-MAX_OUTPUTS) to input index (0-MAX_INPUTS) of connected input */ u8 output_to_input_index[MAX_OUTPUTS]; /* Output index (0-MAX_OUTPUTS) to HDMI or S-Video output index (0-MAX_HDMI/SVID_OUTPUTS) */ u8 output_to_iface_index[MAX_OUTPUTS]; /* ctrl_hdmi_to_output or ctrl_svid_to_output control value for each input */ s32 input_is_connected_to_output[MAX_INPUTS]; /* HDMI index (0-MAX_HDMI_OUTPUTS) to output index (0-MAX_OUTPUTS) */ u8 hdmi_index_to_output_index[MAX_HDMI_OUTPUTS]; /* HDMI index (0-MAX_HDMI_INPUTS) to input index (0-MAX_INPUTS) */ u8 hdmi_index_to_input_index[MAX_HDMI_INPUTS]; /* S-Video index (0-MAX_SVID_INPUTS) to input index (0-MAX_INPUTS) */ u8 svid_index_to_input_index[MAX_SVID_INPUTS]; /* controls */ struct v4l2_ctrl *brightness; struct v4l2_ctrl *contrast; struct v4l2_ctrl *saturation; struct v4l2_ctrl *hue; struct { /* autogain/gain cluster */ struct v4l2_ctrl *autogain; struct v4l2_ctrl *gain; }; struct v4l2_ctrl *volume; struct v4l2_ctrl *mute; struct v4l2_ctrl *alpha; struct v4l2_ctrl *button; struct v4l2_ctrl *boolean; struct v4l2_ctrl *int32; struct v4l2_ctrl *int64; struct v4l2_ctrl *menu; struct v4l2_ctrl *string; struct v4l2_ctrl *bitmask; struct v4l2_ctrl *int_menu; struct v4l2_ctrl *ro_int32; struct v4l2_ctrl *pixel_array; struct v4l2_ctrl *test_pattern; struct v4l2_ctrl *colorspace; struct v4l2_ctrl *rgb_range_cap; struct v4l2_ctrl *real_rgb_range_cap; struct { /* std_signal_mode/standard cluster */ struct v4l2_ctrl *ctrl_std_signal_mode; struct v4l2_ctrl *ctrl_standard; }; struct { /* dv_timings_signal_mode/timings cluster */ struct v4l2_ctrl *ctrl_dv_timings_signal_mode; struct v4l2_ctrl *ctrl_dv_timings; }; struct v4l2_ctrl *ctrl_has_crop_cap; struct v4l2_ctrl *ctrl_has_compose_cap; struct v4l2_ctrl *ctrl_has_scaler_cap; struct v4l2_ctrl *ctrl_has_crop_out; struct v4l2_ctrl *ctrl_has_compose_out; struct v4l2_ctrl *ctrl_has_scaler_out; struct v4l2_ctrl *ctrl_tx_mode; struct v4l2_ctrl *ctrl_tx_rgb_range; struct v4l2_ctrl *ctrl_tx_edid_present; struct v4l2_ctrl *ctrl_tx_hotplug; struct v4l2_ctrl *ctrl_tx_rxsense; struct v4l2_ctrl *ctrl_rx_power_present; struct v4l2_ctrl *radio_tx_rds_pi; struct v4l2_ctrl *radio_tx_rds_pty; struct v4l2_ctrl *radio_tx_rds_mono_stereo; struct v4l2_ctrl *radio_tx_rds_art_head; struct v4l2_ctrl *radio_tx_rds_compressed; struct v4l2_ctrl *radio_tx_rds_dyn_pty; struct v4l2_ctrl *radio_tx_rds_ta; struct v4l2_ctrl *radio_tx_rds_tp; struct v4l2_ctrl *radio_tx_rds_ms; struct v4l2_ctrl *radio_tx_rds_psname; struct v4l2_ctrl *radio_tx_rds_radiotext; struct v4l2_ctrl *radio_rx_rds_pty; struct v4l2_ctrl *radio_rx_rds_ta; struct v4l2_ctrl *radio_rx_rds_tp; struct v4l2_ctrl *radio_rx_rds_ms; struct v4l2_ctrl *radio_rx_rds_psname; struct v4l2_ctrl *radio_rx_rds_radiotext; struct v4l2_ctrl *ctrl_hdmi_to_output[MAX_HDMI_INPUTS]; char ctrl_hdmi_to_output_names[MAX_HDMI_INPUTS][32]; struct v4l2_ctrl *ctrl_svid_to_output[MAX_SVID_INPUTS]; char 
ctrl_svid_to_output_names[MAX_SVID_INPUTS][32]; unsigned input_brightness[MAX_INPUTS]; unsigned osd_mode; unsigned button_pressed; bool sensor_hflip; bool sensor_vflip; bool hflip; bool vflip; bool vbi_cap_interlaced; bool loop_video; bool reduced_fps; /* Framebuffer */ unsigned long video_pbase; void *video_vbase; u32 video_buffer_size; int display_width; int display_height; int display_byte_stride; int bits_per_pixel; int bytes_per_pixel; #ifdef CONFIG_VIDEO_VIVID_OSD struct fb_info fb_info; struct fb_var_screeninfo fb_defined; struct fb_fix_screeninfo fb_fix; #endif /* Error injection */ bool disconnect_error; bool queue_setup_error; bool buf_prepare_error; bool start_streaming_error; bool dqbuf_error; bool req_validate_error; bool seq_wrap; u64 time_wrap; u64 time_wrap_offset; unsigned perc_dropped_buffers; enum vivid_signal_mode std_signal_mode[MAX_INPUTS]; unsigned int query_std_last[MAX_INPUTS]; v4l2_std_id query_std[MAX_INPUTS]; enum tpg_video_aspect std_aspect_ratio[MAX_INPUTS]; enum vivid_signal_mode dv_timings_signal_mode[MAX_INPUTS]; char **query_dv_timings_qmenu; char *query_dv_timings_qmenu_strings; unsigned query_dv_timings_size; unsigned int query_dv_timings_last[MAX_INPUTS]; unsigned int query_dv_timings[MAX_INPUTS]; enum tpg_video_aspect dv_timings_aspect_ratio[MAX_INPUTS]; /* Input */ unsigned input; v4l2_std_id std_cap[MAX_INPUTS]; struct v4l2_dv_timings dv_timings_cap[MAX_INPUTS]; int dv_timings_cap_sel[MAX_INPUTS]; u32 service_set_cap; struct vivid_vbi_gen_data vbi_gen; u8 *edid; unsigned edid_blocks; unsigned edid_max_blocks; unsigned webcam_size_idx; unsigned webcam_ival_idx; unsigned tv_freq; unsigned tv_audmode; unsigned tv_field_cap; unsigned tv_audio_input; u32 power_present; /* Output */ unsigned output; v4l2_std_id std_out; struct v4l2_dv_timings dv_timings_out; u32 colorspace_out; u32 ycbcr_enc_out; u32 hsv_enc_out; u32 quantization_out; u32 xfer_func_out; u32 service_set_out; unsigned bytesperline_out[TPG_MAX_PLANES]; unsigned tv_field_out; unsigned tv_audio_output; bool vbi_out_have_wss; u8 vbi_out_wss[2]; bool vbi_out_have_cc[2]; u8 vbi_out_cc[2][2]; bool dvi_d_out; u8 *scaled_line; u8 *blended_line; unsigned cur_scaled_line; /* Output Overlay */ void *fb_vbase_out; bool overlay_out_enabled; int overlay_out_top, overlay_out_left; unsigned fbuf_out_flags; u32 chromakey_out; u8 global_alpha_out; /* video capture */ struct tpg_data tpg; unsigned ms_vid_cap; bool must_blank[MAX_VID_CAP_BUFFERS]; const struct vivid_fmt *fmt_cap; struct v4l2_fract timeperframe_vid_cap; enum v4l2_field field_cap; struct v4l2_rect src_rect; struct v4l2_rect fmt_cap_rect; struct v4l2_rect crop_cap; struct v4l2_rect compose_cap; struct v4l2_rect crop_bounds_cap; struct vb2_queue vb_vid_cap_q; struct list_head vid_cap_active; struct vb2_queue vb_vbi_cap_q; struct list_head vbi_cap_active; struct vb2_queue vb_meta_cap_q; struct list_head meta_cap_active; struct vb2_queue vb_touch_cap_q; struct list_head touch_cap_active; /* thread for generating video capture stream */ struct task_struct *kthread_vid_cap; unsigned long jiffies_vid_cap; u64 cap_stream_start; u64 cap_frame_period; u64 cap_frame_eof_offset; u32 cap_seq_offset; u32 cap_seq_count; bool cap_seq_resync; u32 vid_cap_seq_start; u32 vid_cap_seq_count; bool vid_cap_streaming; u32 vbi_cap_seq_start; u32 vbi_cap_seq_count; bool vbi_cap_streaming; u32 meta_cap_seq_start; u32 meta_cap_seq_count; bool meta_cap_streaming; /* Touch capture */ struct task_struct *kthread_touch_cap; unsigned long jiffies_touch_cap; u64 
touch_cap_stream_start; u32 touch_cap_seq_offset; bool touch_cap_seq_resync; u32 touch_cap_seq_start; u32 touch_cap_seq_count; u32 touch_cap_with_seq_wrap_count; bool touch_cap_streaming; struct v4l2_fract timeperframe_tch_cap; struct v4l2_pix_format tch_format; int tch_pat_random; /* video output */ const struct vivid_fmt *fmt_out; struct v4l2_fract timeperframe_vid_out; enum v4l2_field field_out; struct v4l2_rect sink_rect; struct v4l2_rect fmt_out_rect; struct v4l2_rect crop_out; struct v4l2_rect compose_out; struct v4l2_rect compose_bounds_out; struct vb2_queue vb_vid_out_q; struct list_head vid_out_active; struct vb2_queue vb_vbi_out_q; struct list_head vbi_out_active; struct vb2_queue vb_meta_out_q; struct list_head meta_out_active; /* video loop precalculated rectangles */ /* * Intersection between what the output side composes and the capture side * crops. I.e., what actually needs to be copied from the output buffer to * the capture buffer. */ struct v4l2_rect loop_vid_copy; /* The part of the output buffer that (after scaling) corresponds to loop_vid_copy. */ struct v4l2_rect loop_vid_out; /* The part of the capture buffer that (after scaling) corresponds to loop_vid_copy. */ struct v4l2_rect loop_vid_cap; /* * The intersection of the framebuffer, the overlay output window and * loop_vid_copy. I.e., the part of the framebuffer that actually should be * blended with the compose_out rectangle. This uses the framebuffer origin. */ struct v4l2_rect loop_fb_copy; /* The same as loop_fb_copy but with compose_out origin. */ struct v4l2_rect loop_vid_overlay; /* * The part of the capture buffer that (after scaling) corresponds * to loop_vid_overlay. */ struct v4l2_rect loop_vid_overlay_cap; /* thread for generating video output stream */ struct task_struct *kthread_vid_out; unsigned long jiffies_vid_out; u32 out_seq_offset; u32 out_seq_count; bool out_seq_resync; u32 vid_out_seq_start; u32 vid_out_seq_count; bool vid_out_streaming; u32 vbi_out_seq_start; u32 vbi_out_seq_count; bool vbi_out_streaming; bool stream_sliced_vbi_out; u32 meta_out_seq_start; u32 meta_out_seq_count; bool meta_out_streaming; /* SDR capture */ struct vb2_queue vb_sdr_cap_q; struct list_head sdr_cap_active; u32 sdr_pixelformat; /* v4l2 format id */ unsigned sdr_buffersize; unsigned sdr_adc_freq; unsigned sdr_fm_freq; unsigned sdr_fm_deviation; int sdr_fixp_src_phase; int sdr_fixp_mod_phase; bool tstamp_src_is_soe; bool has_crop_cap; bool has_compose_cap; bool has_scaler_cap; bool has_crop_out; bool has_compose_out; bool has_scaler_out; /* thread for generating SDR stream */ struct task_struct *kthread_sdr_cap; unsigned long jiffies_sdr_cap; u32 sdr_cap_seq_offset; u32 sdr_cap_seq_start; u32 sdr_cap_seq_count; u32 sdr_cap_with_seq_wrap_count; bool sdr_cap_seq_resync; /* RDS generator */ struct vivid_rds_gen rds_gen; /* Radio receiver */ unsigned radio_rx_freq; unsigned radio_rx_audmode; int radio_rx_sig_qual; unsigned radio_rx_hw_seek_mode; bool radio_rx_hw_seek_prog_lim; bool radio_rx_rds_controls; bool radio_rx_rds_enabled; unsigned radio_rx_rds_use_alternates; unsigned radio_rx_rds_last_block; struct v4l2_fh *radio_rx_rds_owner; /* Radio transmitter */ unsigned radio_tx_freq; unsigned radio_tx_subchans; bool radio_tx_rds_controls; unsigned radio_tx_rds_last_block; struct v4l2_fh *radio_tx_rds_owner; /* Shared between radio receiver and transmitter */ bool radio_rds_loop; ktime_t radio_rds_init_time; /* CEC */ struct cec_adapter *cec_rx_adap; struct cec_adapter *cec_tx_adap[MAX_HDMI_OUTPUTS]; struct task_struct 
*kthread_cec; wait_queue_head_t kthread_waitq_cec; struct vivid_cec_xfer xfers[MAX_OUTPUTS]; spinlock_t cec_xfers_slock; /* read and write cec messages */ u32 cec_sft; /* bus signal free time, in bit periods */ u8 last_initiator; /* CEC OSD String */ char osd[14]; unsigned long osd_jiffies; bool meta_pts; bool meta_scr; }; static inline bool vivid_is_webcam(const struct vivid_dev *dev) { return dev->input_type[dev->input] == WEBCAM; } static inline bool vivid_is_tv_cap(const struct vivid_dev *dev) { return dev->input_type[dev->input] == TV; } static inline bool vivid_is_svid_cap(const struct vivid_dev *dev) { return dev->input_type[dev->input] == SVID; } static inline bool vivid_is_hdmi_cap(const struct vivid_dev *dev) { return dev->input_type[dev->input] == HDMI; } static inline bool vivid_is_sdtv_cap(const struct vivid_dev *dev) { return vivid_is_tv_cap(dev) || vivid_is_svid_cap(dev); } static inline bool vivid_is_svid_out(const struct vivid_dev *dev) { return dev->output_type[dev->output] == SVID; } static inline bool vivid_is_hdmi_out(const struct vivid_dev *dev) { return dev->output_type[dev->output] == HDMI; } #endif |
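/*
 * Illustrative sketch (not part of the vivid driver): how the input-type
 * helpers above are normally consumed.  Code on the capture path can branch
 * on the currently selected input without poking at input_type[] directly.
 * The function name and the returned strings are invented for this example.
 */
static inline const char *vivid_example_input_kind(const struct vivid_dev *dev)
{
	if (vivid_is_webcam(dev))
		return "webcam";
	if (vivid_is_sdtv_cap(dev))	/* TV tuner or S-Video input */
		return "sdtv";
	if (vivid_is_hdmi_cap(dev))
		return "hdmi";
	return "unknown";
}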
| 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 | #ifndef LLC_CONN_H #define LLC_CONN_H /* * Copyright (c) 1997 by Procom Technology, Inc. * 2001, 2002 by Arnaldo Carvalho de Melo <acme@conectiva.com.br> * * This program can be redistributed or modified under the terms of the * GNU General Public License as published by the Free Software Foundation. * This program is distributed without any warranty or implied warranty * of merchantability or fitness for a particular purpose. * * See the GNU General Public License for more details. */ #include <linux/timer.h> #include <net/llc_if.h> #include <net/sock.h> #include <linux/llc.h> #define LLC_EVENT 1 #define LLC_PACKET 2 #define LLC2_P_TIME 2 #define LLC2_ACK_TIME 1 #define LLC2_REJ_TIME 3 #define LLC2_BUSY_TIME 3 struct llc_timer { struct timer_list timer; unsigned long expire; /* timer expire time */ }; struct llc_sock { /* struct sock must be the first member of llc_sock */ struct sock sk; struct sockaddr_llc addr; /* address sock is bound to */ u8 state; /* state of connection */ struct llc_sap *sap; /* pointer to parent SAP */ struct llc_addr laddr; /* lsap/mac pair */ struct llc_addr daddr; /* dsap/mac pair */ struct net_device *dev; /* device to send to remote */ netdevice_tracker dev_tracker; u32 copied_seq; /* head of yet unread data */ u8 retry_count; /* number of retries */ u8 ack_must_be_send; u8 first_pdu_Ns; u8 npta; struct llc_timer ack_timer; struct llc_timer pf_cycle_timer; struct llc_timer rej_sent_timer; struct llc_timer busy_state_timer; /* ind busy clr at remote LLC */ u8 vS; /* seq# next in-seq I-PDU tx'd*/ u8 vR; /* seq# next in-seq I-PDU rx'd*/ u32 n2; /* max nbr re-tx's for timeout*/ u32 n1; /* max nbr octets in I PDU */ u8 k; /* tx window size; max = 127 */ u8 rw; /* rx window size; max = 127 */ u8 p_flag; /* state flags */ u8 f_flag; u8 s_flag; u8 data_flag; u8 remote_busy_flag; u8 cause_flag; struct sk_buff_head pdu_unack_q; /* PUDs sent/waiting ack */ u16 link; /* network layer link number */ u8 X; /* a temporary variable */ u8 ack_pf; /* this flag indicates what is the P-bit of acknowledge */ u8 failed_data_req; /* recognize that already exist a failed llc_data_req_handler (tx_buffer_full or unacceptable state */ u8 dec_step; u8 inc_cntr; u8 dec_cntr; u8 connect_step; u8 last_nr; /* NR of last pdu received */ u32 rx_pdu_hdr; /* used for saving header of last pdu received and caused sending FRMR. 
Used for resending FRMR */ u32 cmsg_flags; struct hlist_node dev_hash_node; }; static inline struct llc_sock *llc_sk(const struct sock *sk) { return (struct llc_sock *)sk; } static __inline__ void llc_set_backlog_type(struct sk_buff *skb, char type) { skb->cb[sizeof(skb->cb) - 1] = type; } static __inline__ char llc_backlog_type(struct sk_buff *skb) { return skb->cb[sizeof(skb->cb) - 1]; } struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot, int kern); void llc_sk_stop_all_timers(struct sock *sk, bool sync); void llc_sk_free(struct sock *sk); void llc_sk_reset(struct sock *sk); /* Access to a connection */ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb); void llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb); void llc_conn_rtn_pdu(struct sock *sk, struct sk_buff *skb); void llc_conn_resend_i_pdu_as_cmd(struct sock *sk, u8 nr, u8 first_p_bit); void llc_conn_resend_i_pdu_as_rsp(struct sock *sk, u8 nr, u8 first_f_bit); int llc_conn_remove_acked_pdus(struct sock *conn, u8 nr, u16 *how_many_unacked); struct sock *llc_lookup_established(struct llc_sap *sap, struct llc_addr *daddr, struct llc_addr *laddr, const struct net *net); void llc_sap_add_socket(struct llc_sap *sap, struct sock *sk); void llc_sap_remove_socket(struct llc_sap *sap, struct sock *sk); u8 llc_data_accept_state(u8 state); void llc_build_offset_table(void); #endif /* LLC_CONN_H */ |
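/*
 * Illustrative sketch (not taken from the LLC core): llc_set_backlog_type()
 * and llc_backlog_type() above tag an skb in the last byte of skb->cb so a
 * backlog handler can tell queued events from queued packets.  The dispatch
 * function below is invented; only the helpers, LLC_EVENT/LLC_PACKET and
 * llc_conn_state_process() come from this header.
 */
static int llc_example_backlog_dispatch(struct sock *sk, struct sk_buff *skb)
{
	switch (llc_backlog_type(skb)) {
	case LLC_PACKET:
		return llc_conn_state_process(sk, skb);
	case LLC_EVENT:
		/* connection event handling would go here */
		return 0;
	default:
		kfree_skb(skb);
		return -EINVAL;
	}
}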
| 492 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* Filesystem parameter description and parser * * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #ifndef _LINUX_FS_PARSER_H #define _LINUX_FS_PARSER_H #include <linux/fs_context.h> struct path; struct constant_table { const char *name; int value; }; struct fs_parameter_spec; struct fs_parse_result; typedef int fs_param_type(struct p_log *, const struct fs_parameter_spec *, struct fs_parameter *, struct fs_parse_result *); /* * The type of parameter expected. */ fs_param_type fs_param_is_bool, fs_param_is_u32, fs_param_is_s32, fs_param_is_u64, fs_param_is_enum, fs_param_is_string, fs_param_is_blob, fs_param_is_blockdev, fs_param_is_path, fs_param_is_fd, fs_param_is_uid, fs_param_is_gid, fs_param_is_file_or_string; /* * Specification of the type of value a parameter wants. * * Note that the fsparam_flag(), fsparam_string(), fsparam_u32(), ... macros * should be used to generate elements of this type. */ struct fs_parameter_spec { const char *name; fs_param_type *type; /* The desired parameter type */ u8 opt; /* Option number (returned by fs_parse()) */ unsigned short flags; #define fs_param_neg_with_no 0x0002 /* "noxxx" is negative param */ #define fs_param_can_be_empty 0x0004 /* "xxx=" is allowed */ #define fs_param_deprecated 0x0008 /* The param is deprecated */ const void *data; }; /* * Result of parse. */ struct fs_parse_result { bool negated; /* T if param was "noxxx" */ union { bool boolean; /* For spec_bool */ int int_32; /* For spec_s32/spec_enum */ unsigned int uint_32; /* For spec_u32{,_octal,_hex}/spec_enum */ u64 uint_64; /* For spec_u64 */ kuid_t uid; kgid_t gid; }; }; extern int __fs_parse(struct p_log *log, const struct fs_parameter_spec *desc, struct fs_parameter *value, struct fs_parse_result *result); static inline int fs_parse(struct fs_context *fc, const struct fs_parameter_spec *desc, struct fs_parameter *param, struct fs_parse_result *result) { return __fs_parse(&fc->log, desc, param, result); } extern int fs_lookup_param(struct fs_context *fc, struct fs_parameter *param, bool want_bdev, unsigned int flags, struct path *_path); extern int lookup_constant(const struct constant_table tbl[], const char *name, int not_found); extern const struct constant_table bool_names[]; #ifdef CONFIG_VALIDATE_FS_PARSER extern bool fs_validate_description(const char *name, const struct fs_parameter_spec *desc); #else static inline bool fs_validate_description(const char *name, const struct fs_parameter_spec *desc) { return true; } #endif /* * Parameter type, name, index and flags element constructors. Use as: * * fsparam_xxxx("foo", Opt_foo) * * If existing helpers are not enough, direct use of __fsparam() would * work, but any such case is probably a sign that new helper is needed. * Helpers will remain stable; low-level implementation may change. 
*/ #define __fsparam(TYPE, NAME, OPT, FLAGS, DATA) \ { \ .name = NAME, \ .opt = OPT, \ .type = TYPE, \ .flags = FLAGS, \ .data = DATA \ } #define fsparam_flag(NAME, OPT) __fsparam(NULL, NAME, OPT, 0, NULL) #define fsparam_flag_no(NAME, OPT) \ __fsparam(NULL, NAME, OPT, fs_param_neg_with_no, NULL) #define fsparam_bool(NAME, OPT) __fsparam(fs_param_is_bool, NAME, OPT, 0, NULL) #define fsparam_u32(NAME, OPT) __fsparam(fs_param_is_u32, NAME, OPT, 0, NULL) #define fsparam_u32oct(NAME, OPT) \ __fsparam(fs_param_is_u32, NAME, OPT, 0, (void *)8) #define fsparam_s32(NAME, OPT) __fsparam(fs_param_is_s32, NAME, OPT, 0, NULL) #define fsparam_u64(NAME, OPT) __fsparam(fs_param_is_u64, NAME, OPT, 0, NULL) #define fsparam_enum(NAME, OPT, array) __fsparam(fs_param_is_enum, NAME, OPT, 0, array) #define fsparam_string(NAME, OPT) \ __fsparam(fs_param_is_string, NAME, OPT, 0, NULL) #define fsparam_blob(NAME, OPT) __fsparam(fs_param_is_blob, NAME, OPT, 0, NULL) #define fsparam_bdev(NAME, OPT) __fsparam(fs_param_is_blockdev, NAME, OPT, 0, NULL) #define fsparam_path(NAME, OPT) __fsparam(fs_param_is_path, NAME, OPT, 0, NULL) #define fsparam_fd(NAME, OPT) __fsparam(fs_param_is_fd, NAME, OPT, 0, NULL) #define fsparam_file_or_string(NAME, OPT) \ __fsparam(fs_param_is_file_or_string, NAME, OPT, 0, NULL) #define fsparam_uid(NAME, OPT) __fsparam(fs_param_is_uid, NAME, OPT, 0, NULL) #define fsparam_gid(NAME, OPT) __fsparam(fs_param_is_gid, NAME, OPT, 0, NULL) /* String parameter that allows empty argument */ #define fsparam_string_empty(NAME, OPT) \ __fsparam(fs_param_is_string, NAME, OPT, fs_param_can_be_empty, NULL) #endif /* _LINUX_FS_PARSER_H */ |
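/*
 * Usage sketch (hypothetical filesystem, not part of this header): a mount
 * parameter table built from the fsparam_*() constructors above, consumed
 * through fs_parse() in a ->parse_param() hook.  The option enum, the table
 * and myfs_parse_param() are invented for illustration.
 */
enum { Opt_ro, Opt_uid, Opt_bsize };

static const struct fs_parameter_spec myfs_param_specs[] = {
	fsparam_flag_no("ro", Opt_ro),		/* accepts "ro" or "noro" */
	fsparam_uid("uid", Opt_uid),
	fsparam_u32("bsize", Opt_bsize),
	{}
};

static int myfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
	struct fs_parse_result result;
	int opt;

	opt = fs_parse(fc, myfs_param_specs, param, &result);
	if (opt < 0)
		return opt;

	switch (opt) {
	case Opt_ro:
		/* result.negated is true for "noro" */
		break;
	case Opt_uid:
		/* result.uid holds the parsed kuid_t */
		break;
	case Opt_bsize:
		/* result.uint_32 holds the parsed value */
		break;
	}
	return 0;
}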
| 2 31 29 29 26 26 9 7 14 7 4 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 | /* SPDX-License-Identifier: GPL-2.0-only */ /* * V9FS FID Management * * Copyright (C) 2005 by Eric Van Hensbergen <ericvh@gmail.com> */ #ifndef FS_9P_FID_H #define FS_9P_FID_H #include <linux/list.h> #include "v9fs.h" struct p9_fid *v9fs_fid_find_inode(struct inode *inode, bool want_writeable, kuid_t uid, bool any); struct p9_fid *v9fs_fid_lookup(struct dentry *dentry); static inline struct p9_fid *v9fs_parent_fid(struct dentry *dentry) { return v9fs_fid_lookup(dentry->d_parent); } void v9fs_fid_add(struct dentry *dentry, struct p9_fid **fid); void v9fs_open_fid_add(struct inode *inode, struct p9_fid **fid); static inline struct p9_fid *clone_fid(struct p9_fid *fid) { return IS_ERR(fid) ? fid : p9_client_walk(fid, 0, NULL, 1); } static inline struct p9_fid *v9fs_fid_clone(struct dentry *dentry) { struct p9_fid *fid, *nfid; fid = v9fs_fid_lookup(dentry); if (!fid || IS_ERR(fid)) return fid; nfid = clone_fid(fid); p9_fid_put(fid); return nfid; } /** * v9fs_fid_addmodes - add cache flags to fid mode (for client use only) * @fid: fid to augment * @s_flags: session info mount flags * @s_cache: session info cache flags * @f_flags: unix open flags * * make sure mode reflects flags of underlying mounts * also qid.version == 0 reflects a synthetic or legacy file system * NOTE: these are set after open so only reflect 9p client not * underlying file system on server. */ static inline void v9fs_fid_add_modes(struct p9_fid *fid, unsigned int s_flags, unsigned int s_cache, unsigned int f_flags) { if ((!s_cache) || ((fid->qid.version == 0) && !(s_flags & V9FS_IGNORE_QV)) || (s_flags & V9FS_DIRECT_IO) || (f_flags & O_DIRECT)) { fid->mode |= P9L_DIRECT; /* no read or write cache */ } else if ((!(s_cache & CACHE_WRITEBACK)) || (f_flags & O_DSYNC) || (s_flags & V9FS_SYNC)) { fid->mode |= P9L_NOWRITECACHE; } } #endif |
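/*
 * Illustrative sketch (not part of fs/9p): typical use of the helpers above
 * when opening a file - take a private clone of the dentry's fid so the
 * cached fid stays untouched, then let v9fs_fid_add_modes() derive the
 * caching mode from the session and open flags.  v9fs_example_open_fid() and
 * its parameters are invented for this example.
 */
static struct p9_fid *v9fs_example_open_fid(struct dentry *dentry,
					    unsigned int s_flags,
					    unsigned int s_cache,
					    unsigned int f_flags)
{
	struct p9_fid *fid;

	fid = v9fs_fid_clone(dentry);
	if (IS_ERR_OR_NULL(fid))
		return fid;

	v9fs_fid_add_modes(fid, s_flags, s_cache, f_flags);
	return fid;
}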
// SPDX-License-Identifier: GPL-2.0-only /* * Marvell NFC driver: Firmware downloader * * Copyright (C) 2015, Marvell International Ltd.
*/ #include <linux/module.h> #include <linux/unaligned.h> #include <linux/firmware.h> #include <linux/nfc.h> #include <net/nfc/nci.h> #include <net/nfc/nci_core.h> #include "nfcmrvl.h" #define FW_DNLD_TIMEOUT 15000 #define NCI_OP_PROPRIETARY_BOOT_CMD nci_opcode_pack(NCI_GID_PROPRIETARY, \ NCI_OP_PROP_BOOT_CMD) /* FW download states */ enum { STATE_RESET = 0, STATE_INIT, STATE_SET_REF_CLOCK, STATE_SET_HI_CONFIG, STATE_OPEN_LC, STATE_FW_DNLD, STATE_CLOSE_LC, STATE_BOOT }; enum { SUBSTATE_WAIT_COMMAND = 0, SUBSTATE_WAIT_ACK_CREDIT, SUBSTATE_WAIT_NACK_CREDIT, SUBSTATE_WAIT_DATA_CREDIT, }; /* * Patterns for responses */ static const uint8_t nci_pattern_core_reset_ntf[] = { 0x60, 0x00, 0x02, 0xA0, 0x01 }; static const uint8_t nci_pattern_core_init_rsp[] = { 0x40, 0x01, 0x11 }; static const uint8_t nci_pattern_core_set_config_rsp[] = { 0x40, 0x02, 0x02, 0x00, 0x00 }; static const uint8_t nci_pattern_core_conn_create_rsp[] = { 0x40, 0x04, 0x04, 0x00 }; static const uint8_t nci_pattern_core_conn_close_rsp[] = { 0x40, 0x05, 0x01, 0x00 }; static const uint8_t nci_pattern_core_conn_credits_ntf[] = { 0x60, 0x06, 0x03, 0x01, NCI_CORE_LC_CONNID_PROP_FW_DL, 0x01 }; static const uint8_t nci_pattern_proprietary_boot_rsp[] = { 0x4F, 0x3A, 0x01, 0x00 }; static struct sk_buff *alloc_lc_skb(struct nfcmrvl_private *priv, uint8_t plen) { struct sk_buff *skb; struct nci_data_hdr *hdr; skb = nci_skb_alloc(priv->ndev, (NCI_DATA_HDR_SIZE + plen), GFP_KERNEL); if (!skb) return NULL; hdr = skb_put(skb, NCI_DATA_HDR_SIZE); hdr->conn_id = NCI_CORE_LC_CONNID_PROP_FW_DL; hdr->rfu = 0; hdr->plen = plen; nci_mt_set((__u8 *)hdr, NCI_MT_DATA_PKT); nci_pbf_set((__u8 *)hdr, NCI_PBF_LAST); return skb; } static void fw_dnld_over(struct nfcmrvl_private *priv, u32 error) { if (priv->fw_dnld.fw) { release_firmware(priv->fw_dnld.fw); priv->fw_dnld.fw = NULL; priv->fw_dnld.header = NULL; priv->fw_dnld.binary_config = NULL; } atomic_set(&priv->ndev->cmd_cnt, 0); if (timer_pending(&priv->ndev->cmd_timer)) timer_delete_sync(&priv->ndev->cmd_timer); if (timer_pending(&priv->fw_dnld.timer)) timer_delete_sync(&priv->fw_dnld.timer); nfc_info(priv->dev, "FW loading over (%d)]\n", error); if (error != 0) { /* failed, halt the chip to avoid power consumption */ nfcmrvl_chip_halt(priv); } nfc_fw_download_done(priv->ndev->nfc_dev, priv->fw_dnld.name, error); } static void fw_dnld_timeout(struct timer_list *t) { struct nfcmrvl_private *priv = timer_container_of(priv, t, fw_dnld.timer); nfc_err(priv->dev, "FW loading timeout"); priv->fw_dnld.state = STATE_RESET; fw_dnld_over(priv, -ETIMEDOUT); } static int process_state_reset(struct nfcmrvl_private *priv, const struct sk_buff *skb) { if (sizeof(nci_pattern_core_reset_ntf) != skb->len || memcmp(skb->data, nci_pattern_core_reset_ntf, sizeof(nci_pattern_core_reset_ntf))) return -EINVAL; nfc_info(priv->dev, "BootROM reset, start fw download\n"); /* Start FW download state machine */ priv->fw_dnld.state = STATE_INIT; nci_send_cmd(priv->ndev, NCI_OP_CORE_INIT_CMD, 0, NULL); return 0; } static int process_state_init(struct nfcmrvl_private *priv, const struct sk_buff *skb) { struct nci_core_set_config_cmd cmd; if (sizeof(nci_pattern_core_init_rsp) >= skb->len || memcmp(skb->data, nci_pattern_core_init_rsp, sizeof(nci_pattern_core_init_rsp))) return -EINVAL; cmd.num_params = 1; cmd.param.id = NFCMRVL_PROP_REF_CLOCK; cmd.param.len = 4; memcpy(cmd.param.val, &priv->fw_dnld.header->ref_clock, 4); nci_send_cmd(priv->ndev, NCI_OP_CORE_SET_CONFIG_CMD, 3 + cmd.param.len, &cmd); priv->fw_dnld.state = 
STATE_SET_REF_CLOCK; return 0; } static void create_lc(struct nfcmrvl_private *priv) { uint8_t param[2] = { NCI_CORE_LC_PROP_FW_DL, 0x0 }; priv->fw_dnld.state = STATE_OPEN_LC; nci_send_cmd(priv->ndev, NCI_OP_CORE_CONN_CREATE_CMD, 2, param); } static int process_state_set_ref_clock(struct nfcmrvl_private *priv, const struct sk_buff *skb) { struct nci_core_set_config_cmd cmd; if (sizeof(nci_pattern_core_set_config_rsp) != skb->len || memcmp(skb->data, nci_pattern_core_set_config_rsp, skb->len)) return -EINVAL; cmd.num_params = 1; cmd.param.id = NFCMRVL_PROP_SET_HI_CONFIG; switch (priv->phy) { case NFCMRVL_PHY_UART: cmd.param.len = 5; memcpy(cmd.param.val, &priv->fw_dnld.binary_config->uart.baudrate, 4); cmd.param.val[4] = priv->fw_dnld.binary_config->uart.flow_control; break; case NFCMRVL_PHY_I2C: cmd.param.len = 5; memcpy(cmd.param.val, &priv->fw_dnld.binary_config->i2c.clk, 4); cmd.param.val[4] = 0; break; case NFCMRVL_PHY_SPI: cmd.param.len = 5; memcpy(cmd.param.val, &priv->fw_dnld.binary_config->spi.clk, 4); cmd.param.val[4] = 0; break; default: create_lc(priv); return 0; } priv->fw_dnld.state = STATE_SET_HI_CONFIG; nci_send_cmd(priv->ndev, NCI_OP_CORE_SET_CONFIG_CMD, 3 + cmd.param.len, &cmd); return 0; } static int process_state_set_hi_config(struct nfcmrvl_private *priv, const struct sk_buff *skb) { if (sizeof(nci_pattern_core_set_config_rsp) != skb->len || memcmp(skb->data, nci_pattern_core_set_config_rsp, skb->len)) return -EINVAL; create_lc(priv); return 0; } static int process_state_open_lc(struct nfcmrvl_private *priv, const struct sk_buff *skb) { if (sizeof(nci_pattern_core_conn_create_rsp) >= skb->len || memcmp(skb->data, nci_pattern_core_conn_create_rsp, sizeof(nci_pattern_core_conn_create_rsp))) return -EINVAL; priv->fw_dnld.state = STATE_FW_DNLD; priv->fw_dnld.substate = SUBSTATE_WAIT_COMMAND; priv->fw_dnld.offset = priv->fw_dnld.binary_config->offset; return 0; } static int process_state_fw_dnld(struct nfcmrvl_private *priv, struct sk_buff *skb) { uint16_t len; uint16_t comp_len; struct sk_buff *out_skb; switch (priv->fw_dnld.substate) { case SUBSTATE_WAIT_COMMAND: /* * Command format: * B0..2: NCI header * B3 : Helper command (0xA5) * B4..5: le16 data size * B6..7: le16 data size complement (~) * B8..N: payload */ /* Remove NCI HDR */ skb_pull(skb, 3); if (skb->data[0] != HELPER_CMD_PACKET_FORMAT || skb->len != 5) { nfc_err(priv->dev, "bad command"); return -EINVAL; } skb_pull(skb, 1); len = get_unaligned_le16(skb->data); skb_pull(skb, 2); comp_len = get_unaligned_le16(skb->data); memcpy(&comp_len, skb->data, 2); skb_pull(skb, 2); if (((~len) & 0xFFFF) != comp_len) { nfc_err(priv->dev, "bad len complement: %x %x %x", len, comp_len, (~len & 0xFFFF)); out_skb = alloc_lc_skb(priv, 1); if (!out_skb) return -ENOMEM; skb_put_u8(out_skb, 0xBF); nci_send_frame(priv->ndev, out_skb); priv->fw_dnld.substate = SUBSTATE_WAIT_NACK_CREDIT; return 0; } priv->fw_dnld.chunk_len = len; out_skb = alloc_lc_skb(priv, 1); if (!out_skb) return -ENOMEM; skb_put_u8(out_skb, HELPER_ACK_PACKET_FORMAT); nci_send_frame(priv->ndev, out_skb); priv->fw_dnld.substate = SUBSTATE_WAIT_ACK_CREDIT; break; case SUBSTATE_WAIT_ACK_CREDIT: if (sizeof(nci_pattern_core_conn_credits_ntf) != skb->len || memcmp(nci_pattern_core_conn_credits_ntf, skb->data, skb->len)) { nfc_err(priv->dev, "bad packet: waiting for credit"); return -EINVAL; } if (priv->fw_dnld.chunk_len == 0) { /* FW Loading is done */ uint8_t conn_id = NCI_CORE_LC_CONNID_PROP_FW_DL; priv->fw_dnld.state = STATE_CLOSE_LC; nci_send_cmd(priv->ndev, 
NCI_OP_CORE_CONN_CLOSE_CMD, 1, &conn_id); } else { out_skb = alloc_lc_skb(priv, priv->fw_dnld.chunk_len); if (!out_skb) return -ENOMEM; skb_put_data(out_skb, ((uint8_t *)priv->fw_dnld.fw->data) + priv->fw_dnld.offset, priv->fw_dnld.chunk_len); nci_send_frame(priv->ndev, out_skb); priv->fw_dnld.substate = SUBSTATE_WAIT_DATA_CREDIT; } break; case SUBSTATE_WAIT_DATA_CREDIT: if (sizeof(nci_pattern_core_conn_credits_ntf) != skb->len || memcmp(nci_pattern_core_conn_credits_ntf, skb->data, skb->len)) { nfc_err(priv->dev, "bad packet: waiting for credit"); return -EINVAL; } priv->fw_dnld.offset += priv->fw_dnld.chunk_len; priv->fw_dnld.chunk_len = 0; priv->fw_dnld.substate = SUBSTATE_WAIT_COMMAND; break; case SUBSTATE_WAIT_NACK_CREDIT: if (sizeof(nci_pattern_core_conn_credits_ntf) != skb->len || memcmp(nci_pattern_core_conn_credits_ntf, skb->data, skb->len)) { nfc_err(priv->dev, "bad packet: waiting for credit"); return -EINVAL; } priv->fw_dnld.substate = SUBSTATE_WAIT_COMMAND; break; } return 0; } static int process_state_close_lc(struct nfcmrvl_private *priv, const struct sk_buff *skb) { if (sizeof(nci_pattern_core_conn_close_rsp) != skb->len || memcmp(skb->data, nci_pattern_core_conn_close_rsp, skb->len)) return -EINVAL; priv->fw_dnld.state = STATE_BOOT; nci_send_cmd(priv->ndev, NCI_OP_PROPRIETARY_BOOT_CMD, 0, NULL); return 0; } static int process_state_boot(struct nfcmrvl_private *priv, const struct sk_buff *skb) { if (sizeof(nci_pattern_proprietary_boot_rsp) != skb->len || memcmp(skb->data, nci_pattern_proprietary_boot_rsp, skb->len)) return -EINVAL; /* * Update HI config to use the right configuration for the next * data exchanges. */ priv->if_ops->nci_update_config(priv, &priv->fw_dnld.binary_config->config); if (priv->fw_dnld.binary_config == &priv->fw_dnld.header->helper) { /* * This is the case where an helper was needed and we have * uploaded it. Now we have to wait the next RESET NTF to start * FW download. 
*/ priv->fw_dnld.state = STATE_RESET; priv->fw_dnld.binary_config = &priv->fw_dnld.header->firmware; nfc_info(priv->dev, "FW loading: helper loaded"); } else { nfc_info(priv->dev, "FW loading: firmware loaded"); fw_dnld_over(priv, 0); } return 0; } static void fw_dnld_rx_work(struct work_struct *work) { int ret; struct sk_buff *skb; struct nfcmrvl_fw_dnld *fw_dnld = container_of(work, struct nfcmrvl_fw_dnld, rx_work); struct nfcmrvl_private *priv = container_of(fw_dnld, struct nfcmrvl_private, fw_dnld); while ((skb = skb_dequeue(&fw_dnld->rx_q))) { nfc_send_to_raw_sock(priv->ndev->nfc_dev, skb, RAW_PAYLOAD_NCI, NFC_DIRECTION_RX); switch (fw_dnld->state) { case STATE_RESET: ret = process_state_reset(priv, skb); break; case STATE_INIT: ret = process_state_init(priv, skb); break; case STATE_SET_REF_CLOCK: ret = process_state_set_ref_clock(priv, skb); break; case STATE_SET_HI_CONFIG: ret = process_state_set_hi_config(priv, skb); break; case STATE_OPEN_LC: ret = process_state_open_lc(priv, skb); break; case STATE_FW_DNLD: ret = process_state_fw_dnld(priv, skb); break; case STATE_CLOSE_LC: ret = process_state_close_lc(priv, skb); break; case STATE_BOOT: ret = process_state_boot(priv, skb); break; default: ret = -EFAULT; } kfree_skb(skb); if (ret != 0) { nfc_err(priv->dev, "FW loading error"); fw_dnld_over(priv, ret); break; } } } int nfcmrvl_fw_dnld_init(struct nfcmrvl_private *priv) { char name[32]; INIT_WORK(&priv->fw_dnld.rx_work, fw_dnld_rx_work); snprintf(name, sizeof(name), "%s_nfcmrvl_fw_dnld_rx_wq", dev_name(&priv->ndev->nfc_dev->dev)); priv->fw_dnld.rx_wq = create_singlethread_workqueue(name); if (!priv->fw_dnld.rx_wq) return -ENOMEM; skb_queue_head_init(&priv->fw_dnld.rx_q); return 0; } void nfcmrvl_fw_dnld_deinit(struct nfcmrvl_private *priv) { destroy_workqueue(priv->fw_dnld.rx_wq); } void nfcmrvl_fw_dnld_recv_frame(struct nfcmrvl_private *priv, struct sk_buff *skb) { /* Discard command timer */ if (timer_pending(&priv->ndev->cmd_timer)) timer_delete_sync(&priv->ndev->cmd_timer); /* Allow next command */ atomic_set(&priv->ndev->cmd_cnt, 1); /* Queue and trigger rx work */ skb_queue_tail(&priv->fw_dnld.rx_q, skb); queue_work(priv->fw_dnld.rx_wq, &priv->fw_dnld.rx_work); } void nfcmrvl_fw_dnld_abort(struct nfcmrvl_private *priv) { fw_dnld_over(priv, -EHOSTDOWN); } int nfcmrvl_fw_dnld_start(struct nci_dev *ndev, const char *firmware_name) { struct nfcmrvl_private *priv = nci_get_drvdata(ndev); struct nfcmrvl_fw_dnld *fw_dnld = &priv->fw_dnld; int res; if (!priv->support_fw_dnld) return -ENOTSUPP; if (!firmware_name || !firmware_name[0]) return -EINVAL; strcpy(fw_dnld->name, firmware_name); /* * Retrieve FW binary file and parse it to initialize FW download * state machine. 
*/ /* Retrieve FW binary */ res = request_firmware(&fw_dnld->fw, firmware_name, &ndev->nfc_dev->dev); if (res < 0) { nfc_err(priv->dev, "failed to retrieve FW %s", firmware_name); return -ENOENT; } fw_dnld->header = (const struct nfcmrvl_fw *) priv->fw_dnld.fw->data; if (fw_dnld->header->magic != NFCMRVL_FW_MAGIC || fw_dnld->header->phy != priv->phy) { nfc_err(priv->dev, "bad firmware binary %s magic=0x%x phy=%d", firmware_name, fw_dnld->header->magic, fw_dnld->header->phy); release_firmware(fw_dnld->fw); fw_dnld->header = NULL; return -EINVAL; } if (fw_dnld->header->helper.offset != 0) { nfc_info(priv->dev, "loading helper"); fw_dnld->binary_config = &fw_dnld->header->helper; } else { nfc_info(priv->dev, "loading firmware"); fw_dnld->binary_config = &fw_dnld->header->firmware; } /* Configure a timer for timeout */ timer_setup(&priv->fw_dnld.timer, fw_dnld_timeout, 0); mod_timer(&priv->fw_dnld.timer, jiffies + msecs_to_jiffies(FW_DNLD_TIMEOUT)); /* Ronfigure HI to be sure that it is the bootrom values */ priv->if_ops->nci_update_config(priv, &fw_dnld->header->bootrom.config); /* Allow first command */ atomic_set(&priv->ndev->cmd_cnt, 1); /* First, reset the chip */ priv->fw_dnld.state = STATE_RESET; nfcmrvl_chip_reset(priv); /* Now wait for CORE_RESET_NTF or timeout */ return 0; } |
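/*
 * Illustrative sketch (not part of the driver): the length/complement check
 * done in SUBSTATE_WAIT_COMMAND, written as a standalone helper.  A helper
 * command carries a le16 chunk length followed by its bitwise complement, so
 * a corrupted header is detected before any firmware data is sent.  The
 * function name is invented; get_unaligned_le16() is the accessor used above.
 */
static int example_check_helper_len(const u8 *payload, u16 *chunk_len)
{
	u16 len = get_unaligned_le16(payload);
	u16 comp_len = get_unaligned_le16(payload + 2);

	if (((~len) & 0xFFFF) != comp_len)
		return -EINVAL;	/* complement mismatch: the chip gets a NACK */

	*chunk_len = len;
	return 0;
}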
// SPDX-License-Identifier: GPL-2.0-or-later /* * Cryptographic API. * * RIPEMD-160 - RACE Integrity Primitives Evaluation Message Digest. * * Based on the reference implementation by Antoon Bosselaers, ESAT-COSIC * * Copyright (c) 2008 Adrian-Ken Rueegsegger <ken@codelabs.ch> */ #include <crypto/internal/hash.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/string.h> #include "ripemd.h" struct rmd160_ctx { u64 byte_count; u32 state[5]; }; #define K1 RMD_K1 #define K2 RMD_K2 #define K3 RMD_K3 #define K4 RMD_K4 #define K5 RMD_K5 #define KK1 RMD_K6 #define KK2 RMD_K7 #define KK3 RMD_K8 #define KK4 RMD_K9 #define KK5 RMD_K1 #define F1(x, y, z) (x ^ y ^ z) /* XOR */ #define F2(x, y, z) (z ^ (x & (y ^ z))) /* x ? y : z */ #define F3(x, y, z) ((x | ~y) ^ z) #define F4(x, y, z) (y ^ (z & (x ^ y))) /* z ?
x : y */ #define F5(x, y, z) (x ^ (y | ~z)) #define ROUND(a, b, c, d, e, f, k, x, s) { \ (a) += f((b), (c), (d)) + le32_to_cpup(&(x)) + (k); \ (a) = rol32((a), (s)) + (e); \ (c) = rol32((c), 10); \ } static void rmd160_transform(u32 *state, const __le32 *in) { u32 aa, bb, cc, dd, ee, aaa, bbb, ccc, ddd, eee; /* Initialize left lane */ aa = state[0]; bb = state[1]; cc = state[2]; dd = state[3]; ee = state[4]; /* Initialize right lane */ aaa = state[0]; bbb = state[1]; ccc = state[2]; ddd = state[3]; eee = state[4]; /* round 1: left lane */ ROUND(aa, bb, cc, dd, ee, F1, K1, in[0], 11); ROUND(ee, aa, bb, cc, dd, F1, K1, in[1], 14); ROUND(dd, ee, aa, bb, cc, F1, K1, in[2], 15); ROUND(cc, dd, ee, aa, bb, F1, K1, in[3], 12); ROUND(bb, cc, dd, ee, aa, F1, K1, in[4], 5); ROUND(aa, bb, cc, dd, ee, F1, K1, in[5], 8); ROUND(ee, aa, bb, cc, dd, F1, K1, in[6], 7); ROUND(dd, ee, aa, bb, cc, F1, K1, in[7], 9); ROUND(cc, dd, ee, aa, bb, F1, K1, in[8], 11); ROUND(bb, cc, dd, ee, aa, F1, K1, in[9], 13); ROUND(aa, bb, cc, dd, ee, F1, K1, in[10], 14); ROUND(ee, aa, bb, cc, dd, F1, K1, in[11], 15); ROUND(dd, ee, aa, bb, cc, F1, K1, in[12], 6); ROUND(cc, dd, ee, aa, bb, F1, K1, in[13], 7); ROUND(bb, cc, dd, ee, aa, F1, K1, in[14], 9); ROUND(aa, bb, cc, dd, ee, F1, K1, in[15], 8); /* round 2: left lane" */ ROUND(ee, aa, bb, cc, dd, F2, K2, in[7], 7); ROUND(dd, ee, aa, bb, cc, F2, K2, in[4], 6); ROUND(cc, dd, ee, aa, bb, F2, K2, in[13], 8); ROUND(bb, cc, dd, ee, aa, F2, K2, in[1], 13); ROUND(aa, bb, cc, dd, ee, F2, K2, in[10], 11); ROUND(ee, aa, bb, cc, dd, F2, K2, in[6], 9); ROUND(dd, ee, aa, bb, cc, F2, K2, in[15], 7); ROUND(cc, dd, ee, aa, bb, F2, K2, in[3], 15); ROUND(bb, cc, dd, ee, aa, F2, K2, in[12], 7); ROUND(aa, bb, cc, dd, ee, F2, K2, in[0], 12); ROUND(ee, aa, bb, cc, dd, F2, K2, in[9], 15); ROUND(dd, ee, aa, bb, cc, F2, K2, in[5], 9); ROUND(cc, dd, ee, aa, bb, F2, K2, in[2], 11); ROUND(bb, cc, dd, ee, aa, F2, K2, in[14], 7); ROUND(aa, bb, cc, dd, ee, F2, K2, in[11], 13); ROUND(ee, aa, bb, cc, dd, F2, K2, in[8], 12); /* round 3: left lane" */ ROUND(dd, ee, aa, bb, cc, F3, K3, in[3], 11); ROUND(cc, dd, ee, aa, bb, F3, K3, in[10], 13); ROUND(bb, cc, dd, ee, aa, F3, K3, in[14], 6); ROUND(aa, bb, cc, dd, ee, F3, K3, in[4], 7); ROUND(ee, aa, bb, cc, dd, F3, K3, in[9], 14); ROUND(dd, ee, aa, bb, cc, F3, K3, in[15], 9); ROUND(cc, dd, ee, aa, bb, F3, K3, in[8], 13); ROUND(bb, cc, dd, ee, aa, F3, K3, in[1], 15); ROUND(aa, bb, cc, dd, ee, F3, K3, in[2], 14); ROUND(ee, aa, bb, cc, dd, F3, K3, in[7], 8); ROUND(dd, ee, aa, bb, cc, F3, K3, in[0], 13); ROUND(cc, dd, ee, aa, bb, F3, K3, in[6], 6); ROUND(bb, cc, dd, ee, aa, F3, K3, in[13], 5); ROUND(aa, bb, cc, dd, ee, F3, K3, in[11], 12); ROUND(ee, aa, bb, cc, dd, F3, K3, in[5], 7); ROUND(dd, ee, aa, bb, cc, F3, K3, in[12], 5); /* round 4: left lane" */ ROUND(cc, dd, ee, aa, bb, F4, K4, in[1], 11); ROUND(bb, cc, dd, ee, aa, F4, K4, in[9], 12); ROUND(aa, bb, cc, dd, ee, F4, K4, in[11], 14); ROUND(ee, aa, bb, cc, dd, F4, K4, in[10], 15); ROUND(dd, ee, aa, bb, cc, F4, K4, in[0], 14); ROUND(cc, dd, ee, aa, bb, F4, K4, in[8], 15); ROUND(bb, cc, dd, ee, aa, F4, K4, in[12], 9); ROUND(aa, bb, cc, dd, ee, F4, K4, in[4], 8); ROUND(ee, aa, bb, cc, dd, F4, K4, in[13], 9); ROUND(dd, ee, aa, bb, cc, F4, K4, in[3], 14); ROUND(cc, dd, ee, aa, bb, F4, K4, in[7], 5); ROUND(bb, cc, dd, ee, aa, F4, K4, in[15], 6); ROUND(aa, bb, cc, dd, ee, F4, K4, in[14], 8); ROUND(ee, aa, bb, cc, dd, F4, K4, in[5], 6); ROUND(dd, ee, aa, bb, cc, F4, K4, in[6], 5); ROUND(cc, dd, ee, aa, bb, F4, K4, in[2], 
12); /* round 5: left lane" */ ROUND(bb, cc, dd, ee, aa, F5, K5, in[4], 9); ROUND(aa, bb, cc, dd, ee, F5, K5, in[0], 15); ROUND(ee, aa, bb, cc, dd, F5, K5, in[5], 5); ROUND(dd, ee, aa, bb, cc, F5, K5, in[9], 11); ROUND(cc, dd, ee, aa, bb, F5, K5, in[7], 6); ROUND(bb, cc, dd, ee, aa, F5, K5, in[12], 8); ROUND(aa, bb, cc, dd, ee, F5, K5, in[2], 13); ROUND(ee, aa, bb, cc, dd, F5, K5, in[10], 12); ROUND(dd, ee, aa, bb, cc, F5, K5, in[14], 5); ROUND(cc, dd, ee, aa, bb, F5, K5, in[1], 12); ROUND(bb, cc, dd, ee, aa, F5, K5, in[3], 13); ROUND(aa, bb, cc, dd, ee, F5, K5, in[8], 14); ROUND(ee, aa, bb, cc, dd, F5, K5, in[11], 11); ROUND(dd, ee, aa, bb, cc, F5, K5, in[6], 8); ROUND(cc, dd, ee, aa, bb, F5, K5, in[15], 5); ROUND(bb, cc, dd, ee, aa, F5, K5, in[13], 6); /* round 1: right lane */ ROUND(aaa, bbb, ccc, ddd, eee, F5, KK1, in[5], 8); ROUND(eee, aaa, bbb, ccc, ddd, F5, KK1, in[14], 9); ROUND(ddd, eee, aaa, bbb, ccc, F5, KK1, in[7], 9); ROUND(ccc, ddd, eee, aaa, bbb, F5, KK1, in[0], 11); ROUND(bbb, ccc, ddd, eee, aaa, F5, KK1, in[9], 13); ROUND(aaa, bbb, ccc, ddd, eee, F5, KK1, in[2], 15); ROUND(eee, aaa, bbb, ccc, ddd, F5, KK1, in[11], 15); ROUND(ddd, eee, aaa, bbb, ccc, F5, KK1, in[4], 5); ROUND(ccc, ddd, eee, aaa, bbb, F5, KK1, in[13], 7); ROUND(bbb, ccc, ddd, eee, aaa, F5, KK1, in[6], 7); ROUND(aaa, bbb, ccc, ddd, eee, F5, KK1, in[15], 8); ROUND(eee, aaa, bbb, ccc, ddd, F5, KK1, in[8], 11); ROUND(ddd, eee, aaa, bbb, ccc, F5, KK1, in[1], 14); ROUND(ccc, ddd, eee, aaa, bbb, F5, KK1, in[10], 14); ROUND(bbb, ccc, ddd, eee, aaa, F5, KK1, in[3], 12); ROUND(aaa, bbb, ccc, ddd, eee, F5, KK1, in[12], 6); /* round 2: right lane */ ROUND(eee, aaa, bbb, ccc, ddd, F4, KK2, in[6], 9); ROUND(ddd, eee, aaa, bbb, ccc, F4, KK2, in[11], 13); ROUND(ccc, ddd, eee, aaa, bbb, F4, KK2, in[3], 15); ROUND(bbb, ccc, ddd, eee, aaa, F4, KK2, in[7], 7); ROUND(aaa, bbb, ccc, ddd, eee, F4, KK2, in[0], 12); ROUND(eee, aaa, bbb, ccc, ddd, F4, KK2, in[13], 8); ROUND(ddd, eee, aaa, bbb, ccc, F4, KK2, in[5], 9); ROUND(ccc, ddd, eee, aaa, bbb, F4, KK2, in[10], 11); ROUND(bbb, ccc, ddd, eee, aaa, F4, KK2, in[14], 7); ROUND(aaa, bbb, ccc, ddd, eee, F4, KK2, in[15], 7); ROUND(eee, aaa, bbb, ccc, ddd, F4, KK2, in[8], 12); ROUND(ddd, eee, aaa, bbb, ccc, F4, KK2, in[12], 7); ROUND(ccc, ddd, eee, aaa, bbb, F4, KK2, in[4], 6); ROUND(bbb, ccc, ddd, eee, aaa, F4, KK2, in[9], 15); ROUND(aaa, bbb, ccc, ddd, eee, F4, KK2, in[1], 13); ROUND(eee, aaa, bbb, ccc, ddd, F4, KK2, in[2], 11); /* round 3: right lane */ ROUND(ddd, eee, aaa, bbb, ccc, F3, KK3, in[15], 9); ROUND(ccc, ddd, eee, aaa, bbb, F3, KK3, in[5], 7); ROUND(bbb, ccc, ddd, eee, aaa, F3, KK3, in[1], 15); ROUND(aaa, bbb, ccc, ddd, eee, F3, KK3, in[3], 11); ROUND(eee, aaa, bbb, ccc, ddd, F3, KK3, in[7], 8); ROUND(ddd, eee, aaa, bbb, ccc, F3, KK3, in[14], 6); ROUND(ccc, ddd, eee, aaa, bbb, F3, KK3, in[6], 6); ROUND(bbb, ccc, ddd, eee, aaa, F3, KK3, in[9], 14); ROUND(aaa, bbb, ccc, ddd, eee, F3, KK3, in[11], 12); ROUND(eee, aaa, bbb, ccc, ddd, F3, KK3, in[8], 13); ROUND(ddd, eee, aaa, bbb, ccc, F3, KK3, in[12], 5); ROUND(ccc, ddd, eee, aaa, bbb, F3, KK3, in[2], 14); ROUND(bbb, ccc, ddd, eee, aaa, F3, KK3, in[10], 13); ROUND(aaa, bbb, ccc, ddd, eee, F3, KK3, in[0], 13); ROUND(eee, aaa, bbb, ccc, ddd, F3, KK3, in[4], 7); ROUND(ddd, eee, aaa, bbb, ccc, F3, KK3, in[13], 5); /* round 4: right lane */ ROUND(ccc, ddd, eee, aaa, bbb, F2, KK4, in[8], 15); ROUND(bbb, ccc, ddd, eee, aaa, F2, KK4, in[6], 5); ROUND(aaa, bbb, ccc, ddd, eee, F2, KK4, in[4], 8); ROUND(eee, aaa, bbb, ccc, ddd, F2, KK4, 
in[1], 11); ROUND(ddd, eee, aaa, bbb, ccc, F2, KK4, in[3], 14); ROUND(ccc, ddd, eee, aaa, bbb, F2, KK4, in[11], 14); ROUND(bbb, ccc, ddd, eee, aaa, F2, KK4, in[15], 6); ROUND(aaa, bbb, ccc, ddd, eee, F2, KK4, in[0], 14); ROUND(eee, aaa, bbb, ccc, ddd, F2, KK4, in[5], 6); ROUND(ddd, eee, aaa, bbb, ccc, F2, KK4, in[12], 9); ROUND(ccc, ddd, eee, aaa, bbb, F2, KK4, in[2], 12); ROUND(bbb, ccc, ddd, eee, aaa, F2, KK4, in[13], 9); ROUND(aaa, bbb, ccc, ddd, eee, F2, KK4, in[9], 12); ROUND(eee, aaa, bbb, ccc, ddd, F2, KK4, in[7], 5); ROUND(ddd, eee, aaa, bbb, ccc, F2, KK4, in[10], 15); ROUND(ccc, ddd, eee, aaa, bbb, F2, KK4, in[14], 8); /* round 5: right lane */ ROUND(bbb, ccc, ddd, eee, aaa, F1, KK5, in[12], 8); ROUND(aaa, bbb, ccc, ddd, eee, F1, KK5, in[15], 5); ROUND(eee, aaa, bbb, ccc, ddd, F1, KK5, in[10], 12); ROUND(ddd, eee, aaa, bbb, ccc, F1, KK5, in[4], 9); ROUND(ccc, ddd, eee, aaa, bbb, F1, KK5, in[1], 12); ROUND(bbb, ccc, ddd, eee, aaa, F1, KK5, in[5], 5); ROUND(aaa, bbb, ccc, ddd, eee, F1, KK5, in[8], 14); ROUND(eee, aaa, bbb, ccc, ddd, F1, KK5, in[7], 6); ROUND(ddd, eee, aaa, bbb, ccc, F1, KK5, in[6], 8); ROUND(ccc, ddd, eee, aaa, bbb, F1, KK5, in[2], 13); ROUND(bbb, ccc, ddd, eee, aaa, F1, KK5, in[13], 6); ROUND(aaa, bbb, ccc, ddd, eee, F1, KK5, in[14], 5); ROUND(eee, aaa, bbb, ccc, ddd, F1, KK5, in[0], 15); ROUND(ddd, eee, aaa, bbb, ccc, F1, KK5, in[3], 13); ROUND(ccc, ddd, eee, aaa, bbb, F1, KK5, in[9], 11); ROUND(bbb, ccc, ddd, eee, aaa, F1, KK5, in[11], 11); /* combine results */ ddd += cc + state[1]; /* final result for state[0] */ state[1] = state[2] + dd + eee; state[2] = state[3] + ee + aaa; state[3] = state[4] + aa + bbb; state[4] = state[0] + bb + ccc; state[0] = ddd; } static int rmd160_init(struct shash_desc *desc) { struct rmd160_ctx *rctx = shash_desc_ctx(desc); rctx->byte_count = 0; rctx->state[0] = RMD_H0; rctx->state[1] = RMD_H1; rctx->state[2] = RMD_H2; rctx->state[3] = RMD_H3; rctx->state[4] = RMD_H4; return 0; } static int rmd160_update(struct shash_desc *desc, const u8 *data, unsigned int len) { int remain = len - round_down(len, RMD160_BLOCK_SIZE); struct rmd160_ctx *rctx = shash_desc_ctx(desc); __le32 buffer[RMD160_BLOCK_SIZE / 4]; rctx->byte_count += len - remain; do { memcpy(buffer, data, sizeof(buffer)); rmd160_transform(rctx->state, buffer); data += sizeof(buffer); len -= sizeof(buffer); } while (len >= sizeof(buffer)); memzero_explicit(buffer, sizeof(buffer)); return remain; } /* Add padding and return the message digest. 
*/ static int rmd160_finup(struct shash_desc *desc, const u8 *src, unsigned int len, u8 *out) { unsigned int bit_offset = RMD160_BLOCK_SIZE / 8 - 1; struct rmd160_ctx *rctx = shash_desc_ctx(desc); union { __le64 l64[RMD160_BLOCK_SIZE / 4]; __le32 l32[RMD160_BLOCK_SIZE / 2]; u8 u8[RMD160_BLOCK_SIZE * 2]; } block = {}; __le32 *dst = (__le32 *)out; u32 i; rctx->byte_count += len; if (len >= bit_offset * 8) bit_offset += RMD160_BLOCK_SIZE / 8; memcpy(&block, src, len); block.u8[len] = 0x80; block.l64[bit_offset] = cpu_to_le64(rctx->byte_count << 3); rmd160_transform(rctx->state, block.l32); if (bit_offset > RMD160_BLOCK_SIZE / 8) rmd160_transform(rctx->state, block.l32 + RMD160_BLOCK_SIZE / 4); memzero_explicit(&block, sizeof(block)); /* Store state in digest */ for (i = 0; i < 5; i++) dst[i] = cpu_to_le32p(&rctx->state[i]); return 0; } static struct shash_alg alg = { .digestsize = RMD160_DIGEST_SIZE, .init = rmd160_init, .update = rmd160_update, .finup = rmd160_finup, .descsize = sizeof(struct rmd160_ctx), .base = { .cra_name = "rmd160", .cra_driver_name = "rmd160-generic", .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, .cra_blocksize = RMD160_BLOCK_SIZE, .cra_module = THIS_MODULE, } }; static int __init rmd160_mod_init(void) { return crypto_register_shash(&alg); } static void __exit rmd160_mod_fini(void) { crypto_unregister_shash(&alg); } module_init(rmd160_mod_init); module_exit(rmd160_mod_fini); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Adrian-Ken Rueegsegger <ken@codelabs.ch>"); MODULE_DESCRIPTION("RIPEMD-160 Message Digest"); MODULE_ALIAS_CRYPTO("rmd160"); |
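/*
 * Usage sketch (not part of this file): computing a RIPEMD-160 digest of a
 * buffer through the shash API that the algorithm above registers with.
 * Error handling is minimal and the function name is invented; the calls
 * themselves are the standard crypto_shash_*() interface from <crypto/hash.h>.
 */
#include <crypto/hash.h>

static int example_rmd160_digest(const u8 *data, unsigned int len,
				 u8 *out /* RMD160_DIGEST_SIZE bytes */)
{
	struct crypto_shash *tfm;
	int err;

	tfm = crypto_alloc_shash("rmd160", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_shash_tfm_digest(tfm, data, len, out);
	crypto_free_shash(tfm);
	return err;
}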
// SPDX-License-Identifier: GPL-2.0-or-later /* mpihelp-sub.c - MPI helper functions * Copyright (C) 1994, 1996 Free Software Foundation, Inc. * Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc. * * This file is part of GnuPG. * * Note: This code is heavily based on the GNU MP Library. * Actually it's the same code with only minor changes in the * way the data is stored; this is to support the abstraction * of an optional secure memory allocation which may be used * to avoid revealing of sensitive data due to paging etc. * The GNU MP Library itself is published under the LGPL; * however I decided to publish this code under the plain GPL. */ #include "mpi-internal.h" /**************** * Compare OP1_PTR/OP1_SIZE with OP2_PTR/OP2_SIZE. * There are no restrictions on the relative sizes of * the two arguments. * Return 1 if OP1 > OP2, 0 if they are equal, and -1 if OP1 < OP2. */ int mpihelp_cmp(mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size) { mpi_size_t i; mpi_limb_t op1_word, op2_word; for (i = size - 1; i >= 0; i--) { op1_word = op1_ptr[i]; op2_word = op2_ptr[i]; if (op1_word != op2_word) goto diff; } return 0; diff: /* This can *not* be simplified to * op1_word - op2_word * since that expression might give signed overflow. */ return (op1_word > op2_word) ? 1 : -1; }
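/*
 * Illustrative sketch (not part of the MPI library): mpihelp_cmp() walks from
 * the most significant limb downwards, so equal-length operands are ordered
 * by the first differing limb.  The values below are invented to show the
 * expected return values.
 */
static void example_mpihelp_cmp(void)
{
	mpi_limb_t a[2] = { 5, 9 };	/* a = 9 * 2^BITS_PER_MPI_LIMB + 5 */
	mpi_limb_t b[2] = { 7, 9 };	/* b = 9 * 2^BITS_PER_MPI_LIMB + 7 */

	(void)mpihelp_cmp(a, b, 2);	/* limb 1 equal, limb 0: 5 < 7 -> -1 */
	(void)mpihelp_cmp(b, a, 2);	/* -> 1 */
	(void)mpihelp_cmp(a, a, 2);	/* identical operands -> 0 */
}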
/* SPDX-License-Identifier: GPL-2.0 */ /* * Shared Memory Communications over RDMA (SMC-R) and RoCE * * Definitions for the SMC module (socket related) * * Copyright IBM Corp.
2016 * * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> */ #ifndef __SMC_H #define __SMC_H #include <linux/socket.h> #include <linux/types.h> #include <linux/compiler.h> /* __aligned */ #include <net/genetlink.h> #include <net/sock.h> #include "smc_ib.h" #define SMC_V1 1 /* SMC version V1 */ #define SMC_V2 2 /* SMC version V2 */ #define SMC_RELEASE_0 0 #define SMC_RELEASE_1 1 #define SMC_RELEASE SMC_RELEASE_1 /* the latest release version */ #define SMCPROTO_SMC 0 /* SMC protocol, IPv4 */ #define SMCPROTO_SMC6 1 /* SMC protocol, IPv6 */ #define SMC_AUTOCORKING_DEFAULT_SIZE 0x10000 /* 64K by default */ extern struct proto smc_proto; extern struct proto smc_proto6; extern struct smc_hashinfo smc_v4_hashinfo; extern struct smc_hashinfo smc_v6_hashinfo; int smc_hash_sk(struct sock *sk); void smc_unhash_sk(struct sock *sk); void smc_release_cb(struct sock *sk); int smc_release(struct socket *sock); int smc_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); int smc_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags); int smc_accept(struct socket *sock, struct socket *new_sock, struct proto_accept_arg *arg); int smc_getname(struct socket *sock, struct sockaddr *addr, int peer); __poll_t smc_poll(struct file *file, struct socket *sock, poll_table *wait); int smc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int smc_listen(struct socket *sock, int backlog); int smc_shutdown(struct socket *sock, int how); int smc_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen); int smc_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen); int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len); int smc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); ssize_t smc_splice_read(struct socket *sock, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); /* smc sock initialization */ void smc_sk_init(struct net *net, struct sock *sk, int protocol); /* clcsock initialization */ int smc_create_clcsk(struct net *net, struct sock *sk, int family); #ifdef ATOMIC64_INIT #define KERNEL_HAS_ATOMIC64 #endif enum smc_state { /* possible states of an SMC socket */ SMC_ACTIVE = 1, SMC_INIT = 2, SMC_CLOSED = 7, SMC_LISTEN = 10, /* normal close */ SMC_PEERCLOSEWAIT1 = 20, SMC_PEERCLOSEWAIT2 = 21, SMC_APPFINCLOSEWAIT = 24, SMC_APPCLOSEWAIT1 = 22, SMC_APPCLOSEWAIT2 = 23, SMC_PEERFINCLOSEWAIT = 25, /* abnormal close */ SMC_PEERABORTWAIT = 26, SMC_PROCESSABORT = 27, }; enum smc_supplemental_features { SMC_SPF_EMULATED_ISM_DEV = 0, }; #define SMC_FEATURE_MASK \ (BIT(SMC_SPF_EMULATED_ISM_DEV)) struct smc_link_group; struct smc_wr_rx_hdr { /* common prefix part of LLC and CDC to demultiplex */ union { u8 type; #if defined(__BIG_ENDIAN_BITFIELD) struct { u8 llc_version:4, llc_type:4; }; #elif defined(__LITTLE_ENDIAN_BITFIELD) struct { u8 llc_type:4, llc_version:4; }; #endif }; } __aligned(1); struct smc_cdc_conn_state_flags { #if defined(__BIG_ENDIAN_BITFIELD) u8 peer_done_writing : 1; /* Sending done indicator */ u8 peer_conn_closed : 1; /* Peer connection closed indicator */ u8 peer_conn_abort : 1; /* Abnormal close indicator */ u8 reserved : 5; #elif defined(__LITTLE_ENDIAN_BITFIELD) u8 reserved : 5; u8 peer_conn_abort : 1; u8 peer_conn_closed : 1; u8 peer_done_writing : 1; #endif }; struct smc_cdc_producer_flags { #if defined(__BIG_ENDIAN_BITFIELD) u8 write_blocked : 1; /* Writing Blocked, no rx buf space */ u8 urg_data_pending : 1; 
/* Urgent Data Pending */ u8 urg_data_present : 1; /* Urgent Data Present */ u8 cons_curs_upd_req : 1; /* cursor update requested */ u8 failover_validation : 1;/* message replay due to failover */ u8 reserved : 3; #elif defined(__LITTLE_ENDIAN_BITFIELD) u8 reserved : 3; u8 failover_validation : 1; u8 cons_curs_upd_req : 1; u8 urg_data_present : 1; u8 urg_data_pending : 1; u8 write_blocked : 1; #endif }; /* in host byte order */ union smc_host_cursor { /* SMC cursor - an offset in an RMBE */ struct { u16 reserved; u16 wrap; /* window wrap sequence number */ u32 count; /* cursor (= offset) part */ }; #ifdef KERNEL_HAS_ATOMIC64 atomic64_t acurs; /* for atomic processing */ #else u64 acurs; /* for atomic processing */ #endif } __aligned(8); /* in host byte order, except for flag bitfields in network byte order */ struct smc_host_cdc_msg { /* Connection Data Control message */ struct smc_wr_rx_hdr common; /* .type = 0xFE */ u8 len; /* length = 44 */ u16 seqno; /* connection seq # */ u32 token; /* alert_token */ union smc_host_cursor prod; /* producer cursor */ union smc_host_cursor cons; /* consumer cursor, * piggy backed "ack" */ struct smc_cdc_producer_flags prod_flags; /* conn. tx/rx status */ struct smc_cdc_conn_state_flags conn_state_flags; /* peer conn. status*/ u8 reserved[18]; } __aligned(8); enum smc_urg_state { SMC_URG_VALID = 1, /* data present */ SMC_URG_NOTYET = 2, /* data pending */ SMC_URG_READ = 3, /* data was already read */ }; struct smc_mark_woken { bool woken; void *key; wait_queue_entry_t wait_entry; }; struct smc_connection { struct rb_node alert_node; struct smc_link_group *lgr; /* link group of connection */ struct smc_link *lnk; /* assigned SMC-R link */ u32 alert_token_local; /* unique conn. id */ u8 peer_rmbe_idx; /* from tcp handshake */ int peer_rmbe_size; /* size of peer rx buffer */ atomic_t peer_rmbe_space;/* remaining free bytes in peer * rmbe */ int rtoken_idx; /* idx to peer RMB rkey/addr */ struct smc_buf_desc *sndbuf_desc; /* send buffer descriptor */ struct smc_buf_desc *rmb_desc; /* RMBE descriptor */ int rmbe_size_comp; /* compressed notation */ int rmbe_update_limit; /* lower limit for consumer * cursor update */ struct smc_host_cdc_msg local_tx_ctrl; /* host byte order staging * buffer for CDC msg send * .prod cf. TCP snd_nxt * .cons cf. TCP sends ack */ union smc_host_cursor local_tx_ctrl_fin; /* prod crsr - confirmed by peer */ union smc_host_cursor tx_curs_prep; /* tx - prepared data * snd_max..wmem_alloc */ union smc_host_cursor tx_curs_sent; /* tx - sent data * snd_nxt ? */ union smc_host_cursor tx_curs_fin; /* tx - confirmed by peer * snd-wnd-begin ? */ atomic_t sndbuf_space; /* remaining space in sndbuf */ u16 tx_cdc_seq; /* sequence # for CDC send */ u16 tx_cdc_seq_fin; /* sequence # - tx completed */ spinlock_t send_lock; /* protect wr_sends */ atomic_t cdc_pend_tx_wr; /* number of pending tx CDC wqe * - inc when post wqe, * - dec on polled tx cqe */ wait_queue_head_t cdc_pend_tx_wq; /* wakeup on no cdc_pend_tx_wr*/ struct delayed_work tx_work; /* retry of smc_cdc_msg_send */ u32 tx_off; /* base offset in peer rmb */ struct smc_host_cdc_msg local_rx_ctrl; /* filled during event_handl. * .prod cf. TCP rcv_nxt * .cons cf. TCP snd_una */ union smc_host_cursor rx_curs_confirmed; /* confirmed to peer * source of snd_una ? 
*/ union smc_host_cursor urg_curs; /* points at urgent byte */ enum smc_urg_state urg_state; bool urg_tx_pend; /* urgent data staged */ bool urg_rx_skip_pend; /* indicate urgent oob data * read, but previous regular * data still pending */ char urg_rx_byte; /* urgent byte */ bool tx_in_release_sock; /* flush pending tx data in * sock release_cb() */ atomic_t bytes_to_rcv; /* arrived data, * not yet received */ atomic_t splice_pending; /* number of spliced bytes * pending processing */ #ifndef KERNEL_HAS_ATOMIC64 spinlock_t acurs_lock; /* protect cursors */ #endif struct work_struct close_work; /* peer sent some closing */ struct work_struct abort_work; /* abort the connection */ struct tasklet_struct rx_tsklet; /* Receiver tasklet for SMC-D */ u8 rx_off; /* receive offset: * 0 for SMC-R, 32 for SMC-D */ u64 peer_token; /* SMC-D token of peer */ u8 killed : 1; /* abnormal termination */ u8 freed : 1; /* normal termination */ u8 out_of_sync : 1; /* out of sync with peer */ }; struct smc_sock { /* smc sock container */ struct sock sk; #if IS_ENABLED(CONFIG_IPV6) struct ipv6_pinfo *pinet6; #endif struct socket *clcsock; /* internal tcp socket */ void (*clcsk_state_change)(struct sock *sk); /* original stat_change fct. */ void (*clcsk_data_ready)(struct sock *sk); /* original data_ready fct. */ void (*clcsk_write_space)(struct sock *sk); /* original write_space fct. */ void (*clcsk_error_report)(struct sock *sk); /* original error_report fct. */ struct smc_connection conn; /* smc connection */ struct smc_sock *listen_smc; /* listen parent */ struct work_struct connect_work; /* handle non-blocking connect*/ struct work_struct tcp_listen_work;/* handle tcp socket accepts */ struct work_struct smc_listen_work;/* prepare new accept socket */ struct list_head accept_q; /* sockets to be accepted */ spinlock_t accept_q_lock; /* protects accept_q */ bool limit_smc_hs; /* put constraint on handshake */ bool use_fallback; /* fallback to tcp */ int fallback_rsn; /* reason for fallback */ u32 peer_diagnosis; /* decline reason from peer */ atomic_t queued_smc_hs; /* queued smc handshakes */ struct inet_connection_sock_af_ops af_ops; const struct inet_connection_sock_af_ops *ori_af_ops; /* original af ops */ int sockopt_defer_accept; /* sockopt TCP_DEFER_ACCEPT * value */ u8 wait_close_tx_prepared : 1; /* shutdown wr or close * started, waiting for unsent * data to be sent */ u8 connect_nonblock : 1; /* non-blocking connect in * flight */ struct mutex clcsock_release_lock; /* protects clcsock of a listen * socket * */ }; #define smc_sk(ptr) container_of_const(ptr, struct smc_sock, sk) static inline void smc_init_saved_callbacks(struct smc_sock *smc) { smc->clcsk_state_change = NULL; smc->clcsk_data_ready = NULL; smc->clcsk_write_space = NULL; smc->clcsk_error_report = NULL; } static inline struct smc_sock *smc_clcsock_user_data(const struct sock *clcsk) { return (struct smc_sock *) ((uintptr_t)clcsk->sk_user_data & ~SK_USER_DATA_NOCOPY); } /* save target_cb in saved_cb, and replace target_cb with new_cb */ static inline void smc_clcsock_replace_cb(void (**target_cb)(struct sock *), void (*new_cb)(struct sock *), void (**saved_cb)(struct sock *)) { /* only save once */ if (!*saved_cb) *saved_cb = *target_cb; *target_cb = new_cb; } /* restore target_cb to saved_cb, and reset saved_cb to NULL */ static inline void smc_clcsock_restore_cb(void (**target_cb)(struct sock *), void (**saved_cb)(struct sock *)) { if (!*saved_cb) return; *target_cb = *saved_cb; *saved_cb = NULL; } extern struct workqueue_struct 
*smc_hs_wq; /* wq for handshake work */ extern struct workqueue_struct *smc_close_wq; /* wq for close work */ #define SMC_SYSTEMID_LEN 8 extern u8 local_systemid[SMC_SYSTEMID_LEN]; /* unique system identifier */ #define ntohll(x) be64_to_cpu(x) #define htonll(x) cpu_to_be64(x) /* convert an u32 value into network byte order, store it into a 3 byte field */ static inline void hton24(u8 *net, u32 host) { __be32 t; t = cpu_to_be32(host); memcpy(net, ((u8 *)&t) + 1, 3); } /* convert a received 3 byte field into host byte order*/ static inline u32 ntoh24(u8 *net) { __be32 t = 0; memcpy(((u8 *)&t) + 1, net, 3); return be32_to_cpu(t); } #ifdef CONFIG_XFRM static inline bool using_ipsec(struct smc_sock *smc) { return (smc->clcsock->sk->sk_policy[0] || smc->clcsock->sk->sk_policy[1]) ? true : false; } #else static inline bool using_ipsec(struct smc_sock *smc) { return false; } #endif struct smc_gidlist; struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock); void smc_close_non_accepted(struct sock *sk); void smc_fill_gid_list(struct smc_link_group *lgr, struct smc_gidlist *gidlist, struct smc_ib_device *known_dev, u8 *known_gid); /* smc handshake limitation interface for netlink */ int smc_nl_dump_hs_limitation(struct sk_buff *skb, struct netlink_callback *cb); int smc_nl_enable_hs_limitation(struct sk_buff *skb, struct genl_info *info); int smc_nl_disable_hs_limitation(struct sk_buff *skb, struct genl_info *info); static inline void smc_sock_set_flag(struct sock *sk, enum sock_flags flag) { set_bit(flag, &sk->sk_flags); } #endif /* __SMC_H */ |
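The hton24()/ntoh24() helpers above squeeze a host u32 into the 3-byte fields SMC carries on the wire by keeping only the low three bytes of the big-endian representation. A minimal userspace sketch of the same round trip, using <arpa/inet.h> byte-order helpers in place of cpu_to_be32()/be32_to_cpu() and an arbitrary test value:

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

/* Mirror of hton24(): keep the low 3 bytes of the big-endian form. */
static void hton24(uint8_t *net, uint32_t host)
{
	uint32_t t = htonl(host);

	memcpy(net, (uint8_t *)&t + 1, 3);
}

/* Mirror of ntoh24(): rebuild a 32-bit value from the 3-byte field. */
static uint32_t ntoh24(const uint8_t *net)
{
	uint32_t t = 0;

	memcpy((uint8_t *)&t + 1, net, 3);
	return ntohl(t);
}

int main(void)
{
	uint8_t wire[3];
	uint32_t val = 0x00abcdef;	/* any value that fits in 24 bits */

	hton24(wire, val);
	printf("wire: %02x %02x %02x\n", wire[0], wire[1], wire[2]);
	printf("round trip: 0x%06x\n", ntoh24(wire));
	return 0;
}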
| 134 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 | // SPDX-License-Identifier: GPL-2.0+ /* * Copyright (C) 2019 Microsoft Corporation * * Author: Lakshmi Ramasubramanian (nramas@linux.microsoft.com) * * File: ima_asymmetric_keys.c * Defines an IMA hook to measure asymmetric keys on key * create or update. */ #include <keys/asymmetric-type.h> #include <linux/user_namespace.h> #include <linux/ima.h> #include "ima.h" /** * ima_post_key_create_or_update - measure asymmetric keys * @keyring: keyring to which the key is linked to * @key: created or updated key * @payload: The data used to instantiate or update the key. * @payload_len: The length of @payload. * @flags: key flags * @create: flag indicating whether the key was created or updated * * Keys can only be measured, not appraised. * The payload data used to instantiate or update the key is measured. */ void ima_post_key_create_or_update(struct key *keyring, struct key *key, const void *payload, size_t payload_len, unsigned long flags, bool create) { bool queued = false; /* Only asymmetric keys are handled by this hook. */ if (key->type != &key_type_asymmetric) return; if (!payload || (payload_len == 0)) return; if (ima_should_queue_key()) queued = ima_queue_key(keyring, payload, payload_len); if (queued) return; /* * keyring->description points to the name of the keyring * (such as ".builtin_trusted_keys", ".ima", etc.) to * which the given key is linked to. * * The name of the keyring is passed in the "eventname" * parameter to process_buffer_measurement() and is set * in the "eventname" field in ima_event_data for * the key measurement IMA event. * * The name of the keyring is also passed in the "keyring" * parameter to process_buffer_measurement() to check * if the IMA policy is configured to measure a key linked * to the given keyring. */ process_buffer_measurement(&nop_mnt_idmap, NULL, payload, payload_len, keyring->description, KEY_CHECK, 0, keyring->description, false, NULL, 0); } |
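The hook above runs when an asymmetric key is created or updated on a keyring; the key payload itself is what gets measured. A hedged userspace sketch of the operation that reaches it: loading a DER-encoded certificate as an "asymmetric" key with add_key(2). The certificate path is a placeholder, the program needs -lkeyutils, and whether a measurement is actually recorded depends on the loaded IMA policy (a measure rule with func=KEY_CHECK).

#include <stdio.h>
#include <keyutils.h>

int main(void)
{
	unsigned char payload[8192];
	size_t len;
	key_serial_t key;
	FILE *f = fopen("/tmp/cert.der", "rb");	/* placeholder path */

	if (!f) {
		perror("fopen");
		return 1;
	}
	len = fread(payload, 1, sizeof(payload), f);
	fclose(f);

	/* Instantiating an "asymmetric" key from the certificate payload is
	 * the key-create path the IMA hook above observes. */
	key = add_key("asymmetric", "example-cert", payload, len,
		      KEY_SPEC_SESSION_KEYRING);
	if (key < 0) {
		perror("add_key");
		return 1;
	}
	printf("key serial: %d\n", key);
	return 0;
}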
| 86 122 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_CONNTRACK_SEQADJ_H #define _NF_CONNTRACK_SEQADJ_H #include <net/netfilter/nf_conntrack_extend.h> /** * struct nf_ct_seqadj - sequence number adjustment information * * @correction_pos: position of the last TCP sequence number modification * @offset_before: sequence number offset before last modification * @offset_after: sequence number offset after last modification */ struct nf_ct_seqadj { u32 correction_pos; s32 offset_before; s32 offset_after; }; struct nf_conn_seqadj { struct nf_ct_seqadj seq[IP_CT_DIR_MAX]; }; static inline struct nf_conn_seqadj *nfct_seqadj(const struct nf_conn *ct) { return nf_ct_ext_find(ct, NF_CT_EXT_SEQADJ); } static inline struct nf_conn_seqadj *nfct_seqadj_ext_add(struct nf_conn *ct) { return nf_ct_ext_add(ct, NF_CT_EXT_SEQADJ, GFP_ATOMIC); } int nf_ct_seqadj_init(struct nf_conn *ct, enum ip_conntrack_info ctinfo, s32 off); int nf_ct_seqadj_set(struct nf_conn *ct, enum ip_conntrack_info ctinfo, __be32 seq, s32 off); void nf_ct_tcp_seqadj_set(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, s32 off); int nf_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int protoff); s32 nf_ct_seq_offset(const struct nf_conn *ct, enum ip_conntrack_dir, u32 seq); #endif /* _NF_CONNTRACK_SEQADJ_H */ |
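The three fields of struct nf_ct_seqadj encode a single adjustment point: packets whose TCP sequence number lies after correction_pos need offset_after applied, earlier ones need offset_before. A standalone sketch of that selection, simplified to a plain comparison rather than the kernel's wrap-safe after() macro:

#include <stdint.h>
#include <stdio.h>

struct seqadj {
	uint32_t correction_pos;	/* last modification point */
	int32_t offset_before;		/* offset for earlier sequence numbers */
	int32_t offset_after;		/* offset for later sequence numbers */
};

static int32_t seq_offset(const struct seqadj *s, uint32_t seq)
{
	return seq > s->correction_pos ? s->offset_after : s->offset_before;
}

int main(void)
{
	/* e.g. an ALG rewrote the payload at seq 1000, growing it by 8 bytes */
	struct seqadj s = { .correction_pos = 1000,
			    .offset_before = 0, .offset_after = 8 };

	printf("seq  900 -> %+d\n", seq_offset(&s, 900));
	printf("seq 2000 -> %+d\n", seq_offset(&s, 2000));
	return 0;
}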
| 6 6 6 55 54 47 46 13 46 44 41 40 6 6 6 6 6 6 6 6 6 6 6 6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 | // SPDX-License-Identifier: GPL-2.0 /* * fs/partitions/atari.c * * Code extracted from drivers/block/genhd.c * * Copyright (C) 1991-1998 Linus Torvalds * Re-organised Feb 1998 Russell King */ #include <linux/ctype.h> #include "check.h" #include "atari.h" /* ++guenther: this should be settable by the user ("make config")?. */ #define ICD_PARTS /* check if a partition entry looks valid -- Atari format is assumed if at least one of the primary entries is ok this way */ #define VALID_PARTITION(pi,hdsiz) \ (((pi)->flg & 1) && \ isalnum((pi)->id[0]) && isalnum((pi)->id[1]) && isalnum((pi)->id[2]) && \ be32_to_cpu((pi)->st) <= (hdsiz) && \ be32_to_cpu((pi)->st) + be32_to_cpu((pi)->siz) <= (hdsiz)) static inline int OK_id(char *s) { return memcmp (s, "GEM", 3) == 0 || memcmp (s, "BGM", 3) == 0 || memcmp (s, "LNX", 3) == 0 || memcmp (s, "SWP", 3) == 0 || memcmp (s, "RAW", 3) == 0 ; } int atari_partition(struct parsed_partitions *state) { Sector sect; struct rootsector *rs; struct partition_info *pi; u32 extensect; u32 hd_size; int slot; #ifdef ICD_PARTS int part_fmt = 0; /* 0:unknown, 1:AHDI, 2:ICD/Supra */ #endif /* * ATARI partition scheme supports 512 lba only. If this is not * the case, bail early to avoid miscalculating hd_size. 
*/ if (queue_logical_block_size(state->disk->queue) != 512) return 0; rs = read_part_sector(state, 0, &sect); if (!rs) return -1; /* Verify this is an Atari rootsector: */ hd_size = get_capacity(state->disk); if (!VALID_PARTITION(&rs->part[0], hd_size) && !VALID_PARTITION(&rs->part[1], hd_size) && !VALID_PARTITION(&rs->part[2], hd_size) && !VALID_PARTITION(&rs->part[3], hd_size)) { /* * if there's no valid primary partition, assume that no Atari * format partition table (there's no reliable magic or the like * :-() */ put_dev_sector(sect); return 0; } pi = &rs->part[0]; strlcat(state->pp_buf, " AHDI", PAGE_SIZE); for (slot = 1; pi < &rs->part[4] && slot < state->limit; slot++, pi++) { struct rootsector *xrs; Sector sect2; ulong partsect; if ( !(pi->flg & 1) ) continue; /* active partition */ if (memcmp (pi->id, "XGM", 3) != 0) { /* we don't care about other id's */ put_partition (state, slot, be32_to_cpu(pi->st), be32_to_cpu(pi->siz)); continue; } /* extension partition */ #ifdef ICD_PARTS part_fmt = 1; #endif strlcat(state->pp_buf, " XGM<", PAGE_SIZE); partsect = extensect = be32_to_cpu(pi->st); while (1) { xrs = read_part_sector(state, partsect, &sect2); if (!xrs) { printk (" block %ld read failed\n", partsect); put_dev_sector(sect); return -1; } /* ++roman: sanity check: bit 0 of flg field must be set */ if (!(xrs->part[0].flg & 1)) { printk( "\nFirst sub-partition in extended partition is not valid!\n" ); put_dev_sector(sect2); break; } put_partition(state, slot, partsect + be32_to_cpu(xrs->part[0].st), be32_to_cpu(xrs->part[0].siz)); if (!(xrs->part[1].flg & 1)) { /* end of linked partition list */ put_dev_sector(sect2); break; } if (memcmp( xrs->part[1].id, "XGM", 3 ) != 0) { printk("\nID of extended partition is not XGM!\n"); put_dev_sector(sect2); break; } partsect = be32_to_cpu(xrs->part[1].st) + extensect; put_dev_sector(sect2); if (++slot == state->limit) { printk( "\nMaximum number of partitions reached!\n" ); break; } } strlcat(state->pp_buf, " >", PAGE_SIZE); } #ifdef ICD_PARTS if ( part_fmt!=1 ) { /* no extended partitions -> test ICD-format */ pi = &rs->icdpart[0]; /* sanity check: no ICD format if first partition invalid */ if (OK_id(pi->id)) { strlcat(state->pp_buf, " ICD<", PAGE_SIZE); for (; pi < &rs->icdpart[8] && slot < state->limit; slot++, pi++) { /* accept only GEM,BGM,RAW,LNX,SWP partitions */ if (!((pi->flg & 1) && OK_id(pi->id))) continue; put_partition (state, slot, be32_to_cpu(pi->st), be32_to_cpu(pi->siz)); } strlcat(state->pp_buf, " >", PAGE_SIZE); } } #endif put_dev_sector(sect); strlcat(state->pp_buf, "\n", PAGE_SIZE); return 1; } |
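The VALID_PARTITION() and OK_id() checks above are the only "magic" an AHDI rootsector offers: an in-use flag bit, an alphanumeric three-character id, and start/size values that fit on the disk. A standalone sketch of the same test, with the fields kept in host byte order (on disk they are big-endian):

#include <ctype.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct pinfo {
	uint8_t  flg;		/* bit 0: entry in use */
	char     id[3];		/* "GEM", "BGM", "XGM", ... */
	uint32_t st;		/* start sector */
	uint32_t siz;		/* length in sectors */
};

static bool valid_partition(const struct pinfo *pi, uint32_t hd_size)
{
	return (pi->flg & 1) &&
	       isalnum((unsigned char)pi->id[0]) &&
	       isalnum((unsigned char)pi->id[1]) &&
	       isalnum((unsigned char)pi->id[2]) &&
	       pi->st <= hd_size &&
	       pi->st + pi->siz <= hd_size;
}

int main(void)
{
	struct pinfo good = { .flg = 1, .id = { 'G', 'E', 'M' }, .st = 2, .siz = 1000 };
	struct pinfo bad  = { .flg = 1, .id = { 'G', 'E', 'M' }, .st = 2, .siz = 9999 };

	printf("good: %d, oversized: %d\n",
	       valid_partition(&good, 2048), valid_partition(&bad, 2048));
	return 0;
}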
| 90 2785 2294 2291 2294 2289 2294 2780 2294 1259 1259 1259 1878 1879 1874 1789 1786 1788 1788 174 6694 312 307 312 11 11 10 11 6694 6673 6693 2066 2065 11 11 11 11 11 6 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 | // SPDX-License-Identifier: GPL-2.0 #define CREATE_TRACE_POINTS #include <trace/events/mmap_lock.h> #include <linux/mm.h> #include <linux/cgroup.h> #include <linux/memcontrol.h> #include <linux/mmap_lock.h> #include <linux/mutex.h> #include <linux/percpu.h> #include <linux/rcupdate.h> #include <linux/smp.h> #include <linux/trace_events.h> #include <linux/local_lock.h> EXPORT_TRACEPOINT_SYMBOL(mmap_lock_start_locking); EXPORT_TRACEPOINT_SYMBOL(mmap_lock_acquire_returned); EXPORT_TRACEPOINT_SYMBOL(mmap_lock_released); #ifdef CONFIG_TRACING /* * Trace calls must be in a separate file, as otherwise there's a circular * dependency between linux/mmap_lock.h and trace/events/mmap_lock.h. */ void __mmap_lock_do_trace_start_locking(struct mm_struct *mm, bool write) { trace_mmap_lock_start_locking(mm, write); } EXPORT_SYMBOL(__mmap_lock_do_trace_start_locking); void __mmap_lock_do_trace_acquire_returned(struct mm_struct *mm, bool write, bool success) { trace_mmap_lock_acquire_returned(mm, write, success); } EXPORT_SYMBOL(__mmap_lock_do_trace_acquire_returned); void __mmap_lock_do_trace_released(struct mm_struct *mm, bool write) { trace_mmap_lock_released(mm, write); } EXPORT_SYMBOL(__mmap_lock_do_trace_released); #endif /* CONFIG_TRACING */ #ifdef CONFIG_MMU #ifdef CONFIG_PER_VMA_LOCK static inline bool __vma_enter_locked(struct vm_area_struct *vma, bool detaching) { unsigned int tgt_refcnt = VMA_LOCK_OFFSET; /* Additional refcnt if the vma is attached. */ if (!detaching) tgt_refcnt++; /* * If vma is detached then only vma_mark_attached() can raise the * vm_refcnt. mmap_write_lock prevents racing with vma_mark_attached(). 
*/ if (!refcount_add_not_zero(VMA_LOCK_OFFSET, &vma->vm_refcnt)) return false; rwsem_acquire(&vma->vmlock_dep_map, 0, 0, _RET_IP_); rcuwait_wait_event(&vma->vm_mm->vma_writer_wait, refcount_read(&vma->vm_refcnt) == tgt_refcnt, TASK_UNINTERRUPTIBLE); lock_acquired(&vma->vmlock_dep_map, _RET_IP_); return true; } static inline void __vma_exit_locked(struct vm_area_struct *vma, bool *detached) { *detached = refcount_sub_and_test(VMA_LOCK_OFFSET, &vma->vm_refcnt); rwsem_release(&vma->vmlock_dep_map, _RET_IP_); } void __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq) { bool locked; /* * __vma_enter_locked() returns false immediately if the vma is not * attached, otherwise it waits until refcnt is indicating that vma * is attached with no readers. */ locked = __vma_enter_locked(vma, false); /* * We should use WRITE_ONCE() here because we can have concurrent reads * from the early lockless pessimistic check in vma_start_read(). * We don't really care about the correctness of that early check, but * we should use WRITE_ONCE() for cleanliness and to keep KCSAN happy. */ WRITE_ONCE(vma->vm_lock_seq, mm_lock_seq); if (locked) { bool detached; __vma_exit_locked(vma, &detached); WARN_ON_ONCE(detached); /* vma should remain attached */ } } EXPORT_SYMBOL_GPL(__vma_start_write); void vma_mark_detached(struct vm_area_struct *vma) { vma_assert_write_locked(vma); vma_assert_attached(vma); /* * We are the only writer, so no need to use vma_refcount_put(). * The condition below is unlikely because the vma has been already * write-locked and readers can increment vm_refcnt only temporarily * before they check vm_lock_seq, realize the vma is locked and drop * back the vm_refcnt. That is a narrow window for observing a raised * vm_refcnt. */ if (unlikely(!refcount_dec_and_test(&vma->vm_refcnt))) { /* Wait until vma is detached with no readers. */ if (__vma_enter_locked(vma, true)) { bool detached; __vma_exit_locked(vma, &detached); WARN_ON_ONCE(!detached); } } } /* * Lookup and lock a VMA under RCU protection. Returned VMA is guaranteed to be * stable and not isolated. If the VMA is not found or is being modified the * function returns NULL. */ struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm, unsigned long address) { MA_STATE(mas, &mm->mm_mt, address, address); struct vm_area_struct *vma; rcu_read_lock(); retry: vma = mas_walk(&mas); if (!vma) goto inval; vma = vma_start_read(mm, vma); if (IS_ERR_OR_NULL(vma)) { /* Check if the VMA got isolated after we found it */ if (PTR_ERR(vma) == -EAGAIN) { count_vm_vma_lock_event(VMA_LOCK_MISS); /* The area was replaced with another one */ goto retry; } /* Failed to lock the VMA */ goto inval; } /* * At this point, we have a stable reference to a VMA: The VMA is * locked and we know it hasn't already been isolated. * From here on, we can access the VMA without worrying about which * fields are accessible for RCU readers. */ /* Check if the vma we locked is the right one. 
*/ if (unlikely(vma->vm_mm != mm || address < vma->vm_start || address >= vma->vm_end)) goto inval_end_read; rcu_read_unlock(); return vma; inval_end_read: vma_end_read(vma); inval: rcu_read_unlock(); count_vm_vma_lock_event(VMA_LOCK_ABORT); return NULL; } #endif /* CONFIG_PER_VMA_LOCK */ #ifdef CONFIG_LOCK_MM_AND_FIND_VMA #include <linux/extable.h> static inline bool get_mmap_lock_carefully(struct mm_struct *mm, struct pt_regs *regs) { if (likely(mmap_read_trylock(mm))) return true; if (regs && !user_mode(regs)) { unsigned long ip = exception_ip(regs); if (!search_exception_tables(ip)) return false; } return !mmap_read_lock_killable(mm); } static inline bool mmap_upgrade_trylock(struct mm_struct *mm) { /* * We don't have this operation yet. * * It should be easy enough to do: it's basically a * atomic_long_try_cmpxchg_acquire() * from RWSEM_READER_BIAS -> RWSEM_WRITER_LOCKED, but * it also needs the proper lockdep magic etc. */ return false; } static inline bool upgrade_mmap_lock_carefully(struct mm_struct *mm, struct pt_regs *regs) { mmap_read_unlock(mm); if (regs && !user_mode(regs)) { unsigned long ip = exception_ip(regs); if (!search_exception_tables(ip)) return false; } return !mmap_write_lock_killable(mm); } /* * Helper for page fault handling. * * This is kind of equivalent to "mmap_read_lock()" followed * by "find_extend_vma()", except it's a lot more careful about * the locking (and will drop the lock on failure). * * For example, if we have a kernel bug that causes a page * fault, we don't want to just use mmap_read_lock() to get * the mm lock, because that would deadlock if the bug were * to happen while we're holding the mm lock for writing. * * So this checks the exception tables on kernel faults in * order to only do this all for instructions that are actually * expected to fault. * * We can also actually take the mm lock for writing if we * need to extend the vma, which helps the VM layer a lot. */ struct vm_area_struct *lock_mm_and_find_vma(struct mm_struct *mm, unsigned long addr, struct pt_regs *regs) { struct vm_area_struct *vma; if (!get_mmap_lock_carefully(mm, regs)) return NULL; vma = find_vma(mm, addr); if (likely(vma && (vma->vm_start <= addr))) return vma; /* * Well, dang. We might still be successful, but only * if we can extend a vma to do so. */ if (!vma || !(vma->vm_flags & VM_GROWSDOWN)) { mmap_read_unlock(mm); return NULL; } /* * We can try to upgrade the mmap lock atomically, * in which case we can continue to use the vma * we already looked up. * * Otherwise we'll have to drop the mmap lock and * re-take it, and also look up the vma again, * re-checking it. */ if (!mmap_upgrade_trylock(mm)) { if (!upgrade_mmap_lock_carefully(mm, regs)) return NULL; vma = find_vma(mm, addr); if (!vma) goto fail; if (vma->vm_start <= addr) goto success; if (!(vma->vm_flags & VM_GROWSDOWN)) goto fail; } if (expand_stack_locked(vma, addr)) goto fail; success: mmap_write_downgrade(mm); return vma; fail: mmap_write_unlock(mm); return NULL; } #endif /* CONFIG_LOCK_MM_AND_FIND_VMA */ #else /* CONFIG_MMU */ /* * At least xtensa ends up having protection faults even with no * MMU.. No stack expansion, at least. */ struct vm_area_struct *lock_mm_and_find_vma(struct mm_struct *mm, unsigned long addr, struct pt_regs *regs) { struct vm_area_struct *vma; mmap_read_lock(mm); vma = vma_lookup(mm, addr); if (!vma) mmap_read_unlock(mm); return vma; } #endif /* CONFIG_MMU */ |
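get_mmap_lock_carefully() follows a common shape: take the lock opportunistically with a trylock, and only fall back to a (killable) blocking acquire when that fails. A loose userspace analogue with a pthread mutex is sketched below (compile with -pthread); it necessarily drops the kernel-only part, the exception-table check that refuses to block on faults from unexpected kernel addresses.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

static bool get_lock_carefully(void)
{
	if (pthread_mutex_trylock(&lock) == 0)
		return true;		/* fast path: uncontended */

	/* Slow path: block for the lock (kernel: mmap_read_lock_killable()). */
	return pthread_mutex_lock(&lock) == 0;
}

int main(void)
{
	if (get_lock_carefully()) {
		printf("lock taken\n");
		pthread_mutex_unlock(&lock);
	}
	return 0;
}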
| 1 65 39 10 10 10 10 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * include/linux/if_team.h - Network team device driver header * Copyright (c) 2011 Jiri Pirko <jpirko@redhat.com> */ #ifndef _LINUX_IF_TEAM_H_ #define _LINUX_IF_TEAM_H_ #include <linux/netpoll.h> #include <net/sch_generic.h> #include <linux/types.h> #include <uapi/linux/if_team.h> struct team_pcpu_stats { u64_stats_t rx_packets; u64_stats_t rx_bytes; u64_stats_t rx_multicast; u64_stats_t tx_packets; u64_stats_t tx_bytes; struct u64_stats_sync syncp; u32 rx_dropped; u32 tx_dropped; u32 rx_nohandler; }; struct team; struct team_port { struct net_device *dev; struct hlist_node hlist; /* node in enabled ports hash list */ struct list_head list; /* node in ordinary list */ struct team *team; int index; /* index of enabled port. If disabled, it's set to -1 */ bool linkup; /* either state.linkup or user.linkup */ struct { bool linkup; u32 speed; u8 duplex; } state; /* Values set by userspace */ struct { bool linkup; bool linkup_enabled; } user; /* Custom gennetlink interface related flags */ bool changed; bool removed; /* * A place for storing original values of the device before it * become a port. */ struct { unsigned char dev_addr[MAX_ADDR_LEN]; unsigned int mtu; } orig; #ifdef CONFIG_NET_POLL_CONTROLLER struct netpoll *np; #endif s32 priority; /* lower number ~ higher priority */ u16 queue_id; struct list_head qom_list; /* node in queue override mapping list */ struct rcu_head rcu; long mode_priv[]; }; static inline struct team_port *team_port_get_rcu(const struct net_device *dev) { return rcu_dereference(dev->rx_handler_data); } static inline bool team_port_enabled(struct team_port *port) { return port->index != -1; } static inline bool team_port_txable(struct team_port *port) { return port->linkup && team_port_enabled(port); } static inline bool team_port_dev_txable(const struct net_device *port_dev) { struct team_port *port; bool txable; rcu_read_lock(); port = team_port_get_rcu(port_dev); txable = port ? 
team_port_txable(port) : false; rcu_read_unlock(); return txable; } #ifdef CONFIG_NET_POLL_CONTROLLER static inline void team_netpoll_send_skb(struct team_port *port, struct sk_buff *skb) { netpoll_send_skb(port->np, skb); } #else static inline void team_netpoll_send_skb(struct team_port *port, struct sk_buff *skb) { } #endif struct team_mode_ops { int (*init)(struct team *team); void (*exit)(struct team *team); rx_handler_result_t (*receive)(struct team *team, struct team_port *port, struct sk_buff *skb); bool (*transmit)(struct team *team, struct sk_buff *skb); int (*port_enter)(struct team *team, struct team_port *port); void (*port_leave)(struct team *team, struct team_port *port); void (*port_change_dev_addr)(struct team *team, struct team_port *port); void (*port_enabled)(struct team *team, struct team_port *port); void (*port_disabled)(struct team *team, struct team_port *port); }; extern int team_modeop_port_enter(struct team *team, struct team_port *port); extern void team_modeop_port_change_dev_addr(struct team *team, struct team_port *port); enum team_option_type { TEAM_OPTION_TYPE_U32, TEAM_OPTION_TYPE_STRING, TEAM_OPTION_TYPE_BINARY, TEAM_OPTION_TYPE_BOOL, TEAM_OPTION_TYPE_S32, }; struct team_option_inst_info { u32 array_index; struct team_port *port; /* != NULL if per-port */ }; struct team_gsetter_ctx { union { u32 u32_val; const char *str_val; struct { const void *ptr; u32 len; } bin_val; bool bool_val; s32 s32_val; } data; struct team_option_inst_info *info; }; struct team_option { struct list_head list; const char *name; bool per_port; unsigned int array_size; /* != 0 means the option is array */ enum team_option_type type; void (*init)(struct team *team, struct team_option_inst_info *info); void (*getter)(struct team *team, struct team_gsetter_ctx *ctx); int (*setter)(struct team *team, struct team_gsetter_ctx *ctx); }; extern void team_option_inst_set_change(struct team_option_inst_info *opt_inst_info); extern void team_options_change_check(struct team *team); struct team_mode { const char *kind; struct module *owner; size_t priv_size; size_t port_priv_size; const struct team_mode_ops *ops; enum netdev_lag_tx_type lag_tx_type; }; #define TEAM_PORT_HASHBITS 4 #define TEAM_PORT_HASHENTRIES (1 << TEAM_PORT_HASHBITS) #define TEAM_MODE_PRIV_LONGS 4 #define TEAM_MODE_PRIV_SIZE (sizeof(long) * TEAM_MODE_PRIV_LONGS) struct team { struct net_device *dev; /* associated netdevice */ struct team_pcpu_stats __percpu *pcpu_stats; const struct header_ops *header_ops_cache; struct mutex lock; /* used for overall locking, e.g. 
port lists write */ /* * List of enabled ports and their count */ int en_port_count; struct hlist_head en_port_hlist[TEAM_PORT_HASHENTRIES]; struct list_head port_list; /* list of all ports */ struct list_head option_list; struct list_head option_inst_list; /* list of option instances */ const struct team_mode *mode; struct team_mode_ops ops; bool user_carrier_enabled; bool queue_override_enabled; struct list_head *qom_lists; /* array of queue override mapping lists */ bool port_mtu_change_allowed; bool notifier_ctx; struct { unsigned int count; unsigned int interval; /* in ms */ atomic_t count_pending; struct delayed_work dw; } notify_peers; struct { unsigned int count; unsigned int interval; /* in ms */ atomic_t count_pending; struct delayed_work dw; } mcast_rejoin; struct lock_class_key team_lock_key; long mode_priv[TEAM_MODE_PRIV_LONGS]; }; static inline int team_dev_queue_xmit(struct team *team, struct team_port *port, struct sk_buff *skb) { BUILD_BUG_ON(sizeof(skb->queue_mapping) != sizeof(qdisc_skb_cb(skb)->slave_dev_queue_mapping)); skb_set_queue_mapping(skb, qdisc_skb_cb(skb)->slave_dev_queue_mapping); skb->dev = port->dev; if (unlikely(netpoll_tx_running(team->dev))) { team_netpoll_send_skb(port, skb); return 0; } return dev_queue_xmit(skb); } static inline struct hlist_head *team_port_index_hash(struct team *team, int port_index) { return &team->en_port_hlist[port_index & (TEAM_PORT_HASHENTRIES - 1)]; } static inline struct team_port *team_get_port_by_index(struct team *team, int port_index) { struct team_port *port; struct hlist_head *head = team_port_index_hash(team, port_index); hlist_for_each_entry(port, head, hlist) if (port->index == port_index) return port; return NULL; } static inline int team_num_to_port_index(struct team *team, unsigned int num) { int en_port_count = READ_ONCE(team->en_port_count); if (unlikely(!en_port_count)) return 0; return num % en_port_count; } static inline struct team_port *team_get_port_by_index_rcu(struct team *team, int port_index) { struct team_port *port; struct hlist_head *head = team_port_index_hash(team, port_index); hlist_for_each_entry_rcu(port, head, hlist) if (port->index == port_index) return port; return NULL; } static inline struct team_port * team_get_first_port_txable_rcu(struct team *team, struct team_port *port) { struct team_port *cur; if (likely(team_port_txable(port))) return port; cur = port; list_for_each_entry_continue_rcu(cur, &team->port_list, list) if (team_port_txable(cur)) return cur; list_for_each_entry_rcu(cur, &team->port_list, list) { if (cur == port) break; if (team_port_txable(cur)) return cur; } return NULL; } extern int team_options_register(struct team *team, const struct team_option *option, size_t option_count); extern void team_options_unregister(struct team *team, const struct team_option *option, size_t option_count); extern int team_mode_register(const struct team_mode *mode); extern void team_mode_unregister(const struct team_mode *mode); #define TEAM_DEFAULT_NUM_TX_QUEUES 16 #define TEAM_DEFAULT_NUM_RX_QUEUES 16 #define MODULE_ALIAS_TEAM_MODE(kind) MODULE_ALIAS("team-mode-" kind) #endif /* _LINUX_IF_TEAM_H_ */ |
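Transmit port selection in the header above is two cheap steps: team_num_to_port_index() reduces a hash modulo the enabled-port count, and team_port_index_hash() masks the index into one of TEAM_PORT_HASHENTRIES buckets (a power of two). A standalone sketch with the same constants and an arbitrary hash value:

#include <stdio.h>

#define TEAM_PORT_HASHBITS	4
#define TEAM_PORT_HASHENTRIES	(1 << TEAM_PORT_HASHBITS)

static int num_to_port_index(unsigned int num, int en_port_count)
{
	if (!en_port_count)	/* no enabled ports: fall back to slot 0 */
		return 0;
	return num % en_port_count;
}

static int port_index_hash(int port_index)
{
	return port_index & (TEAM_PORT_HASHENTRIES - 1);
}

int main(void)
{
	unsigned int tx_hash = 0xdeadbeef;	/* arbitrary example hash */
	int en_ports = 3;
	int index = num_to_port_index(tx_hash, en_ports);

	printf("port index %d, hash bucket %d\n",
	       index, port_index_hash(index));
	return 0;
}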
| 3 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 | // SPDX-License-Identifier: GPL-2.0-only /* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */ /* Kernel module implementing an IP set type: the hash:ip,port,net type */ #include <linux/jhash.h> #include <linux/module.h> #include <linux/ip.h> #include <linux/skbuff.h> #include <linux/errno.h> #include <linux/random.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/netlink.h> #include <net/tcp.h> #include <linux/netfilter.h> #include <linux/netfilter/ipset/pfxlen.h> #include <linux/netfilter/ipset/ip_set.h> #include <linux/netfilter/ipset/ip_set_getport.h> #include <linux/netfilter/ipset/ip_set_hash.h> #define IPSET_TYPE_REV_MIN 0 /* 1 SCTP and UDPLITE support added */ /* 2 Range as input support for IPv4 added */ /* 3 nomatch flag support added */ /* 4 Counters support added */ /* 5 Comments support added */ /* 6 Forceadd support added */ /* 7 skbinfo support added */ #define IPSET_TYPE_REV_MAX 8 /* bucketsize, initval support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>"); IP_SET_MODULE_DESC("hash:ip,port,net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:ip,port,net"); /* Type specific function prefix */ #define HTYPE hash_ipportnet /* We squeeze the "nomatch" flag into cidr: we don't support cidr == 0 * However this way we have to store internally 
cidr - 1, * dancing back and forth. */ #define IP_SET_HASH_WITH_NETS_PACKED #define IP_SET_HASH_WITH_PROTO #define IP_SET_HASH_WITH_NETS /* IPv4 variant */ /* Member elements */ struct hash_ipportnet4_elem { __be32 ip; __be32 ip2; __be16 port; u8 cidr:7; u8 nomatch:1; u8 proto; }; /* Common functions */ static bool hash_ipportnet4_data_equal(const struct hash_ipportnet4_elem *ip1, const struct hash_ipportnet4_elem *ip2, u32 *multi) { return ip1->ip == ip2->ip && ip1->ip2 == ip2->ip2 && ip1->cidr == ip2->cidr && ip1->port == ip2->port && ip1->proto == ip2->proto; } static int hash_ipportnet4_do_data_match(const struct hash_ipportnet4_elem *elem) { return elem->nomatch ? -ENOTEMPTY : 1; } static void hash_ipportnet4_data_set_flags(struct hash_ipportnet4_elem *elem, u32 flags) { elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH); } static void hash_ipportnet4_data_reset_flags(struct hash_ipportnet4_elem *elem, u8 *flags) { swap(*flags, elem->nomatch); } static void hash_ipportnet4_data_netmask(struct hash_ipportnet4_elem *elem, u8 cidr) { elem->ip2 &= ip_set_netmask(cidr); elem->cidr = cidr - 1; } static bool hash_ipportnet4_data_list(struct sk_buff *skb, const struct hash_ipportnet4_elem *data) { u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) || nla_put_ipaddr4(skb, IPSET_ATTR_IP2, data->ip2) || nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr + 1) || nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || (flags && nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) goto nla_put_failure; return false; nla_put_failure: return true; } static void hash_ipportnet4_data_next(struct hash_ipportnet4_elem *next, const struct hash_ipportnet4_elem *d) { next->ip = d->ip; next->port = d->port; next->ip2 = d->ip2; } #define MTYPE hash_ipportnet4 #define HOST_MASK 32 #include "ip_set_hash_gen.h" static int hash_ipportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { const struct hash_ipportnet4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet4_elem e = { .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (adt == IPSET_TEST) e.cidr = HOST_MASK - 1; if (!ip_set_get_ip4_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.port, &e.proto)) return -EINVAL; ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip); ip4addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip2); e.ip2 &= ip_set_netmask(e.cidr + 1); return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { struct hash_ipportnet4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, p = 0, port, port_to; u32 ip2_from = 0, ip2_to = 0, ip2, i = 0; bool with_ports = false; u8 cidr; int ret; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); if (ret) return ret; ret = ip_set_get_extensions(set, tb, &ext); if 
(ret) return ret; ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2], &ip2_from); if (ret) return ret; if (tb[IPSET_ATTR_CIDR2]) { cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]); if (!cidr || cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; e.cidr = cidr - 1; } e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); if (tb[IPSET_ATTR_PROTO]) { e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); with_ports = ip_set_proto_with_ports(e.proto); if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else { return -IPSET_ERR_MISSING_PROTO; } if (!(with_ports || e.proto == IPPROTO_ICMP)) e.port = 0; if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } with_ports = with_ports && tb[IPSET_ATTR_PORT_TO]; if (adt == IPSET_TEST || !(tb[IPSET_ATTR_CIDR] || tb[IPSET_ATTR_IP_TO] || with_ports || tb[IPSET_ATTR_IP2_TO])) { e.ip = htonl(ip); e.ip2 = htonl(ip2_from & ip_set_hostmask(e.cidr + 1)); ret = adtfn(set, &e, &ext, &ext, flags); return ip_set_enomatch(ret, flags, adt, set) ? -ret : ip_set_eexist(ret, flags) ? 0 : ret; } ip_to = ip; if (tb[IPSET_ATTR_IP_TO]) { ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); if (ret) return ret; if (ip > ip_to) swap(ip, ip_to); } else if (tb[IPSET_ATTR_CIDR]) { cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (!cidr || cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; ip_set_mask_from_to(ip, ip_to, cidr); } port_to = port = ntohs(e.port); if (tb[IPSET_ATTR_PORT_TO]) { port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); if (port > port_to) swap(port, port_to); } ip2_to = ip2_from; if (tb[IPSET_ATTR_IP2_TO]) { ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to); if (ret) return ret; if (ip2_from > ip2_to) swap(ip2_from, ip2_to); if (ip2_from + UINT_MAX == ip2_to) return -IPSET_ERR_HASH_RANGE; } else { ip_set_mask_from_to(ip2_from, ip2_to, e.cidr + 1); } if (retried) { ip = ntohl(h->next.ip); p = ntohs(h->next.port); ip2 = ntohl(h->next.ip2); } else { p = port; ip2 = ip2_from; } for (; ip <= ip_to; ip++) { e.ip = htonl(ip); for (; p <= port_to; p++) { e.port = htons(p); do { i++; e.ip2 = htonl(ip2); ip2 = ip_set_range_to_cidr(ip2, ip2_to, &cidr); e.cidr = cidr - 1; if (i > IPSET_MAX_RANGE) { hash_ipportnet4_data_next(&h->next, &e); return -ERANGE; } ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; ret = 0; } while (ip2++ < ip2_to); ip2 = ip2_from; } p = port; } return ret; } /* IPv6 variant */ struct hash_ipportnet6_elem { union nf_inet_addr ip; union nf_inet_addr ip2; __be16 port; u8 cidr:7; u8 nomatch:1; u8 proto; }; /* Common functions */ static bool hash_ipportnet6_data_equal(const struct hash_ipportnet6_elem *ip1, const struct hash_ipportnet6_elem *ip2, u32 *multi) { return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) && ipv6_addr_equal(&ip1->ip2.in6, &ip2->ip2.in6) && ip1->cidr == ip2->cidr && ip1->port == ip2->port && ip1->proto == ip2->proto; } static int hash_ipportnet6_do_data_match(const struct hash_ipportnet6_elem *elem) { return elem->nomatch ? 
-ENOTEMPTY : 1; } static void hash_ipportnet6_data_set_flags(struct hash_ipportnet6_elem *elem, u32 flags) { elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH); } static void hash_ipportnet6_data_reset_flags(struct hash_ipportnet6_elem *elem, u8 *flags) { swap(*flags, elem->nomatch); } static void hash_ipportnet6_data_netmask(struct hash_ipportnet6_elem *elem, u8 cidr) { ip6_netmask(&elem->ip2, cidr); elem->cidr = cidr - 1; } static bool hash_ipportnet6_data_list(struct sk_buff *skb, const struct hash_ipportnet6_elem *data) { u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6) || nla_put_ipaddr6(skb, IPSET_ATTR_IP2, &data->ip2.in6) || nla_put_net16(skb, IPSET_ATTR_PORT, data->port) || nla_put_u8(skb, IPSET_ATTR_CIDR2, data->cidr + 1) || nla_put_u8(skb, IPSET_ATTR_PROTO, data->proto) || (flags && nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) goto nla_put_failure; return false; nla_put_failure: return true; } static void hash_ipportnet6_data_next(struct hash_ipportnet6_elem *next, const struct hash_ipportnet6_elem *d) { next->port = d->port; } #undef MTYPE #undef HOST_MASK #define MTYPE hash_ipportnet6 #define HOST_MASK 128 #define IP_SET_EMIT_CREATE #include "ip_set_hash_gen.h" static int hash_ipportnet6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { const struct hash_ipportnet6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet6_elem e = { .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (adt == IPSET_TEST) e.cidr = HOST_MASK - 1; if (!ip_set_get_ip6_port(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.port, &e.proto)) return -EINVAL; ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); ip6addrptr(skb, opt->flags & IPSET_DIM_THREE_SRC, &e.ip2.in6); ip6_netmask(&e.ip2, e.cidr + 1); return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { const struct hash_ipportnet6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_ipportnet6_elem e = { .cidr = HOST_MASK - 1 }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 port, port_to; bool with_ports = false; u8 cidr; int ret; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); if (unlikely(!tb[IPSET_ATTR_IP] || !tb[IPSET_ATTR_IP2] || !ip_set_attr_netorder(tb, IPSET_ATTR_PORT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PORT_TO) || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; if (unlikely(tb[IPSET_ATTR_IP_TO])) return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; if (unlikely(tb[IPSET_ATTR_CIDR])) { cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (cidr != HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip); if (ret) return ret; ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP2], &e.ip2); if (ret) return ret; if (tb[IPSET_ATTR_CIDR2]) { cidr = nla_get_u8(tb[IPSET_ATTR_CIDR2]); if (!cidr || cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; e.cidr = cidr - 1; } ip6_netmask(&e.ip2, e.cidr + 1); e.port = nla_get_be16(tb[IPSET_ATTR_PORT]); if (tb[IPSET_ATTR_PROTO]) { e.proto = nla_get_u8(tb[IPSET_ATTR_PROTO]); with_ports = ip_set_proto_with_ports(e.proto); if (e.proto == 0) return -IPSET_ERR_INVALID_PROTO; } else 
{ return -IPSET_ERR_MISSING_PROTO; } if (!(with_ports || e.proto == IPPROTO_ICMPV6)) e.port = 0; if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } if (adt == IPSET_TEST || !with_ports || !tb[IPSET_ATTR_PORT_TO]) { ret = adtfn(set, &e, &ext, &ext, flags); return ip_set_enomatch(ret, flags, adt, set) ? -ret : ip_set_eexist(ret, flags) ? 0 : ret; } port = ntohs(e.port); port_to = ip_set_get_h16(tb[IPSET_ATTR_PORT_TO]); if (port > port_to) swap(port, port_to); if (retried) port = ntohs(h->next.port); for (; port <= port_to; port++) { e.port = htons(port); ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; ret = 0; } return ret; } static struct ip_set_type hash_ipportnet_type __read_mostly = { .name = "hash:ip,port,net", .protocol = IPSET_PROTOCOL, .features = IPSET_TYPE_IP | IPSET_TYPE_PORT | IPSET_TYPE_IP2 | IPSET_TYPE_NOMATCH, .dimension = IPSET_DIM_THREE, .family = NFPROTO_UNSPEC, .revision_min = IPSET_TYPE_REV_MIN, .revision_max = IPSET_TYPE_REV_MAX, .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE, .create = hash_ipportnet_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, [IPSET_ATTR_INITVAL] = { .type = NLA_U32 }, [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 }, [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, }, .adt_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, [IPSET_ATTR_IP2] = { .type = NLA_NESTED }, [IPSET_ATTR_IP2_TO] = { .type = NLA_NESTED }, [IPSET_ATTR_PORT] = { .type = NLA_U16 }, [IPSET_ATTR_PORT_TO] = { .type = NLA_U16 }, [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, [IPSET_ATTR_CIDR2] = { .type = NLA_U8 }, [IPSET_ATTR_PROTO] = { .type = NLA_U8 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING, .len = IPSET_MAX_COMMENT_SIZE }, [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, }, .me = THIS_MODULE, }; static int __init hash_ipportnet_init(void) { return ip_set_type_register(&hash_ipportnet_type); } static void __exit hash_ipportnet_fini(void) { rcu_barrier(); ip_set_type_unregister(&hash_ipportnet_type); } module_init(hash_ipportnet_init); module_exit(hash_ipportnet_fini); |
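The comment near the top of this type explains the packing trick: cidr 0 is never a valid element, so cidr - 1 is stored in a 7-bit field and the spare bit holds the nomatch flag, keeping both in one byte. A standalone sketch of that encode/decode:

#include <stdint.h>
#include <stdio.h>

struct elem {
	uint8_t cidr:7;		/* stores cidr - 1, i.e. 0..127 */
	uint8_t nomatch:1;	/* "this entry is an exception" flag */
};

static void set_cidr(struct elem *e, uint8_t cidr)
{
	e->cidr = cidr - 1;	/* caller guarantees 1 <= cidr <= 128 */
}

static uint8_t get_cidr(const struct elem *e)
{
	return e->cidr + 1;
}

int main(void)
{
	struct elem e = { 0 };

	set_cidr(&e, 128);	/* the IPv6 host mask still fits in 7 bits */
	e.nomatch = 1;
	printf("stored %d, decoded /%d, nomatch %d, size %zu byte(s)\n",
	       e.cidr, get_cidr(&e), e.nomatch, sizeof(e));
	return 0;
}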
| 108 108 108 101 82 83 108 25 75 86 86 84 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 | // SPDX-License-Identifier: GPL-2.0 /* * USB Serial Converter Bus specific functions * * Copyright (C) 2002 Greg Kroah-Hartman (greg@kroah.com) */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/tty.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/usb.h> #include <linux/usb/serial.h> static int usb_serial_device_match(struct device *dev, const struct device_driver *drv) { const struct usb_serial_port *port = to_usb_serial_port(dev); const struct usb_serial_driver *driver = to_usb_serial_driver(drv); /* * drivers are already assigned to ports in serial_probe so it's * a simple check here. */ if (driver == port->serial->type) return 1; return 0; } static int usb_serial_device_probe(struct device *dev) { struct usb_serial_port *port = to_usb_serial_port(dev); struct usb_serial_driver *driver; struct device *tty_dev; int retval = 0; int minor; /* make sure suspend/resume doesn't race against port_probe */ retval = usb_autopm_get_interface(port->serial->interface); if (retval) return retval; driver = port->serial->type; if (driver->port_probe) { retval = driver->port_probe(port); if (retval) goto err_autopm_put; } minor = port->minor; tty_dev = tty_port_register_device(&port->port, usb_serial_tty_driver, minor, dev); if (IS_ERR(tty_dev)) { retval = PTR_ERR(tty_dev); goto err_port_remove; } usb_autopm_put_interface(port->serial->interface); dev_info(&port->serial->dev->dev, "%s converter now attached to ttyUSB%d\n", driver->description, minor); return 0; err_port_remove: if (driver->port_remove) driver->port_remove(port); err_autopm_put: usb_autopm_put_interface(port->serial->interface); return retval; } static void usb_serial_device_remove(struct device *dev) { struct usb_serial_port *port = to_usb_serial_port(dev); struct usb_serial_driver *driver; int minor; int autopm_err; /* * Make sure suspend/resume doesn't race against port_remove. * * Note that no further runtime PM callbacks will be made if * autopm_get fails. 
*/ autopm_err = usb_autopm_get_interface(port->serial->interface); minor = port->minor; tty_unregister_device(usb_serial_tty_driver, minor); driver = port->serial->type; if (driver->port_remove) driver->port_remove(port); dev_info(dev, "%s converter now disconnected from ttyUSB%d\n", driver->description, minor); if (!autopm_err) usb_autopm_put_interface(port->serial->interface); } static ssize_t new_id_store(struct device_driver *driver, const char *buf, size_t count) { struct usb_serial_driver *usb_drv = to_usb_serial_driver(driver); ssize_t retval = usb_store_new_id(&usb_drv->dynids, usb_drv->id_table, driver, buf, count); if (retval >= 0 && usb_drv->usb_driver != NULL) retval = usb_store_new_id(&usb_drv->usb_driver->dynids, usb_drv->usb_driver->id_table, &usb_drv->usb_driver->driver, buf, count); return retval; } static ssize_t new_id_show(struct device_driver *driver, char *buf) { struct usb_serial_driver *usb_drv = to_usb_serial_driver(driver); return usb_show_dynids(&usb_drv->dynids, buf); } static DRIVER_ATTR_RW(new_id); static struct attribute *usb_serial_drv_attrs[] = { &driver_attr_new_id.attr, NULL, }; ATTRIBUTE_GROUPS(usb_serial_drv); static void free_dynids(struct usb_serial_driver *drv) { struct usb_dynid *dynid, *n; guard(mutex)(&usb_dynids_lock); list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) { list_del(&dynid->node); kfree(dynid); } } const struct bus_type usb_serial_bus_type = { .name = "usb-serial", .match = usb_serial_device_match, .probe = usb_serial_device_probe, .remove = usb_serial_device_remove, .drv_groups = usb_serial_drv_groups, }; int usb_serial_bus_register(struct usb_serial_driver *driver) { int retval; driver->driver.bus = &usb_serial_bus_type; INIT_LIST_HEAD(&driver->dynids.list); retval = driver_register(&driver->driver); return retval; } void usb_serial_bus_deregister(struct usb_serial_driver *driver) { free_dynids(driver); driver_unregister(&driver->driver); } |
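new_id_store() backs the per-driver new_id attribute on the usb-serial bus, so a dynamic ID can be added from userspace by writing "<vendor> <product>" in hex to /sys/bus/usb-serial/drivers/<driver>/new_id. A small sketch of doing that from C; the driver name and IDs below are placeholders.

#include <stdio.h>

int main(void)
{
	/* Example driver name; substitute the usb-serial driver in use. */
	const char *path = "/sys/bus/usb-serial/drivers/ftdi_sio/new_id";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	/* vendor 0x1234, product 0x5678 -- placeholder values */
	if (fprintf(f, "1234 5678\n") < 0)
		perror("fprintf");
	fclose(f);
	return 0;
}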
| 64 91 655 114 174 337 312 289 576 44 50 51 44 44 62 10 7 44 23 23 22 23 23 68 68 21 21 53 53 37 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Authors: Lotsa people, from code originally in tcp */ #ifndef _INET_HASHTABLES_H #define _INET_HASHTABLES_H #include <linux/interrupt.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/list.h> #include <linux/slab.h> #include <linux/socket.h> #include <linux/spinlock.h> #include <linux/types.h> #include <linux/wait.h> #include <net/inet_connection_sock.h> #include <net/inet_sock.h> #include <net/ip.h> #include <net/sock.h> #include <net/route.h> #include <net/tcp_states.h> #include <net/netns/hash.h> #include <linux/refcount.h> #include <asm/byteorder.h> /* This is for all connections with a full identity, no wildcards. * The 'e' prefix stands for Establish, but we really put all sockets * but LISTEN ones. */ struct inet_ehash_bucket { struct hlist_nulls_head chain; }; /* There are a few simple rules, which allow for local port reuse by * an application. In essence: * * 1) Sockets bound to different interfaces may share a local port. * Failing that, goto test 2. * 2) If all sockets have sk->sk_reuse set, and none of them are in * TCP_LISTEN state, the port may be shared. * Failing that, goto test 3. 
* 3) If all sockets are bound to a specific inet_sk(sk)->rcv_saddr local * address, and none of them are the same, the port may be * shared. * Failing this, the port cannot be shared. * * The interesting point, is test #2. This is what an FTP server does * all day. To optimize this case we use a specific flag bit defined * below. As we add sockets to a bind bucket list, we perform a * check of: (newsk->sk_reuse && (newsk->sk_state != TCP_LISTEN)) * As long as all sockets added to a bind bucket pass this test, * the flag bit will be set. * The resulting situation is that tcp_v[46]_verify_bind() can just check * for this flag bit, if it is set and the socket trying to bind has * sk->sk_reuse set, we don't even have to walk the owners list at all, * we return that it is ok to bind this socket to the requested local port. * * Sounds like a lot of work, but it is worth it. In a more naive * implementation (ie. current FreeBSD etc.) the entire list of ports * must be walked for each data port opened by an ftp server. Needless * to say, this does not scale at all. With a couple thousand FTP * users logged onto your box, isn't it nice to know that new data * ports are created in O(1) time? I thought so. ;-) -DaveM */ #define FASTREUSEPORT_ANY 1 #define FASTREUSEPORT_STRICT 2 struct inet_bind_bucket { possible_net_t ib_net; int l3mdev; unsigned short port; signed char fastreuse; signed char fastreuseport; kuid_t fastuid; #if IS_ENABLED(CONFIG_IPV6) struct in6_addr fast_v6_rcv_saddr; #endif __be32 fast_rcv_saddr; unsigned short fast_sk_family; bool fast_ipv6_only; struct hlist_node node; struct hlist_head bhash2; struct rcu_head rcu; }; struct inet_bind2_bucket { possible_net_t ib_net; int l3mdev; unsigned short port; #if IS_ENABLED(CONFIG_IPV6) unsigned short addr_type; struct in6_addr v6_rcv_saddr; #define rcv_saddr v6_rcv_saddr.s6_addr32[3] #else __be32 rcv_saddr; #endif /* Node in the bhash2 inet_bind_hashbucket chain */ struct hlist_node node; struct hlist_node bhash_node; /* List of sockets hashed to this bucket */ struct hlist_head owners; }; static inline struct net *ib_net(const struct inet_bind_bucket *ib) { return read_pnet(&ib->ib_net); } static inline struct net *ib2_net(const struct inet_bind2_bucket *ib) { return read_pnet(&ib->ib_net); } #define inet_bind_bucket_for_each(tb, head) \ hlist_for_each_entry(tb, head, node) struct inet_bind_hashbucket { spinlock_t lock; struct hlist_head chain; }; /* Sockets can be hashed in established or listening table. * We must use different 'nulls' end-of-chain value for all hash buckets : * A socket might transition from ESTABLISH to LISTEN state without * RCU grace period. A lookup in ehash table needs to handle this case. */ #define LISTENING_NULLS_BASE (1U << 29) struct inet_listen_hashbucket { spinlock_t lock; struct hlist_nulls_head nulls_head; }; /* This is for listening sockets, thus all sockets which possess wildcards. */ #define INET_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */ struct inet_hashinfo { /* This is for sockets with full identity only. Sockets here will * always be without wildcards and will have the following invariant: * * TCP_ESTABLISHED <= sk->sk_state < TCP_CLOSE * */ struct inet_ehash_bucket *ehash; spinlock_t *ehash_locks; unsigned int ehash_mask; unsigned int ehash_locks_mask; /* Ok, let's try this, I give up, we do need a local binding * TCP hash as well as the others for fast bind/connect. 
*/ struct kmem_cache *bind_bucket_cachep; /* This bind table is hashed by local port */ struct inet_bind_hashbucket *bhash; struct kmem_cache *bind2_bucket_cachep; /* This bind table is hashed by local port and sk->sk_rcv_saddr (ipv4) * or sk->sk_v6_rcv_saddr (ipv6). This 2nd bind table is used * primarily for expediting bind conflict resolution. */ struct inet_bind_hashbucket *bhash2; unsigned int bhash_size; /* The 2nd listener table hashed by local port and address */ unsigned int lhash2_mask; struct inet_listen_hashbucket *lhash2; bool pernet; } ____cacheline_aligned_in_smp; static inline struct inet_hashinfo *tcp_get_hashinfo(const struct sock *sk) { return sock_net(sk)->ipv4.tcp_death_row.hashinfo; } static inline struct inet_listen_hashbucket * inet_lhash2_bucket(struct inet_hashinfo *h, u32 hash) { return &h->lhash2[hash & h->lhash2_mask]; } static inline struct inet_ehash_bucket *inet_ehash_bucket( struct inet_hashinfo *hashinfo, unsigned int hash) { return &hashinfo->ehash[hash & hashinfo->ehash_mask]; } static inline spinlock_t *inet_ehash_lockp( struct inet_hashinfo *hashinfo, unsigned int hash) { return &hashinfo->ehash_locks[hash & hashinfo->ehash_locks_mask]; } int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo); static inline void inet_hashinfo2_free_mod(struct inet_hashinfo *h) { kfree(h->lhash2); h->lhash2 = NULL; } static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo) { kvfree(hashinfo->ehash_locks); hashinfo->ehash_locks = NULL; } struct inet_hashinfo *inet_pernet_hashinfo_alloc(struct inet_hashinfo *hashinfo, unsigned int ehash_entries); void inet_pernet_hashinfo_free(struct inet_hashinfo *hashinfo); struct inet_bind_bucket * inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net, struct inet_bind_hashbucket *head, const unsigned short snum, int l3mdev); void inet_bind_bucket_destroy(struct inet_bind_bucket *tb); bool inet_bind_bucket_match(const struct inet_bind_bucket *tb, const struct net *net, unsigned short port, int l3mdev); struct inet_bind2_bucket * inet_bind2_bucket_create(struct kmem_cache *cachep, struct net *net, struct inet_bind_hashbucket *head, struct inet_bind_bucket *tb, const struct sock *sk); void inet_bind2_bucket_destroy(struct kmem_cache *cachep, struct inet_bind2_bucket *tb); struct inet_bind2_bucket * inet_bind2_bucket_find(const struct inet_bind_hashbucket *head, const struct net *net, unsigned short port, int l3mdev, const struct sock *sk); bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb, const struct net *net, unsigned short port, int l3mdev, const struct sock *sk); static inline u32 inet_bhashfn(const struct net *net, const __u16 lport, const u32 bhash_size) { return (lport + net_hash_mix(net)) & (bhash_size - 1); } static inline struct inet_bind_hashbucket * inet_bhashfn_portaddr(const struct inet_hashinfo *hinfo, const struct sock *sk, const struct net *net, unsigned short port) { u32 hash; #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) hash = ipv6_portaddr_hash(net, &sk->sk_v6_rcv_saddr, port); else #endif hash = ipv4_portaddr_hash(net, sk->sk_rcv_saddr, port); return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)]; } struct inet_bind_hashbucket * inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, int port); /* This should be called whenever a socket's sk_rcv_saddr (ipv4) or * sk_v6_rcv_saddr (ipv6) changes after it has been binded. The socket's * rcv_saddr field should already have been updated when this is called. 
*/ int inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family); void inet_bhash2_reset_saddr(struct sock *sk); void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, struct inet_bind2_bucket *tb2, unsigned short port); /* Caller must disable local BH processing. */ int __inet_inherit_port(const struct sock *sk, struct sock *child); void inet_put_port(struct sock *sk); void inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, unsigned long numentries, int scale, unsigned long low_limit, unsigned long high_limit); int inet_hashinfo2_init_mod(struct inet_hashinfo *h); bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk); bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, bool *found_dup_sk); int __inet_hash(struct sock *sk, struct sock *osk); int inet_hash(struct sock *sk); void inet_unhash(struct sock *sk); struct sock *__inet_lookup_listener(const struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const __be32 saddr, const __be16 sport, const __be32 daddr, const unsigned short hnum, const int dif, const int sdif); static inline struct sock *inet_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif, int sdif) { return __inet_lookup_listener(net, hashinfo, skb, doff, saddr, sport, daddr, ntohs(dport), dif, sdif); } /* Socket demux engine toys. */ /* What happens here is ugly; there's a pair of adjacent fields in struct inet_sock; __be16 dport followed by __u16 num. We want to search by pair, so we combine the keys into a single 32bit value and compare with 32bit value read from &...->dport. Let's at least make sure that it's not mixed with anything else... On 64bit targets we combine comparisons with pair of adjacent __be32 fields in the same way. */ #ifdef __BIG_ENDIAN #define INET_COMBINED_PORTS(__sport, __dport) \ ((__force __portpair)(((__force __u32)(__be16)(__sport) << 16) | (__u32)(__dport))) #else /* __LITTLE_ENDIAN */ #define INET_COMBINED_PORTS(__sport, __dport) \ ((__force __portpair)(((__u32)(__dport) << 16) | (__force __u32)(__be16)(__sport))) #endif #ifdef __BIG_ENDIAN #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ const __addrpair __name = (__force __addrpair) ( \ (((__force __u64)(__be32)(__saddr)) << 32) | \ ((__force __u64)(__be32)(__daddr))) #else /* __LITTLE_ENDIAN */ #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ const __addrpair __name = (__force __addrpair) ( \ (((__force __u64)(__be32)(__daddr)) << 32) | \ ((__force __u64)(__be32)(__saddr))) #endif /* __BIG_ENDIAN */ static inline bool inet_match(const struct net *net, const struct sock *sk, const __addrpair cookie, const __portpair ports, int dif, int sdif) { if (!net_eq(sock_net(sk), net) || sk->sk_portpair != ports || sk->sk_addrpair != cookie) return false; /* READ_ONCE() paired with WRITE_ONCE() in sock_bindtoindex_locked() */ return inet_sk_bound_dev_eq(net, READ_ONCE(sk->sk_bound_dev_if), dif, sdif); } /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need * not check it for lookups anymore, thanks Alexey. 
-DaveM */ struct sock *__inet_lookup_established(const struct net *net, struct inet_hashinfo *hashinfo, const __be32 saddr, const __be16 sport, const __be32 daddr, const u16 hnum, const int dif, const int sdif); typedef u32 (inet_ehashfn_t)(const struct net *net, const __be32 laddr, const __u16 lport, const __be32 faddr, const __be16 fport); inet_ehashfn_t inet_ehashfn; INDIRECT_CALLABLE_DECLARE(inet_ehashfn_t udp_ehashfn); struct sock *inet_lookup_reuseport(const struct net *net, struct sock *sk, struct sk_buff *skb, int doff, __be32 saddr, __be16 sport, __be32 daddr, unsigned short hnum, inet_ehashfn_t *ehashfn); struct sock *inet_lookup_run_sk_lookup(const struct net *net, int protocol, struct sk_buff *skb, int doff, __be32 saddr, __be16 sport, __be32 daddr, u16 hnum, const int dif, inet_ehashfn_t *ehashfn); static inline struct sock * inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo, const __be32 saddr, const __be16 sport, const __be32 daddr, const __be16 dport, const int dif) { return __inet_lookup_established(net, hashinfo, saddr, sport, daddr, ntohs(dport), dif, 0); } static inline struct sock *__inet_lookup(struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const __be32 saddr, const __be16 sport, const __be32 daddr, const __be16 dport, const int dif, const int sdif, bool *refcounted) { u16 hnum = ntohs(dport); struct sock *sk; sk = __inet_lookup_established(net, hashinfo, saddr, sport, daddr, hnum, dif, sdif); *refcounted = true; if (sk) return sk; *refcounted = false; return __inet_lookup_listener(net, hashinfo, skb, doff, saddr, sport, daddr, hnum, dif, sdif); } static inline struct sock *inet_lookup(struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const __be32 saddr, const __be16 sport, const __be32 daddr, const __be16 dport, const int dif) { struct sock *sk; bool refcounted; sk = __inet_lookup(net, hashinfo, skb, doff, saddr, sport, daddr, dport, dif, 0, &refcounted); if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; return sk; } static inline struct sock *inet_steal_sock(struct net *net, struct sk_buff *skb, int doff, const __be32 saddr, const __be16 sport, const __be32 daddr, const __be16 dport, bool *refcounted, inet_ehashfn_t *ehashfn) { struct sock *sk, *reuse_sk; bool prefetched; sk = skb_steal_sock(skb, refcounted, &prefetched); if (!sk) return NULL; if (!prefetched || !sk_fullsock(sk)) return sk; if (sk->sk_protocol == IPPROTO_TCP) { if (sk->sk_state != TCP_LISTEN) return sk; } else if (sk->sk_protocol == IPPROTO_UDP) { if (sk->sk_state != TCP_CLOSE) return sk; } else { return sk; } reuse_sk = inet_lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, ntohs(dport), ehashfn); if (!reuse_sk) return sk; /* We've chosen a new reuseport sock which is never refcounted. This * implies that sk also isn't refcounted. 
*/ WARN_ON_ONCE(*refcounted); return reuse_sk; } static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const __be16 sport, const __be16 dport, const int sdif, bool *refcounted) { struct net *net = dev_net_rcu(skb_dst(skb)->dev); const struct iphdr *iph = ip_hdr(skb); struct sock *sk; sk = inet_steal_sock(net, skb, doff, iph->saddr, sport, iph->daddr, dport, refcounted, inet_ehashfn); if (IS_ERR(sk)) return NULL; if (sk) return sk; return __inet_lookup(net, hashinfo, skb, doff, iph->saddr, sport, iph->daddr, dport, inet_iif(skb), sdif, refcounted); } static inline void sk_daddr_set(struct sock *sk, __be32 addr) { sk->sk_daddr = addr; /* alias of inet_daddr */ #if IS_ENABLED(CONFIG_IPV6) ipv6_addr_set_v4mapped(addr, &sk->sk_v6_daddr); #endif } static inline void sk_rcv_saddr_set(struct sock *sk, __be32 addr) { sk->sk_rcv_saddr = addr; /* alias of inet_rcv_saddr */ #if IS_ENABLED(CONFIG_IPV6) ipv6_addr_set_v4mapped(addr, &sk->sk_v6_rcv_saddr); #endif } int __inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk, u64 port_offset, u32 hash_port0, int (*check_established)(struct inet_timewait_death_row *, struct sock *, __u16, struct inet_timewait_sock **, bool rcu_lookup, u32 hash)); int inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk); #endif /* _INET_HASHTABLES_H */ |
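The lookup helpers above signal their reference-counting contract through the *refcounted output flag rather than by convention. The following caller-side sketch, using a hypothetical example_demux() helper not found in this header, shows how __inet_lookup_skb() is typically paired with sock_put(); packet processing itself is omitted.

static int example_demux(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
			 int doff, __be16 sport, __be16 dport)
{
	bool refcounted;
	struct sock *sk;

	/* Tries the established table first, then the listener tables. */
	sk = __inet_lookup_skb(hashinfo, skb, doff, sport, dport,
			       0 /* sdif */, &refcounted);
	if (!sk)
		return -ENOENT;

	/* ... process the packet against sk here ... */

	/* Drop the reference only if the lookup actually took one. */
	if (refcounted)
		sock_put(sk);
	return 0;
}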
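The INET_ADDR_COOKIE()/INET_COMBINED_PORTS() macros exist so the established-table walk can compare one packed address pair and one packed port pair instead of four separate fields. The sketch below, with illustrative names, shows how they pair with inet_match() on an ehash chain; the real walk in __inet_lookup_established() additionally handles nulls-marker restarts and refcounting, which are omitted here.

static struct sock *example_ehash_walk(struct net *net,
				       struct inet_ehash_bucket *head,
				       __be32 saddr, __be32 daddr,
				       __be16 sport, unsigned short hnum,
				       int dif, int sdif)
{
	/* Pack saddr/daddr and sport/hnum into the two comparison keys. */
	INET_ADDR_COOKIE(acookie, saddr, daddr);
	const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
	const struct hlist_nulls_node *node;
	struct sock *sk;

	sk_nulls_for_each_rcu(sk, node, &head->chain) {
		if (inet_match(net, sk, acookie, ports, dif, sdif))
			return sk;
	}
	return NULL;
}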
// SPDX-License-Identifier: GPL-2.0-or-later /* * Roccat Kone[+] driver for Linux * * Copyright (c) 2010 Stefan Achatz <erazor_de@users.sourceforge.net> */ /* */ /* * Roccat Kone[+] is an updated/improved version of the Kone with more memory * and functionality and without the non-standard behaviours the Kone had. * The KoneXTD has the same capabilities but an updated sensor.
*/ #include <linux/device.h> #include <linux/input.h> #include <linux/hid.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/hid-roccat.h> #include "hid-ids.h" #include "hid-roccat-common.h" #include "hid-roccat-koneplus.h" static uint profile_numbers[5] = {0, 1, 2, 3, 4}; static void koneplus_profile_activated(struct koneplus_device *koneplus, uint new_profile) { koneplus->actual_profile = new_profile; } static int koneplus_send_control(struct usb_device *usb_dev, uint value, enum koneplus_control_requests request) { struct roccat_common2_control control; if ((request == KONEPLUS_CONTROL_REQUEST_PROFILE_SETTINGS || request == KONEPLUS_CONTROL_REQUEST_PROFILE_BUTTONS) && value > 4) return -EINVAL; control.command = ROCCAT_COMMON_COMMAND_CONTROL; control.value = value; control.request = request; return roccat_common2_send_with_status(usb_dev, ROCCAT_COMMON_COMMAND_CONTROL, &control, sizeof(struct roccat_common2_control)); } /* retval is 0-4 on success, < 0 on error */ static int koneplus_get_actual_profile(struct usb_device *usb_dev) { struct koneplus_actual_profile buf; int retval; retval = roccat_common2_receive(usb_dev, KONEPLUS_COMMAND_ACTUAL_PROFILE, &buf, KONEPLUS_SIZE_ACTUAL_PROFILE); return retval ? retval : buf.actual_profile; } static int koneplus_set_actual_profile(struct usb_device *usb_dev, int new_profile) { struct koneplus_actual_profile buf; buf.command = KONEPLUS_COMMAND_ACTUAL_PROFILE; buf.size = KONEPLUS_SIZE_ACTUAL_PROFILE; buf.actual_profile = new_profile; return roccat_common2_send_with_status(usb_dev, KONEPLUS_COMMAND_ACTUAL_PROFILE, &buf, KONEPLUS_SIZE_ACTUAL_PROFILE); } static ssize_t koneplus_sysfs_read(struct file *fp, struct kobject *kobj, char *buf, loff_t off, size_t count, size_t real_size, uint command) { struct device *dev = kobj_to_dev(kobj)->parent->parent; struct koneplus_device *koneplus = hid_get_drvdata(dev_get_drvdata(dev)); struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev)); int retval; if (off >= real_size) return 0; if (off != 0 || count != real_size) return -EINVAL; mutex_lock(&koneplus->koneplus_lock); retval = roccat_common2_receive(usb_dev, command, buf, real_size); mutex_unlock(&koneplus->koneplus_lock); if (retval) return retval; return real_size; } static ssize_t koneplus_sysfs_write(struct file *fp, struct kobject *kobj, void const *buf, loff_t off, size_t count, size_t real_size, uint command) { struct device *dev = kobj_to_dev(kobj)->parent->parent; struct koneplus_device *koneplus = hid_get_drvdata(dev_get_drvdata(dev)); struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev)); int retval; if (off != 0 || count != real_size) return -EINVAL; mutex_lock(&koneplus->koneplus_lock); retval = roccat_common2_send_with_status(usb_dev, command, buf, real_size); mutex_unlock(&koneplus->koneplus_lock); if (retval) return retval; return real_size; } #define KONEPLUS_SYSFS_W(thingy, THINGY) \ static ssize_t koneplus_sysfs_write_ ## thingy(struct file *fp, \ struct kobject *kobj, const struct bin_attribute *attr, \ char *buf, loff_t off, size_t count) \ { \ return koneplus_sysfs_write(fp, kobj, buf, off, count, \ KONEPLUS_SIZE_ ## THINGY, KONEPLUS_COMMAND_ ## THINGY); \ } #define KONEPLUS_SYSFS_R(thingy, THINGY) \ static ssize_t koneplus_sysfs_read_ ## thingy(struct file *fp, \ struct kobject *kobj, const struct bin_attribute *attr, \ char *buf, loff_t off, size_t count) \ { \ return koneplus_sysfs_read(fp, kobj, buf, off, count, \ KONEPLUS_SIZE_ ## THINGY, KONEPLUS_COMMAND_ ## THINGY); \ } #define 
KONEPLUS_SYSFS_RW(thingy, THINGY) \ KONEPLUS_SYSFS_W(thingy, THINGY) \ KONEPLUS_SYSFS_R(thingy, THINGY) #define KONEPLUS_BIN_ATTRIBUTE_RW(thingy, THINGY) \ KONEPLUS_SYSFS_RW(thingy, THINGY); \ static const struct bin_attribute bin_attr_##thingy = { \ .attr = { .name = #thingy, .mode = 0660 }, \ .size = KONEPLUS_SIZE_ ## THINGY, \ .read_new = koneplus_sysfs_read_ ## thingy, \ .write_new = koneplus_sysfs_write_ ## thingy \ } #define KONEPLUS_BIN_ATTRIBUTE_R(thingy, THINGY) \ KONEPLUS_SYSFS_R(thingy, THINGY); \ static const struct bin_attribute bin_attr_##thingy = { \ .attr = { .name = #thingy, .mode = 0440 }, \ .size = KONEPLUS_SIZE_ ## THINGY, \ .read_new = koneplus_sysfs_read_ ## thingy, \ } #define KONEPLUS_BIN_ATTRIBUTE_W(thingy, THINGY) \ KONEPLUS_SYSFS_W(thingy, THINGY); \ static const struct bin_attribute bin_attr_##thingy = { \ .attr = { .name = #thingy, .mode = 0220 }, \ .size = KONEPLUS_SIZE_ ## THINGY, \ .write_new = koneplus_sysfs_write_ ## thingy \ } KONEPLUS_BIN_ATTRIBUTE_W(control, CONTROL); KONEPLUS_BIN_ATTRIBUTE_W(talk, TALK); KONEPLUS_BIN_ATTRIBUTE_W(macro, MACRO); KONEPLUS_BIN_ATTRIBUTE_R(tcu_image, TCU_IMAGE); KONEPLUS_BIN_ATTRIBUTE_RW(info, INFO); KONEPLUS_BIN_ATTRIBUTE_RW(sensor, SENSOR); KONEPLUS_BIN_ATTRIBUTE_RW(tcu, TCU); KONEPLUS_BIN_ATTRIBUTE_RW(profile_settings, PROFILE_SETTINGS); KONEPLUS_BIN_ATTRIBUTE_RW(profile_buttons, PROFILE_BUTTONS); static ssize_t koneplus_sysfs_read_profilex_settings(struct file *fp, struct kobject *kobj, const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj)->parent->parent; struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev)); ssize_t retval; retval = koneplus_send_control(usb_dev, *(uint *)(attr->private), KONEPLUS_CONTROL_REQUEST_PROFILE_SETTINGS); if (retval) return retval; return koneplus_sysfs_read(fp, kobj, buf, off, count, KONEPLUS_SIZE_PROFILE_SETTINGS, KONEPLUS_COMMAND_PROFILE_SETTINGS); } static ssize_t koneplus_sysfs_read_profilex_buttons(struct file *fp, struct kobject *kobj, const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj)->parent->parent; struct usb_device *usb_dev = interface_to_usbdev(to_usb_interface(dev)); ssize_t retval; retval = koneplus_send_control(usb_dev, *(uint *)(attr->private), KONEPLUS_CONTROL_REQUEST_PROFILE_BUTTONS); if (retval) return retval; return koneplus_sysfs_read(fp, kobj, buf, off, count, KONEPLUS_SIZE_PROFILE_BUTTONS, KONEPLUS_COMMAND_PROFILE_BUTTONS); } #define PROFILE_ATTR(number) \ static const struct bin_attribute bin_attr_profile##number##_settings = { \ .attr = { .name = "profile" #number "_settings", .mode = 0440 }, \ .size = KONEPLUS_SIZE_PROFILE_SETTINGS, \ .read_new = koneplus_sysfs_read_profilex_settings, \ .private = &profile_numbers[number-1], \ }; \ static const struct bin_attribute bin_attr_profile##number##_buttons = { \ .attr = { .name = "profile" #number "_buttons", .mode = 0440 }, \ .size = KONEPLUS_SIZE_PROFILE_BUTTONS, \ .read_new = koneplus_sysfs_read_profilex_buttons, \ .private = &profile_numbers[number-1], \ }; PROFILE_ATTR(1); PROFILE_ATTR(2); PROFILE_ATTR(3); PROFILE_ATTR(4); PROFILE_ATTR(5); static ssize_t koneplus_sysfs_show_actual_profile(struct device *dev, struct device_attribute *attr, char *buf) { struct koneplus_device *koneplus = hid_get_drvdata(dev_get_drvdata(dev->parent->parent)); return sysfs_emit(buf, "%d\n", koneplus->actual_profile); } static ssize_t koneplus_sysfs_set_actual_profile(struct device *dev, struct 
device_attribute *attr, char const *buf, size_t size) { struct koneplus_device *koneplus; struct usb_device *usb_dev; unsigned long profile; int retval; struct koneplus_roccat_report roccat_report; dev = dev->parent->parent; koneplus = hid_get_drvdata(dev_get_drvdata(dev)); usb_dev = interface_to_usbdev(to_usb_interface(dev)); retval = kstrtoul(buf, 10, &profile); if (retval) return retval; if (profile > 4) return -EINVAL; mutex_lock(&koneplus->koneplus_lock); retval = koneplus_set_actual_profile(usb_dev, profile); if (retval) { mutex_unlock(&koneplus->koneplus_lock); return retval; } koneplus_profile_activated(koneplus, profile); roccat_report.type = KONEPLUS_MOUSE_REPORT_BUTTON_TYPE_PROFILE; roccat_report.data1 = profile + 1; roccat_report.data2 = 0; roccat_report.profile = profile + 1; roccat_report_event(koneplus->chrdev_minor, (uint8_t const *)&roccat_report); mutex_unlock(&koneplus->koneplus_lock); return size; } static DEVICE_ATTR(actual_profile, 0660, koneplus_sysfs_show_actual_profile, koneplus_sysfs_set_actual_profile); static DEVICE_ATTR(startup_profile, 0660, koneplus_sysfs_show_actual_profile, koneplus_sysfs_set_actual_profile); static ssize_t koneplus_sysfs_show_firmware_version(struct device *dev, struct device_attribute *attr, char *buf) { struct koneplus_device *koneplus; struct usb_device *usb_dev; struct koneplus_info info; dev = dev->parent->parent; koneplus = hid_get_drvdata(dev_get_drvdata(dev)); usb_dev = interface_to_usbdev(to_usb_interface(dev)); mutex_lock(&koneplus->koneplus_lock); roccat_common2_receive(usb_dev, KONEPLUS_COMMAND_INFO, &info, KONEPLUS_SIZE_INFO); mutex_unlock(&koneplus->koneplus_lock); return sysfs_emit(buf, "%d\n", info.firmware_version); } static DEVICE_ATTR(firmware_version, 0440, koneplus_sysfs_show_firmware_version, NULL); static struct attribute *koneplus_attrs[] = { &dev_attr_actual_profile.attr, &dev_attr_startup_profile.attr, &dev_attr_firmware_version.attr, NULL, }; static const struct bin_attribute *const koneplus_bin_attributes[] = { &bin_attr_control, &bin_attr_talk, &bin_attr_macro, &bin_attr_tcu_image, &bin_attr_info, &bin_attr_sensor, &bin_attr_tcu, &bin_attr_profile_settings, &bin_attr_profile_buttons, &bin_attr_profile1_settings, &bin_attr_profile2_settings, &bin_attr_profile3_settings, &bin_attr_profile4_settings, &bin_attr_profile5_settings, &bin_attr_profile1_buttons, &bin_attr_profile2_buttons, &bin_attr_profile3_buttons, &bin_attr_profile4_buttons, &bin_attr_profile5_buttons, NULL, }; static const struct attribute_group koneplus_group = { .attrs = koneplus_attrs, .bin_attrs_new = koneplus_bin_attributes, }; static const struct attribute_group *koneplus_groups[] = { &koneplus_group, NULL, }; static const struct class koneplus_class = { .name = "koneplus", .dev_groups = koneplus_groups, }; static int koneplus_init_koneplus_device_struct(struct usb_device *usb_dev, struct koneplus_device *koneplus) { int retval; mutex_init(&koneplus->koneplus_lock); retval = koneplus_get_actual_profile(usb_dev); if (retval < 0) return retval; koneplus_profile_activated(koneplus, retval); return 0; } static int koneplus_init_specials(struct hid_device *hdev) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); struct usb_device *usb_dev = interface_to_usbdev(intf); struct koneplus_device *koneplus; int retval; if (intf->cur_altsetting->desc.bInterfaceProtocol == USB_INTERFACE_PROTOCOL_MOUSE) { koneplus = kzalloc(sizeof(*koneplus), GFP_KERNEL); if (!koneplus) { hid_err(hdev, "can't alloc device descriptor\n"); return -ENOMEM; } 
hid_set_drvdata(hdev, koneplus); retval = koneplus_init_koneplus_device_struct(usb_dev, koneplus); if (retval) { hid_err(hdev, "couldn't init struct koneplus_device\n"); goto exit_free; } retval = roccat_connect(&koneplus_class, hdev, sizeof(struct koneplus_roccat_report)); if (retval < 0) { hid_err(hdev, "couldn't init char dev\n"); } else { koneplus->chrdev_minor = retval; koneplus->roccat_claimed = 1; } } else { hid_set_drvdata(hdev, NULL); } return 0; exit_free: kfree(koneplus); return retval; } static void koneplus_remove_specials(struct hid_device *hdev) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); struct koneplus_device *koneplus; if (intf->cur_altsetting->desc.bInterfaceProtocol == USB_INTERFACE_PROTOCOL_MOUSE) { koneplus = hid_get_drvdata(hdev); if (koneplus->roccat_claimed) roccat_disconnect(koneplus->chrdev_minor); kfree(koneplus); } } static int koneplus_probe(struct hid_device *hdev, const struct hid_device_id *id) { int retval; if (!hid_is_usb(hdev)) return -EINVAL; retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); goto exit; } retval = hid_hw_start(hdev, HID_CONNECT_DEFAULT); if (retval) { hid_err(hdev, "hw start failed\n"); goto exit; } retval = koneplus_init_specials(hdev); if (retval) { hid_err(hdev, "couldn't install mouse\n"); goto exit_stop; } return 0; exit_stop: hid_hw_stop(hdev); exit: return retval; } static void koneplus_remove(struct hid_device *hdev) { koneplus_remove_specials(hdev); hid_hw_stop(hdev); } static void koneplus_keep_values_up_to_date(struct koneplus_device *koneplus, u8 const *data) { struct koneplus_mouse_report_button const *button_report; switch (data[0]) { case KONEPLUS_MOUSE_REPORT_NUMBER_BUTTON: button_report = (struct koneplus_mouse_report_button const *)data; switch (button_report->type) { case KONEPLUS_MOUSE_REPORT_BUTTON_TYPE_PROFILE: koneplus_profile_activated(koneplus, button_report->data1 - 1); break; } break; } } static void koneplus_report_to_chrdev(struct koneplus_device const *koneplus, u8 const *data) { struct koneplus_roccat_report roccat_report; struct koneplus_mouse_report_button const *button_report; if (data[0] != KONEPLUS_MOUSE_REPORT_NUMBER_BUTTON) return; button_report = (struct koneplus_mouse_report_button const *)data; if ((button_report->type == KONEPLUS_MOUSE_REPORT_BUTTON_TYPE_QUICKLAUNCH || button_report->type == KONEPLUS_MOUSE_REPORT_BUTTON_TYPE_TIMER) && button_report->data2 != KONEPLUS_MOUSE_REPORT_BUTTON_ACTION_PRESS) return; roccat_report.type = button_report->type; roccat_report.data1 = button_report->data1; roccat_report.data2 = button_report->data2; roccat_report.profile = koneplus->actual_profile + 1; roccat_report_event(koneplus->chrdev_minor, (uint8_t const *)&roccat_report); } static int koneplus_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); struct koneplus_device *koneplus = hid_get_drvdata(hdev); if (intf->cur_altsetting->desc.bInterfaceProtocol != USB_INTERFACE_PROTOCOL_MOUSE) return 0; if (koneplus == NULL) return 0; koneplus_keep_values_up_to_date(koneplus, data); if (koneplus->roccat_claimed) koneplus_report_to_chrdev(koneplus, data); return 0; } static const struct hid_device_id koneplus_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_KONEPLUS) }, { HID_USB_DEVICE(USB_VENDOR_ID_ROCCAT, USB_DEVICE_ID_ROCCAT_KONEXTD) }, { } }; MODULE_DEVICE_TABLE(hid, koneplus_devices); static struct hid_driver koneplus_driver = { .name = 
"koneplus", .id_table = koneplus_devices, .probe = koneplus_probe, .remove = koneplus_remove, .raw_event = koneplus_raw_event }; static int __init koneplus_init(void) { int retval; /* class name has to be same as driver name */ retval = class_register(&koneplus_class); if (retval) return retval; retval = hid_register_driver(&koneplus_driver); if (retval) class_unregister(&koneplus_class); return retval; } static void __exit koneplus_exit(void) { hid_unregister_driver(&koneplus_driver); class_unregister(&koneplus_class); } module_init(koneplus_init); module_exit(koneplus_exit); MODULE_AUTHOR("Stefan Achatz"); MODULE_DESCRIPTION("USB Roccat Kone[+]/XTD driver"); MODULE_LICENSE("GPL v2"); |
/* SPDX-License-Identifier: GPL-2.0 */ /* * fscrypt.h: declarations for per-file encryption * * Filesystems that implement per-file encryption must include this header * file. * * Copyright (C) 2015, Google, Inc. * * Written by Michael Halcrow, 2015. * Modified by Jaegeuk Kim, 2015. */ #ifndef _LINUX_FSCRYPT_H #define _LINUX_FSCRYPT_H #include <linux/fs.h> #include <linux/mm.h> #include <linux/slab.h> #include <uapi/linux/fscrypt.h> /* * The lengths of all file contents blocks must be divisible by this value. * This is needed to ensure that all contents encryption modes will work, as * some of the supported modes don't support arbitrarily byte-aligned messages. * * Since the needed alignment is 16 bytes, most filesystems will meet this * requirement naturally, as typical block sizes are powers of 2. However, if a * filesystem can generate arbitrarily byte-aligned block lengths (e.g., via * compression), then it will need to pad to this alignment before encryption. */ #define FSCRYPT_CONTENTS_ALIGNMENT 16 union fscrypt_policy; struct fscrypt_inode_info; struct fs_parameter; struct seq_file; struct fscrypt_str { unsigned char *name; u32 len; }; struct fscrypt_name { const struct qstr *usr_fname; struct fscrypt_str disk_name; u32 hash; u32 minor_hash; struct fscrypt_str crypto_buf; bool is_nokey_name; }; #define FSTR_INIT(n, l) { .name = n, .len = l } #define FSTR_TO_QSTR(f) QSTR_INIT((f)->name, (f)->len) #define fname_name(p) ((p)->disk_name.name) #define fname_len(p) ((p)->disk_name.len) /* Maximum value for the third parameter of fscrypt_operations.set_context(). */ #define FSCRYPT_SET_CONTEXT_MAX_SIZE 40 #ifdef CONFIG_FS_ENCRYPTION /* Crypto operations for filesystems */ struct fscrypt_operations { /* * If set, then fs/crypto/ will allocate a global bounce page pool the * first time an encryption key is set up for a file. The bounce page * pool is required by the following functions: * * - fscrypt_encrypt_pagecache_blocks() * - fscrypt_zeroout_range() for files not using inline crypto * * If the filesystem doesn't use those, it doesn't need to set this. */ unsigned int needs_bounce_pages : 1; /* * If set, then fs/crypto/ will allow the use of encryption settings * that assume inode numbers fit in 32 bits (i.e. * FSCRYPT_POLICY_FLAG_IV_INO_LBLK_{32,64}), provided that the other * prerequisites for these settings are also met. This is only useful * if the filesystem wants to support inline encryption hardware that is * limited to 32-bit or 64-bit data unit numbers and where programming * keyslots is very slow.
*/ unsigned int has_32bit_inodes : 1; /* * If set, then fs/crypto/ will allow users to select a crypto data unit * size that is less than the filesystem block size. This is done via * the log2_data_unit_size field of the fscrypt policy. This flag is * not compatible with filesystems that encrypt variable-length blocks * (i.e. blocks that aren't all equal to filesystem's block size), for * example as a result of compression. It's also not compatible with * the fscrypt_encrypt_block_inplace() and * fscrypt_decrypt_block_inplace() functions. */ unsigned int supports_subblock_data_units : 1; /* * This field exists only for backwards compatibility reasons and should * only be set by the filesystems that are setting it already. It * contains the filesystem-specific key description prefix that is * accepted for "logon" keys for v1 fscrypt policies. This * functionality is deprecated in favor of the generic prefix * "fscrypt:", which itself is deprecated in favor of the filesystem * keyring ioctls such as FS_IOC_ADD_ENCRYPTION_KEY. Filesystems that * are newly adding fscrypt support should not set this field. */ const char *legacy_key_prefix; /* * Get the fscrypt context of the given inode. * * @inode: the inode whose context to get * @ctx: the buffer into which to get the context * @len: length of the @ctx buffer in bytes * * Return: On success, returns the length of the context in bytes; this * may be less than @len. On failure, returns -ENODATA if the * inode doesn't have a context, -ERANGE if the context is * longer than @len, or another -errno code. */ int (*get_context)(struct inode *inode, void *ctx, size_t len); /* * Set an fscrypt context on the given inode. * * @inode: the inode whose context to set. The inode won't already have * an fscrypt context. * @ctx: the context to set * @len: length of @ctx in bytes (at most FSCRYPT_SET_CONTEXT_MAX_SIZE) * @fs_data: If called from fscrypt_set_context(), this will be the * value the filesystem passed to fscrypt_set_context(). * Otherwise (i.e. when called from * FS_IOC_SET_ENCRYPTION_POLICY) this will be NULL. * * i_rwsem will be held for write. * * Return: 0 on success, -errno on failure. */ int (*set_context)(struct inode *inode, const void *ctx, size_t len, void *fs_data); /* * Get the dummy fscrypt policy in use on the filesystem (if any). * * Filesystems only need to implement this function if they support the * test_dummy_encryption mount option. * * Return: A pointer to the dummy fscrypt policy, if the filesystem is * mounted with test_dummy_encryption; otherwise NULL. */ const union fscrypt_policy *(*get_dummy_policy)(struct super_block *sb); /* * Check whether a directory is empty. i_rwsem will be held for write. */ bool (*empty_dir)(struct inode *inode); /* * Check whether the filesystem's inode numbers and UUID are stable, * meaning that they will never be changed even by offline operations * such as filesystem shrinking and therefore can be used in the * encryption without the possibility of files becoming unreadable. * * Filesystems only need to implement this function if they want to * support the FSCRYPT_POLICY_FLAG_IV_INO_LBLK_{32,64} flags. These * flags are designed to work around the limitations of UFS and eMMC * inline crypto hardware, and they shouldn't be used in scenarios where * such hardware isn't being used. * * Leaving this NULL is equivalent to always returning false. 
*/ bool (*has_stable_inodes)(struct super_block *sb); /* * Return an array of pointers to the block devices to which the * filesystem may write encrypted file contents, NULL if the filesystem * only has a single such block device, or an ERR_PTR() on error. * * On successful non-NULL return, *num_devs is set to the number of * devices in the returned array. The caller must free the returned * array using kfree(). * * If the filesystem can use multiple block devices (other than block * devices that aren't used for encrypted file contents, such as * external journal devices), and wants to support inline encryption, * then it must implement this function. Otherwise it's not needed. */ struct block_device **(*get_devices)(struct super_block *sb, unsigned int *num_devs); }; int fscrypt_d_revalidate(struct inode *dir, const struct qstr *name, struct dentry *dentry, unsigned int flags); static inline struct fscrypt_inode_info * fscrypt_get_inode_info(const struct inode *inode) { /* * Pairs with the cmpxchg_release() in fscrypt_setup_encryption_info(). * I.e., another task may publish ->i_crypt_info concurrently, executing * a RELEASE barrier. We need to use smp_load_acquire() here to safely * ACQUIRE the memory the other task published. */ return smp_load_acquire(&inode->i_crypt_info); } /** * fscrypt_needs_contents_encryption() - check whether an inode needs * contents encryption * @inode: the inode to check * * Return: %true iff the inode is an encrypted regular file and the kernel was * built with fscrypt support. * * If you need to know whether the encrypt bit is set even when the kernel was * built without fscrypt support, you must use IS_ENCRYPTED() directly instead. */ static inline bool fscrypt_needs_contents_encryption(const struct inode *inode) { return IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode); } /* * When d_splice_alias() moves a directory's no-key alias to its * plaintext alias as a result of the encryption key being added, * DCACHE_NOKEY_NAME must be cleared and there might be an opportunity * to disable d_revalidate. Note that we don't have to support the * inverse operation because fscrypt doesn't allow no-key names to be * the source or target of a rename(). */ static inline void fscrypt_handle_d_move(struct dentry *dentry) { /* * VFS calls fscrypt_handle_d_move even for non-fscrypt * filesystems. */ if (dentry->d_flags & DCACHE_NOKEY_NAME) { dentry->d_flags &= ~DCACHE_NOKEY_NAME; /* * Other filesystem features might be handling dentry * revalidation, in which case it cannot be disabled. */ if (dentry->d_op->d_revalidate == fscrypt_d_revalidate) dentry->d_flags &= ~DCACHE_OP_REVALIDATE; } } /** * fscrypt_is_nokey_name() - test whether a dentry is a no-key name * @dentry: the dentry to check * * This returns true if the dentry is a no-key dentry. A no-key dentry is a * dentry that was created in an encrypted directory that hasn't had its * encryption key added yet. Such dentries may be either positive or negative. * * When a filesystem is asked to create a new filename in an encrypted directory * and the new filename's dentry is a no-key dentry, it must fail the operation * with ENOKEY. This includes ->create(), ->mkdir(), ->mknod(), ->symlink(), * ->rename(), and ->link(). (However, ->rename() and ->link() are already * handled by fscrypt_prepare_rename() and fscrypt_prepare_link().) 
* * This is necessary because creating a filename requires the directory's * encryption key, but just checking for the key on the directory inode during * the final filesystem operation doesn't guarantee that the key was available * during the preceding dentry lookup. And the key must have already been * available during the dentry lookup in order for it to have been checked * whether the filename already exists in the directory and for the new file's * dentry not to be invalidated due to it incorrectly having the no-key flag. * * Return: %true if the dentry is a no-key name */ static inline bool fscrypt_is_nokey_name(const struct dentry *dentry) { return dentry->d_flags & DCACHE_NOKEY_NAME; } static inline void fscrypt_prepare_dentry(struct dentry *dentry, bool is_nokey_name) { /* * This code tries to only take ->d_lock when necessary to write * to ->d_flags. We shouldn't be peeking on d_flags for * DCACHE_OP_REVALIDATE unlocked, but in the unlikely case * there is a race, the worst it can happen is that we fail to * unset DCACHE_OP_REVALIDATE and pay the cost of an extra * d_revalidate. */ if (is_nokey_name) { spin_lock(&dentry->d_lock); dentry->d_flags |= DCACHE_NOKEY_NAME; spin_unlock(&dentry->d_lock); } else if (dentry->d_flags & DCACHE_OP_REVALIDATE && dentry->d_op->d_revalidate == fscrypt_d_revalidate) { /* * Unencrypted dentries and encrypted dentries where the * key is available are always valid from fscrypt * perspective. Avoid the cost of calling * fscrypt_d_revalidate unnecessarily. */ spin_lock(&dentry->d_lock); dentry->d_flags &= ~DCACHE_OP_REVALIDATE; spin_unlock(&dentry->d_lock); } } /* crypto.c */ void fscrypt_enqueue_decrypt_work(struct work_struct *); struct page *fscrypt_encrypt_pagecache_blocks(struct folio *folio, size_t len, size_t offs, gfp_t gfp_flags); int fscrypt_encrypt_block_inplace(const struct inode *inode, struct page *page, unsigned int len, unsigned int offs, u64 lblk_num, gfp_t gfp_flags); int fscrypt_decrypt_pagecache_blocks(struct folio *folio, size_t len, size_t offs); int fscrypt_decrypt_block_inplace(const struct inode *inode, struct page *page, unsigned int len, unsigned int offs, u64 lblk_num); static inline bool fscrypt_is_bounce_page(struct page *page) { return page->mapping == NULL; } static inline struct page *fscrypt_pagecache_page(struct page *bounce_page) { return (struct page *)page_private(bounce_page); } static inline bool fscrypt_is_bounce_folio(struct folio *folio) { return folio->mapping == NULL; } static inline struct folio *fscrypt_pagecache_folio(struct folio *bounce_folio) { return bounce_folio->private; } void fscrypt_free_bounce_page(struct page *bounce_page); /* policy.c */ int fscrypt_ioctl_set_policy(struct file *filp, const void __user *arg); int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg); int fscrypt_ioctl_get_policy_ex(struct file *filp, void __user *arg); int fscrypt_ioctl_get_nonce(struct file *filp, void __user *arg); int fscrypt_has_permitted_context(struct inode *parent, struct inode *child); int fscrypt_context_for_new_inode(void *ctx, struct inode *inode); int fscrypt_set_context(struct inode *inode, void *fs_data); struct fscrypt_dummy_policy { const union fscrypt_policy *policy; }; int fscrypt_parse_test_dummy_encryption(const struct fs_parameter *param, struct fscrypt_dummy_policy *dummy_policy); bool fscrypt_dummy_policies_equal(const struct fscrypt_dummy_policy *p1, const struct fscrypt_dummy_policy *p2); void fscrypt_show_test_dummy_encryption(struct seq_file *seq, char sep, struct 
super_block *sb); static inline bool fscrypt_is_dummy_policy_set(const struct fscrypt_dummy_policy *dummy_policy) { return dummy_policy->policy != NULL; } static inline void fscrypt_free_dummy_policy(struct fscrypt_dummy_policy *dummy_policy) { kfree(dummy_policy->policy); dummy_policy->policy = NULL; } /* keyring.c */ void fscrypt_destroy_keyring(struct super_block *sb); int fscrypt_ioctl_add_key(struct file *filp, void __user *arg); int fscrypt_ioctl_remove_key(struct file *filp, void __user *arg); int fscrypt_ioctl_remove_key_all_users(struct file *filp, void __user *arg); int fscrypt_ioctl_get_key_status(struct file *filp, void __user *arg); /* keysetup.c */ int fscrypt_prepare_new_inode(struct inode *dir, struct inode *inode, bool *encrypt_ret); void fscrypt_put_encryption_info(struct inode *inode); void fscrypt_free_inode(struct inode *inode); int fscrypt_drop_inode(struct inode *inode); /* fname.c */ int fscrypt_fname_encrypt(const struct inode *inode, const struct qstr *iname, u8 *out, unsigned int olen); bool fscrypt_fname_encrypted_size(const struct inode *inode, u32 orig_len, u32 max_len, u32 *encrypted_len_ret); int fscrypt_setup_filename(struct inode *inode, const struct qstr *iname, int lookup, struct fscrypt_name *fname); static inline void fscrypt_free_filename(struct fscrypt_name *fname) { kfree(fname->crypto_buf.name); } int fscrypt_fname_alloc_buffer(u32 max_encrypted_len, struct fscrypt_str *crypto_str); void fscrypt_fname_free_buffer(struct fscrypt_str *crypto_str); int fscrypt_fname_disk_to_usr(const struct inode *inode, u32 hash, u32 minor_hash, const struct fscrypt_str *iname, struct fscrypt_str *oname); bool fscrypt_match_name(const struct fscrypt_name *fname, const u8 *de_name, u32 de_name_len); u64 fscrypt_fname_siphash(const struct inode *dir, const struct qstr *name); /* bio.c */ bool fscrypt_decrypt_bio(struct bio *bio); int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, sector_t pblk, unsigned int len); /* hooks.c */ int fscrypt_file_open(struct inode *inode, struct file *filp); int __fscrypt_prepare_link(struct inode *inode, struct inode *dir, struct dentry *dentry); int __fscrypt_prepare_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags); int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry, struct fscrypt_name *fname); int fscrypt_prepare_lookup_partial(struct inode *dir, struct dentry *dentry); int __fscrypt_prepare_readdir(struct inode *dir); int __fscrypt_prepare_setattr(struct dentry *dentry, struct iattr *attr); int fscrypt_prepare_setflags(struct inode *inode, unsigned int oldflags, unsigned int flags); int fscrypt_prepare_symlink(struct inode *dir, const char *target, unsigned int len, unsigned int max_len, struct fscrypt_str *disk_link); int __fscrypt_encrypt_symlink(struct inode *inode, const char *target, unsigned int len, struct fscrypt_str *disk_link); const char *fscrypt_get_symlink(struct inode *inode, const void *caddr, unsigned int max_size, struct delayed_call *done); int fscrypt_symlink_getattr(const struct path *path, struct kstat *stat); static inline void fscrypt_set_ops(struct super_block *sb, const struct fscrypt_operations *s_cop) { sb->s_cop = s_cop; } #else /* !CONFIG_FS_ENCRYPTION */ static inline struct fscrypt_inode_info * fscrypt_get_inode_info(const struct inode *inode) { return NULL; } static inline bool fscrypt_needs_contents_encryption(const struct inode *inode) { return false; } static inline void 
fscrypt_handle_d_move(struct dentry *dentry) { } static inline bool fscrypt_is_nokey_name(const struct dentry *dentry) { return false; } static inline void fscrypt_prepare_dentry(struct dentry *dentry, bool is_nokey_name) { } /* crypto.c */ static inline void fscrypt_enqueue_decrypt_work(struct work_struct *work) { } static inline struct page *fscrypt_encrypt_pagecache_blocks(struct folio *folio, size_t len, size_t offs, gfp_t gfp_flags) { return ERR_PTR(-EOPNOTSUPP); } static inline int fscrypt_encrypt_block_inplace(const struct inode *inode, struct page *page, unsigned int len, unsigned int offs, u64 lblk_num, gfp_t gfp_flags) { return -EOPNOTSUPP; } static inline int fscrypt_decrypt_pagecache_blocks(struct folio *folio, size_t len, size_t offs) { return -EOPNOTSUPP; } static inline int fscrypt_decrypt_block_inplace(const struct inode *inode, struct page *page, unsigned int len, unsigned int offs, u64 lblk_num) { return -EOPNOTSUPP; } static inline bool fscrypt_is_bounce_page(struct page *page) { return false; } static inline struct page *fscrypt_pagecache_page(struct page *bounce_page) { WARN_ON_ONCE(1); return ERR_PTR(-EINVAL); } static inline bool fscrypt_is_bounce_folio(struct folio *folio) { return false; } static inline struct folio *fscrypt_pagecache_folio(struct folio *bounce_folio) { WARN_ON_ONCE(1); return ERR_PTR(-EINVAL); } static inline void fscrypt_free_bounce_page(struct page *bounce_page) { } /* policy.c */ static inline int fscrypt_ioctl_set_policy(struct file *filp, const void __user *arg) { return -EOPNOTSUPP; } static inline int fscrypt_ioctl_get_policy(struct file *filp, void __user *arg) { return -EOPNOTSUPP; } static inline int fscrypt_ioctl_get_policy_ex(struct file *filp, void __user *arg) { return -EOPNOTSUPP; } static inline int fscrypt_ioctl_get_nonce(struct file *filp, void __user *arg) { return -EOPNOTSUPP; } static inline int fscrypt_has_permitted_context(struct inode *parent, struct inode *child) { return 0; } static inline int fscrypt_set_context(struct inode *inode, void *fs_data) { return -EOPNOTSUPP; } struct fscrypt_dummy_policy { }; static inline int fscrypt_parse_test_dummy_encryption(const struct fs_parameter *param, struct fscrypt_dummy_policy *dummy_policy) { return -EINVAL; } static inline bool fscrypt_dummy_policies_equal(const struct fscrypt_dummy_policy *p1, const struct fscrypt_dummy_policy *p2) { return true; } static inline void fscrypt_show_test_dummy_encryption(struct seq_file *seq, char sep, struct super_block *sb) { } static inline bool fscrypt_is_dummy_policy_set(const struct fscrypt_dummy_policy *dummy_policy) { return false; } static inline void fscrypt_free_dummy_policy(struct fscrypt_dummy_policy *dummy_policy) { } /* keyring.c */ static inline void fscrypt_destroy_keyring(struct super_block *sb) { } static inline int fscrypt_ioctl_add_key(struct file *filp, void __user *arg) { return -EOPNOTSUPP; } static inline int fscrypt_ioctl_remove_key(struct file *filp, void __user *arg) { return -EOPNOTSUPP; } static inline int fscrypt_ioctl_remove_key_all_users(struct file *filp, void __user *arg) { return -EOPNOTSUPP; } static inline int fscrypt_ioctl_get_key_status(struct file *filp, void __user *arg) { return -EOPNOTSUPP; } /* keysetup.c */ static inline int fscrypt_prepare_new_inode(struct inode *dir, struct inode *inode, bool *encrypt_ret) { if (IS_ENCRYPTED(dir)) return -EOPNOTSUPP; return 0; } static inline void fscrypt_put_encryption_info(struct inode *inode) { return; } static inline void fscrypt_free_inode(struct inode *inode) { 
} static inline int fscrypt_drop_inode(struct inode *inode) { return 0; } /* fname.c */ static inline int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, int lookup, struct fscrypt_name *fname) { if (IS_ENCRYPTED(dir)) return -EOPNOTSUPP; memset(fname, 0, sizeof(*fname)); fname->usr_fname = iname; fname->disk_name.name = (unsigned char *)iname->name; fname->disk_name.len = iname->len; return 0; } static inline void fscrypt_free_filename(struct fscrypt_name *fname) { return; } static inline int fscrypt_fname_alloc_buffer(u32 max_encrypted_len, struct fscrypt_str *crypto_str) { return -EOPNOTSUPP; } static inline void fscrypt_fname_free_buffer(struct fscrypt_str *crypto_str) { return; } static inline int fscrypt_fname_disk_to_usr(const struct inode *inode, u32 hash, u32 minor_hash, const struct fscrypt_str *iname, struct fscrypt_str *oname) { return -EOPNOTSUPP; } static inline bool fscrypt_match_name(const struct fscrypt_name *fname, const u8 *de_name, u32 de_name_len) { /* Encryption support disabled; use standard comparison */ if (de_name_len != fname->disk_name.len) return false; return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len); } static inline u64 fscrypt_fname_siphash(const struct inode *dir, const struct qstr *name) { WARN_ON_ONCE(1); return 0; } static inline int fscrypt_d_revalidate(struct inode *dir, const struct qstr *name, struct dentry *dentry, unsigned int flags) { return 1; } /* bio.c */ static inline bool fscrypt_decrypt_bio(struct bio *bio) { return true; } static inline int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, sector_t pblk, unsigned int len) { return -EOPNOTSUPP; } /* hooks.c */ static inline int fscrypt_file_open(struct inode *inode, struct file *filp) { if (IS_ENCRYPTED(inode)) return -EOPNOTSUPP; return 0; } static inline int __fscrypt_prepare_link(struct inode *inode, struct inode *dir, struct dentry *dentry) { return -EOPNOTSUPP; } static inline int __fscrypt_prepare_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { return -EOPNOTSUPP; } static inline int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry, struct fscrypt_name *fname) { return -EOPNOTSUPP; } static inline int fscrypt_prepare_lookup_partial(struct inode *dir, struct dentry *dentry) { return -EOPNOTSUPP; } static inline int __fscrypt_prepare_readdir(struct inode *dir) { return -EOPNOTSUPP; } static inline int __fscrypt_prepare_setattr(struct dentry *dentry, struct iattr *attr) { return -EOPNOTSUPP; } static inline int fscrypt_prepare_setflags(struct inode *inode, unsigned int oldflags, unsigned int flags) { return 0; } static inline int fscrypt_prepare_symlink(struct inode *dir, const char *target, unsigned int len, unsigned int max_len, struct fscrypt_str *disk_link) { if (IS_ENCRYPTED(dir)) return -EOPNOTSUPP; disk_link->name = (unsigned char *)target; disk_link->len = len + 1; if (disk_link->len > max_len) return -ENAMETOOLONG; return 0; } static inline int __fscrypt_encrypt_symlink(struct inode *inode, const char *target, unsigned int len, struct fscrypt_str *disk_link) { return -EOPNOTSUPP; } static inline const char *fscrypt_get_symlink(struct inode *inode, const void *caddr, unsigned int max_size, struct delayed_call *done) { return ERR_PTR(-EOPNOTSUPP); } static inline int fscrypt_symlink_getattr(const struct path *path, struct kstat *stat) { return -EOPNOTSUPP; } static inline void fscrypt_set_ops(struct super_block *sb, const struct 
fscrypt_operations *s_cop) { } #endif /* !CONFIG_FS_ENCRYPTION */ /* inline_crypt.c */ #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT bool __fscrypt_inode_uses_inline_crypto(const struct inode *inode); void fscrypt_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode, u64 first_lblk, gfp_t gfp_mask); void fscrypt_set_bio_crypt_ctx_bh(struct bio *bio, const struct buffer_head *first_bh, gfp_t gfp_mask); bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode, u64 next_lblk); bool fscrypt_mergeable_bio_bh(struct bio *bio, const struct buffer_head *next_bh); bool fscrypt_dio_supported(struct inode *inode); u64 fscrypt_limit_io_blocks(const struct inode *inode, u64 lblk, u64 nr_blocks); #else /* CONFIG_FS_ENCRYPTION_INLINE_CRYPT */ static inline bool __fscrypt_inode_uses_inline_crypto(const struct inode *inode) { return false; } static inline void fscrypt_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode, u64 first_lblk, gfp_t gfp_mask) { } static inline void fscrypt_set_bio_crypt_ctx_bh( struct bio *bio, const struct buffer_head *first_bh, gfp_t gfp_mask) { } static inline bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode, u64 next_lblk) { return true; } static inline bool fscrypt_mergeable_bio_bh(struct bio *bio, const struct buffer_head *next_bh) { return true; } static inline bool fscrypt_dio_supported(struct inode *inode) { return !fscrypt_needs_contents_encryption(inode); } static inline u64 fscrypt_limit_io_blocks(const struct inode *inode, u64 lblk, u64 nr_blocks) { return nr_blocks; } #endif /* !CONFIG_FS_ENCRYPTION_INLINE_CRYPT */ /** * fscrypt_inode_uses_inline_crypto() - test whether an inode uses inline * encryption * @inode: an inode. If encrypted, its key must be set up. * * Return: true if the inode requires file contents encryption and if the * encryption should be done in the block layer via blk-crypto rather * than in the filesystem layer. */ static inline bool fscrypt_inode_uses_inline_crypto(const struct inode *inode) { return fscrypt_needs_contents_encryption(inode) && __fscrypt_inode_uses_inline_crypto(inode); } /** * fscrypt_inode_uses_fs_layer_crypto() - test whether an inode uses fs-layer * encryption * @inode: an inode. If encrypted, its key must be set up. * * Return: true if the inode requires file contents encryption and if the * encryption should be done in the filesystem layer rather than in the * block layer via blk-crypto. */ static inline bool fscrypt_inode_uses_fs_layer_crypto(const struct inode *inode) { return fscrypt_needs_contents_encryption(inode) && !__fscrypt_inode_uses_inline_crypto(inode); } /** * fscrypt_has_encryption_key() - check whether an inode has had its key set up * @inode: the inode to check * * Return: %true if the inode has had its encryption key set up, else %false. * * Usually this should be preceded by fscrypt_get_encryption_info() to try to * set up the key first. */ static inline bool fscrypt_has_encryption_key(const struct inode *inode) { return fscrypt_get_inode_info(inode) != NULL; } /** * fscrypt_prepare_link() - prepare to link an inode into a possibly-encrypted * directory * @old_dentry: an existing dentry for the inode being linked * @dir: the target directory * @dentry: negative dentry for the target filename * * A new link can only be added to an encrypted directory if the directory's * encryption key is available --- since otherwise we'd have no way to encrypt * the filename. 
* * We also verify that the link will not violate the constraint that all files * in an encrypted directory tree use the same encryption policy. * * Return: 0 on success, -ENOKEY if the directory's encryption key is missing, * -EXDEV if the link would result in an inconsistent encryption policy, or * another -errno code. */ static inline int fscrypt_prepare_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { if (IS_ENCRYPTED(dir)) return __fscrypt_prepare_link(d_inode(old_dentry), dir, dentry); return 0; } /** * fscrypt_prepare_rename() - prepare for a rename between possibly-encrypted * directories * @old_dir: source directory * @old_dentry: dentry for source file * @new_dir: target directory * @new_dentry: dentry for target location (may be negative unless exchanging) * @flags: rename flags (we care at least about %RENAME_EXCHANGE) * * Prepare for ->rename() where the source and/or target directories may be * encrypted. A new link can only be added to an encrypted directory if the * directory's encryption key is available --- since otherwise we'd have no way * to encrypt the filename. A rename to an existing name, on the other hand, * *is* cryptographically possible without the key. However, we take the more * conservative approach and just forbid all no-key renames. * * We also verify that the rename will not violate the constraint that all files * in an encrypted directory tree use the same encryption policy. * * Return: 0 on success, -ENOKEY if an encryption key is missing, -EXDEV if the * rename would cause inconsistent encryption policies, or another -errno code. */ static inline int fscrypt_prepare_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { if (IS_ENCRYPTED(old_dir) || IS_ENCRYPTED(new_dir)) return __fscrypt_prepare_rename(old_dir, old_dentry, new_dir, new_dentry, flags); return 0; } /** * fscrypt_prepare_lookup() - prepare to lookup a name in a possibly-encrypted * directory * @dir: directory being searched * @dentry: filename being looked up * @fname: (output) the name to use to search the on-disk directory * * Prepare for ->lookup() in a directory which may be encrypted by determining * the name that will actually be used to search the directory on-disk. If the * directory's encryption policy is supported by this kernel and its encryption * key is available, then the lookup is assumed to be by plaintext name; * otherwise, it is assumed to be by no-key name. * * This will set DCACHE_NOKEY_NAME on the dentry if the lookup is by no-key * name. In this case the filesystem must assign the dentry a dentry_operations * which contains fscrypt_d_revalidate (or contains a d_revalidate method that * calls fscrypt_d_revalidate), so that the dentry will be invalidated if the * directory's encryption key is later added. * * Return: 0 on success; -ENOENT if the directory's key is unavailable but the * filename isn't a valid no-key name, so a negative dentry should be created; * or another -errno code. 
*/ static inline int fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry, struct fscrypt_name *fname) { if (IS_ENCRYPTED(dir)) return __fscrypt_prepare_lookup(dir, dentry, fname); memset(fname, 0, sizeof(*fname)); fname->usr_fname = &dentry->d_name; fname->disk_name.name = (unsigned char *)dentry->d_name.name; fname->disk_name.len = dentry->d_name.len; fscrypt_prepare_dentry(dentry, false); return 0; } /** * fscrypt_prepare_readdir() - prepare to read a possibly-encrypted directory * @dir: the directory inode * * If the directory is encrypted and it doesn't already have its encryption key * set up, try to set it up so that the filenames will be listed in plaintext * form rather than in no-key form. * * Return: 0 on success; -errno on error. Note that the encryption key being * unavailable is not considered an error. It is also not an error if * the encryption policy is unsupported by this kernel; that is treated * like the key being unavailable, so that files can still be deleted. */ static inline int fscrypt_prepare_readdir(struct inode *dir) { if (IS_ENCRYPTED(dir)) return __fscrypt_prepare_readdir(dir); return 0; } /** * fscrypt_prepare_setattr() - prepare to change a possibly-encrypted inode's * attributes * @dentry: dentry through which the inode is being changed * @attr: attributes to change * * Prepare for ->setattr() on a possibly-encrypted inode. On an encrypted file, * most attribute changes are allowed even without the encryption key. However, * without the encryption key we do have to forbid truncates. This is needed * because the size being truncated to may not be a multiple of the filesystem * block size, and in that case we'd have to decrypt the final block, zero the * portion past i_size, and re-encrypt it. (We *could* allow truncating to a * filesystem block boundary, but it's simpler to just forbid all truncates --- * and we already forbid all other contents modifications without the key.) * * Return: 0 on success, -ENOKEY if the key is missing, or another -errno code * if a problem occurred while setting up the encryption key. */ static inline int fscrypt_prepare_setattr(struct dentry *dentry, struct iattr *attr) { if (IS_ENCRYPTED(d_inode(dentry))) return __fscrypt_prepare_setattr(dentry, attr); return 0; } /** * fscrypt_encrypt_symlink() - encrypt the symlink target if needed * @inode: symlink inode * @target: plaintext symlink target * @len: length of @target excluding null terminator * @disk_link: (in/out) the on-disk symlink target being prepared * * If the symlink target needs to be encrypted, then this function encrypts it * into @disk_link->name. fscrypt_prepare_symlink() must have been called * previously to compute @disk_link->len. If the filesystem did not allocate a * buffer for @disk_link->name after calling fscrypt_prepare_link(), then one * will be kmalloc()'ed and the filesystem will be responsible for freeing it. * * Return: 0 on success, -errno on failure */ static inline int fscrypt_encrypt_symlink(struct inode *inode, const char *target, unsigned int len, struct fscrypt_str *disk_link) { if (IS_ENCRYPTED(inode)) return __fscrypt_encrypt_symlink(inode, target, len, disk_link); return 0; } /* If *pagep is a bounce page, free it and set *pagep to the pagecache page */ static inline void fscrypt_finalize_bounce_page(struct page **pagep) { struct page *page = *pagep; if (fscrypt_is_bounce_page(page)) { *pagep = fscrypt_pagecache_page(page); fscrypt_free_bounce_page(page); } } #endif /* _LINUX_FSCRYPT_H */ |
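/*
 * Illustrative sketch (not part of the header above): how a filesystem's
 * ->lookup() might drive the fscrypt name-handling helpers declared here.
 * example_lookup() and example_find_entry() are invented names; the latter
 * stands in for the filesystem's real on-disk directory scan, which would
 * compare candidate entries with fscrypt_match_name(&fname, de_name,
 * de_name_len).
 */
static struct dentry *example_lookup(struct inode *dir, struct dentry *dentry,
				     unsigned int flags)
{
	struct fscrypt_name fname;
	struct inode *inode;
	int err;

	/* Map the user-visible name to the on-disk (possibly no-key) name. */
	err = fscrypt_prepare_lookup(dir, dentry, &fname);
	if (err == -ENOENT)
		return d_splice_alias(NULL, dentry); /* create a negative dentry */
	if (err)
		return ERR_PTR(err);

	inode = example_find_entry(dir, &fname); /* hypothetical helper */

	fscrypt_free_filename(&fname);
	return d_splice_alias(inode, dentry);
}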
/* SPDX-License-Identifier: GPL-2.0 */ /* * Latched RB-trees * * Copyright (C) 2015 Intel Corp., Peter Zijlstra <peterz@infradead.org> * * Since RB-trees have non-atomic modifications they're not immediately suited * for RCU/lockless queries. Even though we made RB-tree lookups non-fatal for * lockless lookups; we cannot guarantee they return a correct result. * * The simplest solution is a seqlock + RB-tree, this will allow lockless * lookups; but has the constraint (inherent to the seqlock) that read sides * cannot nest in write sides. * * If we need to allow unconditional lookups (say as required for NMI context * usage) we need a more complex setup; this data structure provides this by * employing the latch technique -- see @write_seqcount_latch_begin -- to * implement a latched RB-tree which does allow for unconditional lookups by * virtue of always having (at least) one stable copy of the tree. * * However, while we have the guarantee that there is at all times one stable * copy, this does not guarantee an iteration will not observe modifications. * What might have been a stable copy at the start of the iteration, need not * remain so for the duration of the iteration. * * Therefore, this does require a lockless RB-tree iteration to be non-fatal; * see the comment in lib/rbtree.c. Note however that we only require the first * condition -- not seeing partial stores -- because the latch thing isolates * us from loops. If we were to interrupt a modification the lookup would be * pointed at the stable tree and complete while the modification was halted. */ #ifndef RB_TREE_LATCH_H #define RB_TREE_LATCH_H #include <linux/rbtree.h> #include <linux/seqlock.h> #include <linux/rcupdate.h> struct latch_tree_node { struct rb_node node[2]; }; struct latch_tree_root { seqcount_latch_t seq; struct rb_root tree[2]; }; /** * latch_tree_ops - operators to define the tree order * @less: used for insertion; provides the (partial) order between two elements. * @comp: used for lookups; provides the order between the search key and an element. * * The operators are related like: * * comp(a->key,b) < 0 := less(a,b) * comp(a->key,b) > 0 := less(b,a) * comp(a->key,b) == 0 := !less(a,b) && !less(b,a) * * If these operators define a partial order on the elements we make no * guarantee on which of the elements matching the key is found. See * latch_tree_find().
*/ struct latch_tree_ops { bool (*less)(struct latch_tree_node *a, struct latch_tree_node *b); int (*comp)(void *key, struct latch_tree_node *b); }; static __always_inline struct latch_tree_node * __lt_from_rb(struct rb_node *node, int idx) { return container_of(node, struct latch_tree_node, node[idx]); } static __always_inline void __lt_insert(struct latch_tree_node *ltn, struct latch_tree_root *ltr, int idx, bool (*less)(struct latch_tree_node *a, struct latch_tree_node *b)) { struct rb_root *root = <r->tree[idx]; struct rb_node **link = &root->rb_node; struct rb_node *node = <n->node[idx]; struct rb_node *parent = NULL; struct latch_tree_node *ltp; while (*link) { parent = *link; ltp = __lt_from_rb(parent, idx); if (less(ltn, ltp)) link = &parent->rb_left; else link = &parent->rb_right; } rb_link_node_rcu(node, parent, link); rb_insert_color(node, root); } static __always_inline void __lt_erase(struct latch_tree_node *ltn, struct latch_tree_root *ltr, int idx) { rb_erase(<n->node[idx], <r->tree[idx]); } static __always_inline struct latch_tree_node * __lt_find(void *key, struct latch_tree_root *ltr, int idx, int (*comp)(void *key, struct latch_tree_node *node)) { struct rb_node *node = rcu_dereference_raw(ltr->tree[idx].rb_node); struct latch_tree_node *ltn; int c; while (node) { ltn = __lt_from_rb(node, idx); c = comp(key, ltn); if (c < 0) node = rcu_dereference_raw(node->rb_left); else if (c > 0) node = rcu_dereference_raw(node->rb_right); else return ltn; } return NULL; } /** * latch_tree_insert() - insert @node into the trees @root * @node: nodes to insert * @root: trees to insert @node into * @ops: operators defining the node order * * It inserts @node into @root in an ordered fashion such that we can always * observe one complete tree. See the comment for write_seqcount_latch_begin(). * * The inserts use rcu_assign_pointer() to publish the element such that the * tree structure is stored before we can observe the new @node. * * All modifications (latch_tree_insert, latch_tree_remove) are assumed to be * serialized. */ static __always_inline void latch_tree_insert(struct latch_tree_node *node, struct latch_tree_root *root, const struct latch_tree_ops *ops) { write_seqcount_latch_begin(&root->seq); __lt_insert(node, root, 0, ops->less); write_seqcount_latch(&root->seq); __lt_insert(node, root, 1, ops->less); write_seqcount_latch_end(&root->seq); } /** * latch_tree_erase() - removes @node from the trees @root * @node: nodes to remote * @root: trees to remove @node from * @ops: operators defining the node order * * Removes @node from the trees @root in an ordered fashion such that we can * always observe one complete tree. See the comment for * write_seqcount_latch_begin(). * * It is assumed that @node will observe one RCU quiescent state before being * reused of freed. * * All modifications (latch_tree_insert, latch_tree_remove) are assumed to be * serialized. */ static __always_inline void latch_tree_erase(struct latch_tree_node *node, struct latch_tree_root *root, const struct latch_tree_ops *ops) { write_seqcount_latch_begin(&root->seq); __lt_erase(node, root, 0); write_seqcount_latch(&root->seq); __lt_erase(node, root, 1); write_seqcount_latch_end(&root->seq); } /** * latch_tree_find() - find the node matching @key in the trees @root * @key: search key * @root: trees to search for @key * @ops: operators defining the node order * * Does a lockless lookup in the trees @root for the node matching @key. 
* * It is assumed that this is called while holding the appropriate RCU read * side lock. * * If the operators define a partial order on the elements (there are multiple * elements which have the same key value) it is undefined which of these * elements will be found. Nor is it possible to iterate the tree to find * further elements with the same key value. * * Returns: a pointer to the node matching @key or NULL. */ static __always_inline struct latch_tree_node * latch_tree_find(void *key, struct latch_tree_root *root, const struct latch_tree_ops *ops) { struct latch_tree_node *node; unsigned int seq; do { seq = read_seqcount_latch(&root->seq); node = __lt_find(key, root, seq & 1, ops->comp); } while (read_seqcount_latch_retry(&root->seq, seq)); return node; } #endif /* RB_TREE_LATCH_H */ |
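/*
 * Illustrative sketch (not part of the header above): a minimal user of the
 * latched RB-tree API.  "struct example_item", its key, and example_root are
 * invented for the example; the latch_tree_* calls are the real API declared
 * above.  Initialisation of example_root.seq (seqcount_latch_init()) and the
 * writer-side serialisation (e.g. a spinlock around insert/erase) are omitted
 * and assumed to be handled by the caller.
 */
struct example_item {
	unsigned long		key;
	struct latch_tree_node	ltn;
};

static __always_inline struct example_item *item_of(struct latch_tree_node *n)
{
	return container_of(n, struct example_item, ltn);
}

static bool example_less(struct latch_tree_node *a, struct latch_tree_node *b)
{
	return item_of(a)->key < item_of(b)->key;
}

static int example_comp(void *key, struct latch_tree_node *b)
{
	unsigned long k = *(unsigned long *)key;

	if (k < item_of(b)->key)
		return -1;
	if (k > item_of(b)->key)
		return 1;
	return 0;
}

static const struct latch_tree_ops example_ops = {
	.less = example_less,
	.comp = example_comp,
};

static struct latch_tree_root example_root;

/* Writer side (serialized):
 *	latch_tree_insert(&item->ltn, &example_root, &example_ops);
 * Reader side (under rcu_read_lock(), usable even from NMI context):
 *	unsigned long key = ...;
 *	node = latch_tree_find(&key, &example_root, &example_ops);
 */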
// SPDX-License-Identifier: GPL-2.0-only #include <linux/export.h> #include <linux/slab.h> #include <linux/regset.h> static int __regset_get(struct task_struct *target, const struct user_regset *regset, unsigned int size, void **data) { void *p = *data, *to_free = NULL; int res; if (!regset->regset_get) return -EOPNOTSUPP; if (size > regset->n * regset->size) size = regset->n * regset->size; if (!p) { to_free = p = kvzalloc(size, GFP_KERNEL); if (!p) return -ENOMEM; } res = regset->regset_get(target, regset, (struct membuf){.p = p, .left = size}); if (res < 0) { kvfree(to_free); return res; } *data = p; return size - res; } int regset_get(struct task_struct *target, const struct user_regset *regset, unsigned int size, void *data) { return __regset_get(target, regset, size, &data); } EXPORT_SYMBOL(regset_get); int regset_get_alloc(struct task_struct *target, const struct user_regset *regset, unsigned int size, void **data) { *data = NULL; return __regset_get(target, regset, size, data); } EXPORT_SYMBOL(regset_get_alloc); /** * copy_regset_to_user - fetch a thread's user_regset data into user memory * @target: thread to be examined * @view: &struct user_regset_view describing user thread machine state * @setno: index in @view->regsets * @offset: offset into the regset data, in bytes * @size: amount of data to copy, in bytes * @data: user-mode pointer to copy into */ int copy_regset_to_user(struct task_struct *target, const struct user_regset_view *view, unsigned int setno, unsigned int offset, unsigned int size, void __user *data) { const struct user_regset *regset = &view->regsets[setno]; void *buf; int ret; ret = regset_get_alloc(target, regset, size, &buf); if (ret > 0) ret = copy_to_user(data, buf, ret) ? -EFAULT : 0; kvfree(buf); return ret; }
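/*
 * Illustrative sketch (not part of the file above): the shape of the
 * per-architecture ->regset_get() hook that __regset_get()/regset_get_alloc()
 * call into.  example_gpr_get() and example_regsets[] are invented names and
 * values; membuf_write(), task_pt_regs(), and the struct user_regset fields
 * are the real kernel interfaces such hooks use.
 */
static int example_gpr_get(struct task_struct *target,
			   const struct user_regset *regset,
			   struct membuf to)
{
	/* Copy the thread's general-purpose register block into the membuf. */
	return membuf_write(&to, task_pt_regs(target), sizeof(struct pt_regs));
}

static const struct user_regset example_regsets[] = {
	[0] = {
		.core_note_type	= NT_PRSTATUS,
		.n		= sizeof(struct pt_regs) / sizeof(long),
		.size		= sizeof(long),
		.align		= sizeof(long),
		.regset_get	= example_gpr_get,
	},
};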
/* SPDX-License-Identifier: GPL-2.0 */ #if !defined(_TRACE_KVM_MAIN_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_KVM_MAIN_H #include <linux/tracepoint.h> #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm #define ERSN(x) { KVM_EXIT_##x, "KVM_EXIT_" #x } #define kvm_trace_exit_reason \ ERSN(UNKNOWN), ERSN(EXCEPTION), ERSN(IO), ERSN(HYPERCALL), \ ERSN(DEBUG), ERSN(HLT), ERSN(MMIO), ERSN(IRQ_WINDOW_OPEN), \ ERSN(SHUTDOWN), ERSN(FAIL_ENTRY), ERSN(INTR), ERSN(SET_TPR), \ ERSN(TPR_ACCESS), ERSN(S390_SIEIC), ERSN(S390_RESET), ERSN(DCR),\ ERSN(NMI), ERSN(INTERNAL_ERROR), ERSN(OSI), ERSN(PAPR_HCALL), \ ERSN(S390_UCONTROL), ERSN(WATCHDOG), ERSN(S390_TSCH), ERSN(EPR),\ ERSN(SYSTEM_EVENT), ERSN(S390_STSI), ERSN(IOAPIC_EOI), \ ERSN(HYPERV), ERSN(ARM_NISV), ERSN(X86_RDMSR), ERSN(X86_WRMSR) TRACE_EVENT(kvm_userspace_exit, TP_PROTO(__u32 reason, int errno), TP_ARGS(reason, errno), TP_STRUCT__entry( __field( __u32, reason ) __field( int, errno ) ), TP_fast_assign( __entry->reason = reason; __entry->errno = errno; ), TP_printk("reason %s (%d)", __entry->errno < 0 ? (__entry->errno == -EINTR ? "restart" : "error") : __print_symbolic(__entry->reason, kvm_trace_exit_reason), __entry->errno < 0 ?
-__entry->errno : __entry->reason) ); TRACE_EVENT(kvm_vcpu_wakeup, TP_PROTO(__u64 ns, bool waited, bool valid), TP_ARGS(ns, waited, valid), TP_STRUCT__entry( __field( __u64, ns ) __field( bool, waited ) __field( bool, valid ) ), TP_fast_assign( __entry->ns = ns; __entry->waited = waited; __entry->valid = valid; ), TP_printk("%s time %lld ns, polling %s", __entry->waited ? "wait" : "poll", __entry->ns, __entry->valid ? "valid" : "invalid") ); #if defined(CONFIG_HAVE_KVM_IRQCHIP) TRACE_EVENT(kvm_set_irq, TP_PROTO(unsigned int gsi, int level, int irq_source_id), TP_ARGS(gsi, level, irq_source_id), TP_STRUCT__entry( __field( unsigned int, gsi ) __field( int, level ) __field( int, irq_source_id ) ), TP_fast_assign( __entry->gsi = gsi; __entry->level = level; __entry->irq_source_id = irq_source_id; ), TP_printk("gsi %u level %d source %d", __entry->gsi, __entry->level, __entry->irq_source_id) ); #endif /* defined(CONFIG_HAVE_KVM_IRQCHIP) */ #if defined(__KVM_HAVE_IOAPIC) #define kvm_deliver_mode \ {0x0, "Fixed"}, \ {0x1, "LowPrio"}, \ {0x2, "SMI"}, \ {0x3, "Res3"}, \ {0x4, "NMI"}, \ {0x5, "INIT"}, \ {0x6, "SIPI"}, \ {0x7, "ExtINT"} TRACE_EVENT(kvm_ioapic_set_irq, TP_PROTO(__u64 e, int pin, bool coalesced), TP_ARGS(e, pin, coalesced), TP_STRUCT__entry( __field( __u64, e ) __field( int, pin ) __field( bool, coalesced ) ), TP_fast_assign( __entry->e = e; __entry->pin = pin; __entry->coalesced = coalesced; ), TP_printk("pin %u dst %x vec %u (%s|%s|%s%s)%s", __entry->pin, (u8)(__entry->e >> 56), (u8)__entry->e, __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode), (__entry->e & (1<<11)) ? "logical" : "physical", (__entry->e & (1<<15)) ? "level" : "edge", (__entry->e & (1<<16)) ? "|masked" : "", __entry->coalesced ? " (coalesced)" : "") ); TRACE_EVENT(kvm_ioapic_delayed_eoi_inj, TP_PROTO(__u64 e), TP_ARGS(e), TP_STRUCT__entry( __field( __u64, e ) ), TP_fast_assign( __entry->e = e; ), TP_printk("dst %x vec %u (%s|%s|%s%s)", (u8)(__entry->e >> 56), (u8)__entry->e, __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode), (__entry->e & (1<<11)) ? "logical" : "physical", (__entry->e & (1<<15)) ? "level" : "edge", (__entry->e & (1<<16)) ? "|masked" : "") ); TRACE_EVENT(kvm_msi_set_irq, TP_PROTO(__u64 address, __u64 data), TP_ARGS(address, data), TP_STRUCT__entry( __field( __u64, address ) __field( __u64, data ) ), TP_fast_assign( __entry->address = address; __entry->data = data; ), TP_printk("dst %llx vec %u (%s|%s|%s%s)", (u8)(__entry->address >> 12) | ((__entry->address >> 32) & 0xffffff00), (u8)__entry->data, __print_symbolic((__entry->data >> 8 & 0x7), kvm_deliver_mode), (__entry->address & (1<<2)) ? "logical" : "physical", (__entry->data & (1<<15)) ? "level" : "edge", (__entry->address & (1<<3)) ? 
"|rh" : "") ); #define kvm_irqchips \ {KVM_IRQCHIP_PIC_MASTER, "PIC master"}, \ {KVM_IRQCHIP_PIC_SLAVE, "PIC slave"}, \ {KVM_IRQCHIP_IOAPIC, "IOAPIC"} #endif /* defined(__KVM_HAVE_IOAPIC) */ #if defined(CONFIG_HAVE_KVM_IRQCHIP) #ifdef kvm_irqchips #define kvm_ack_irq_string "irqchip %s pin %u" #define kvm_ack_irq_parm __print_symbolic(__entry->irqchip, kvm_irqchips), __entry->pin #else #define kvm_ack_irq_string "irqchip %d pin %u" #define kvm_ack_irq_parm __entry->irqchip, __entry->pin #endif TRACE_EVENT(kvm_ack_irq, TP_PROTO(unsigned int irqchip, unsigned int pin), TP_ARGS(irqchip, pin), TP_STRUCT__entry( __field( unsigned int, irqchip ) __field( unsigned int, pin ) ), TP_fast_assign( __entry->irqchip = irqchip; __entry->pin = pin; ), TP_printk(kvm_ack_irq_string, kvm_ack_irq_parm) ); #endif /* defined(CONFIG_HAVE_KVM_IRQCHIP) */ #define KVM_TRACE_MMIO_READ_UNSATISFIED 0 #define KVM_TRACE_MMIO_READ 1 #define KVM_TRACE_MMIO_WRITE 2 #define kvm_trace_symbol_mmio \ { KVM_TRACE_MMIO_READ_UNSATISFIED, "unsatisfied-read" }, \ { KVM_TRACE_MMIO_READ, "read" }, \ { KVM_TRACE_MMIO_WRITE, "write" } TRACE_EVENT(kvm_mmio, TP_PROTO(int type, int len, u64 gpa, void *val), TP_ARGS(type, len, gpa, val), TP_STRUCT__entry( __field( u32, type ) __field( u32, len ) __field( u64, gpa ) __field( u64, val ) ), TP_fast_assign( __entry->type = type; __entry->len = len; __entry->gpa = gpa; __entry->val = 0; if (val) memcpy(&__entry->val, val, min_t(u32, sizeof(__entry->val), len)); ), TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx", __print_symbolic(__entry->type, kvm_trace_symbol_mmio), __entry->len, __entry->gpa, __entry->val) ); #define KVM_TRACE_IOCSR_READ_UNSATISFIED 0 #define KVM_TRACE_IOCSR_READ 1 #define KVM_TRACE_IOCSR_WRITE 2 #define kvm_trace_symbol_iocsr \ { KVM_TRACE_IOCSR_READ_UNSATISFIED, "unsatisfied-read" }, \ { KVM_TRACE_IOCSR_READ, "read" }, \ { KVM_TRACE_IOCSR_WRITE, "write" } TRACE_EVENT(kvm_iocsr, TP_PROTO(int type, int len, u64 gpa, void *val), TP_ARGS(type, len, gpa, val), TP_STRUCT__entry( __field( u32, type ) __field( u32, len ) __field( u64, gpa ) __field( u64, val ) ), TP_fast_assign( __entry->type = type; __entry->len = len; __entry->gpa = gpa; __entry->val = 0; if (val) memcpy(&__entry->val, val, min_t(u32, sizeof(__entry->val), len)); ), TP_printk("iocsr %s len %u gpa 0x%llx val 0x%llx", __print_symbolic(__entry->type, kvm_trace_symbol_iocsr), __entry->len, __entry->gpa, __entry->val) ); #define kvm_fpu_load_symbol \ {0, "unload"}, \ {1, "load"} TRACE_EVENT(kvm_fpu, TP_PROTO(int load), TP_ARGS(load), TP_STRUCT__entry( __field( u32, load ) ), TP_fast_assign( __entry->load = load; ), TP_printk("%s", __print_symbolic(__entry->load, kvm_fpu_load_symbol)) ); #ifdef CONFIG_KVM_ASYNC_PF DECLARE_EVENT_CLASS(kvm_async_get_page_class, TP_PROTO(u64 gva, u64 gfn), TP_ARGS(gva, gfn), TP_STRUCT__entry( __field(__u64, gva) __field(u64, gfn) ), TP_fast_assign( __entry->gva = gva; __entry->gfn = gfn; ), TP_printk("gva = %#llx, gfn = %#llx", __entry->gva, __entry->gfn) ); DEFINE_EVENT(kvm_async_get_page_class, kvm_try_async_get_page, TP_PROTO(u64 gva, u64 gfn), TP_ARGS(gva, gfn) ); DEFINE_EVENT(kvm_async_get_page_class, kvm_async_pf_repeated_fault, TP_PROTO(u64 gva, u64 gfn), TP_ARGS(gva, gfn) ); DECLARE_EVENT_CLASS(kvm_async_pf_nopresent_ready, TP_PROTO(u64 token, u64 gva), TP_ARGS(token, gva), TP_STRUCT__entry( __field(__u64, token) __field(__u64, gva) ), TP_fast_assign( __entry->token = token; __entry->gva = gva; ), TP_printk("token %#llx gva %#llx", __entry->token, __entry->gva) ); 
DEFINE_EVENT(kvm_async_pf_nopresent_ready, kvm_async_pf_not_present, TP_PROTO(u64 token, u64 gva), TP_ARGS(token, gva) ); DEFINE_EVENT(kvm_async_pf_nopresent_ready, kvm_async_pf_ready, TP_PROTO(u64 token, u64 gva), TP_ARGS(token, gva) ); TRACE_EVENT( kvm_async_pf_completed, TP_PROTO(unsigned long address, u64 gva), TP_ARGS(address, gva), TP_STRUCT__entry( __field(unsigned long, address) __field(u64, gva) ), TP_fast_assign( __entry->address = address; __entry->gva = gva; ), TP_printk("gva %#llx address %#lx", __entry->gva, __entry->address) ); #endif TRACE_EVENT(kvm_halt_poll_ns, TP_PROTO(bool grow, unsigned int vcpu_id, unsigned int new, unsigned int old), TP_ARGS(grow, vcpu_id, new, old), TP_STRUCT__entry( __field(bool, grow) __field(unsigned int, vcpu_id) __field(unsigned int, new) __field(unsigned int, old) ), TP_fast_assign( __entry->grow = grow; __entry->vcpu_id = vcpu_id; __entry->new = new; __entry->old = old; ), TP_printk("vcpu %u: halt_poll_ns %u (%s %u)", __entry->vcpu_id, __entry->new, __entry->grow ? "grow" : "shrink", __entry->old) ); #define trace_kvm_halt_poll_ns_grow(vcpu_id, new, old) \ trace_kvm_halt_poll_ns(true, vcpu_id, new, old) #define trace_kvm_halt_poll_ns_shrink(vcpu_id, new, old) \ trace_kvm_halt_poll_ns(false, vcpu_id, new, old) TRACE_EVENT(kvm_dirty_ring_push, TP_PROTO(struct kvm_dirty_ring *ring, u32 slot, u64 offset), TP_ARGS(ring, slot, offset), TP_STRUCT__entry( __field(int, index) __field(u32, dirty_index) __field(u32, reset_index) __field(u32, slot) __field(u64, offset) ), TP_fast_assign( __entry->index = ring->index; __entry->dirty_index = ring->dirty_index; __entry->reset_index = ring->reset_index; __entry->slot = slot; __entry->offset = offset; ), TP_printk("ring %d: dirty 0x%x reset 0x%x " "slot %u offset 0x%llx (used %u)", __entry->index, __entry->dirty_index, __entry->reset_index, __entry->slot, __entry->offset, __entry->dirty_index - __entry->reset_index) ); TRACE_EVENT(kvm_dirty_ring_reset, TP_PROTO(struct kvm_dirty_ring *ring), TP_ARGS(ring), TP_STRUCT__entry( __field(int, index) __field(u32, dirty_index) __field(u32, reset_index) ), TP_fast_assign( __entry->index = ring->index; __entry->dirty_index = ring->dirty_index; __entry->reset_index = ring->reset_index; ), TP_printk("ring %d: dirty 0x%x reset 0x%x (used %u)", __entry->index, __entry->dirty_index, __entry->reset_index, __entry->dirty_index - __entry->reset_index) ); TRACE_EVENT(kvm_dirty_ring_exit, TP_PROTO(struct kvm_vcpu *vcpu), TP_ARGS(vcpu), TP_STRUCT__entry( __field(int, vcpu_id) ), TP_fast_assign( __entry->vcpu_id = vcpu->vcpu_id; ), TP_printk("vcpu %d", __entry->vcpu_id) ); TRACE_EVENT(kvm_unmap_hva_range, TP_PROTO(unsigned long start, unsigned long end), TP_ARGS(start, end), TP_STRUCT__entry( __field( unsigned long, start ) __field( unsigned long, end ) ), TP_fast_assign( __entry->start = start; __entry->end = end; ), TP_printk("mmu notifier unmap range: %#016lx -- %#016lx", __entry->start, __entry->end) ); TRACE_EVENT(kvm_age_hva, TP_PROTO(unsigned long start, unsigned long end), TP_ARGS(start, end), TP_STRUCT__entry( __field( unsigned long, start ) __field( unsigned long, end ) ), TP_fast_assign( __entry->start = start; __entry->end = end; ), TP_printk("mmu notifier age hva: %#016lx -- %#016lx", __entry->start, __entry->end) ); TRACE_EVENT(kvm_test_age_hva, TP_PROTO(unsigned long hva), TP_ARGS(hva), TP_STRUCT__entry( __field( unsigned long, hva ) ), TP_fast_assign( __entry->hva = hva; ), TP_printk("mmu notifier test age hva: %#016lx", __entry->hva) ); #endif /* _TRACE_KVM_MAIN_H 
*/ /* This part must be outside protection */ #include <trace/define_trace.h> |
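/*
 * Illustrative sketch (not part of the header above): how the tracepoints
 * defined with TRACE_EVENT() are instantiated and emitted.  Exactly one
 * compilation unit defines CREATE_TRACE_POINTS before including the header
 * (assumed here to live at its usual <trace/events/kvm.h> location) so that
 * the tracepoint bodies are generated; callers then invoke the generated
 * trace_<name>() function.  example_return_to_userspace() is an invented
 * wrapper for the example.
 */
#define CREATE_TRACE_POINTS
#include <trace/events/kvm.h>

static void example_return_to_userspace(struct kvm_vcpu *vcpu, int r)
{
	/* Emits the kvm_userspace_exit event defined above. */
	trace_kvm_userspace_exit(vcpu->run->exit_reason, r);
}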
// SPDX-License-Identifier: GPL-2.0-or-later /* RxRPC virtual connection handler, common bits. * * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/slab.h> #include <linux/net.h> #include <linux/skbuff.h> #include "ar-internal.h" /* * Time till a connection expires after last use (in seconds).
*/ unsigned int __read_mostly rxrpc_connection_expiry = 10 * 60; unsigned int __read_mostly rxrpc_closed_conn_expiry = 10; static void rxrpc_clean_up_connection(struct work_struct *work); static void rxrpc_set_service_reap_timer(struct rxrpc_net *rxnet, unsigned long reap_at); void rxrpc_poke_conn(struct rxrpc_connection *conn, enum rxrpc_conn_trace why) { struct rxrpc_local *local = conn->local; bool busy; if (WARN_ON_ONCE(!local)) return; spin_lock_irq(&local->lock); busy = !list_empty(&conn->attend_link); if (!busy) { rxrpc_get_connection(conn, why); list_add_tail(&conn->attend_link, &local->conn_attend_q); } spin_unlock_irq(&local->lock); rxrpc_wake_up_io_thread(local); } static void rxrpc_connection_timer(struct timer_list *timer) { struct rxrpc_connection *conn = container_of(timer, struct rxrpc_connection, timer); rxrpc_poke_conn(conn, rxrpc_conn_get_poke_timer); } /* * allocate a new connection */ struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *rxnet, gfp_t gfp) { struct rxrpc_connection *conn; _enter(""); conn = kzalloc(sizeof(struct rxrpc_connection), gfp); if (conn) { INIT_LIST_HEAD(&conn->cache_link); timer_setup(&conn->timer, &rxrpc_connection_timer, 0); INIT_WORK(&conn->processor, rxrpc_process_connection); INIT_WORK(&conn->destructor, rxrpc_clean_up_connection); INIT_LIST_HEAD(&conn->proc_link); INIT_LIST_HEAD(&conn->link); INIT_LIST_HEAD(&conn->attend_link); mutex_init(&conn->security_lock); mutex_init(&conn->tx_data_alloc_lock); skb_queue_head_init(&conn->rx_queue); conn->rxnet = rxnet; conn->security = &rxrpc_no_security; rwlock_init(&conn->security_use_lock); spin_lock_init(&conn->state_lock); conn->debug_id = atomic_inc_return(&rxrpc_debug_id); conn->idle_timestamp = jiffies; } _leave(" = %p{%d}", conn, conn ? conn->debug_id : 0); return conn; } /* * Look up a connection in the cache by protocol parameters. * * If successful, a pointer to the connection is returned, but no ref is taken. * NULL is returned if there is no match. * * When searching for a service call, if we find a peer but no connection, we * return that through *_peer in case we need to create a new service call. * * The caller must be holding the RCU read lock. */ struct rxrpc_connection *rxrpc_find_client_connection_rcu(struct rxrpc_local *local, struct sockaddr_rxrpc *srx, struct sk_buff *skb) { struct rxrpc_connection *conn; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_peer *peer; _enter(",%x", sp->hdr.cid & RXRPC_CIDMASK); /* Look up client connections by connection ID alone as their * IDs are unique for this machine. */ conn = idr_find(&local->conn_ids, sp->hdr.cid >> RXRPC_CIDSHIFT); if (!conn || refcount_read(&conn->ref) == 0) { _debug("no conn"); goto not_found; } if (conn->proto.epoch != sp->hdr.epoch || conn->local != local) goto not_found; peer = conn->peer; switch (srx->transport.family) { case AF_INET: if (peer->srx.transport.sin.sin_port != srx->transport.sin.sin_port) goto not_found; break; #ifdef CONFIG_AF_RXRPC_IPV6 case AF_INET6: if (peer->srx.transport.sin6.sin6_port != srx->transport.sin6.sin6_port) goto not_found; break; #endif default: BUG(); } _leave(" = %p", conn); return conn; not_found: _leave(" = NULL"); return NULL; } /* * Disconnect a call and clear any channel it occupies when that call * terminates. The caller must hold the channel_lock and must release the * call's ref on the connection. 
*/ void __rxrpc_disconnect_call(struct rxrpc_connection *conn, struct rxrpc_call *call) { struct rxrpc_channel *chan = &conn->channels[call->cid & RXRPC_CHANNELMASK]; _enter("%d,%x", conn->debug_id, call->cid); if (chan->call == call) { /* Save the result of the call so that we can repeat it if necessary * through the channel, whilst disposing of the actual call record. */ trace_rxrpc_disconnect_call(call); switch (call->completion) { case RXRPC_CALL_SUCCEEDED: chan->last_seq = call->rx_highest_seq; chan->last_type = RXRPC_PACKET_TYPE_ACK; break; case RXRPC_CALL_LOCALLY_ABORTED: chan->last_abort = call->abort_code; chan->last_type = RXRPC_PACKET_TYPE_ABORT; break; default: chan->last_abort = RX_CALL_DEAD; chan->last_type = RXRPC_PACKET_TYPE_ABORT; break; } chan->last_call = chan->call_id; chan->call_id = chan->call_counter; chan->call = NULL; } _leave(""); } /* * Disconnect a call and clear any channel it occupies when that call * terminates. */ void rxrpc_disconnect_call(struct rxrpc_call *call) { struct rxrpc_connection *conn = call->conn; set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); rxrpc_see_call(call, rxrpc_call_see_disconnected); call->peer->cong_ssthresh = call->cong_ssthresh; if (!hlist_unhashed(&call->error_link)) { spin_lock_irq(&call->peer->lock); hlist_del_init(&call->error_link); spin_unlock_irq(&call->peer->lock); } if (rxrpc_is_client_call(call)) { rxrpc_disconnect_client_call(call->bundle, call); } else { __rxrpc_disconnect_call(conn, call); conn->idle_timestamp = jiffies; if (atomic_dec_and_test(&conn->active)) rxrpc_set_service_reap_timer(conn->rxnet, jiffies + rxrpc_connection_expiry * HZ); } rxrpc_put_call(call, rxrpc_call_put_io_thread); } /* * Queue a connection's work processor, getting a ref to pass to the work * queue. */ void rxrpc_queue_conn(struct rxrpc_connection *conn, enum rxrpc_conn_trace why) { if (atomic_read(&conn->active) >= 0 && rxrpc_queue_work(&conn->processor)) rxrpc_see_connection(conn, why); } /* * Note the re-emergence of a connection. */ void rxrpc_see_connection(struct rxrpc_connection *conn, enum rxrpc_conn_trace why) { if (conn) { int r = refcount_read(&conn->ref); trace_rxrpc_conn(conn->debug_id, r, why); } } /* * Get a ref on a connection. */ struct rxrpc_connection *rxrpc_get_connection(struct rxrpc_connection *conn, enum rxrpc_conn_trace why) { int r; __refcount_inc(&conn->ref, &r); trace_rxrpc_conn(conn->debug_id, r + 1, why); return conn; } /* * Try to get a ref on a connection. */ struct rxrpc_connection * rxrpc_get_connection_maybe(struct rxrpc_connection *conn, enum rxrpc_conn_trace why) { int r; if (conn) { if (__refcount_inc_not_zero(&conn->ref, &r)) trace_rxrpc_conn(conn->debug_id, r + 1, why); else conn = NULL; } return conn; } /* * Set the service connection reap timer. */ static void rxrpc_set_service_reap_timer(struct rxrpc_net *rxnet, unsigned long reap_at) { if (rxnet->live) timer_reduce(&rxnet->service_conn_reap_timer, reap_at); } /* * destroy a virtual connection */ static void rxrpc_rcu_free_connection(struct rcu_head *rcu) { struct rxrpc_connection *conn = container_of(rcu, struct rxrpc_connection, rcu); struct rxrpc_net *rxnet = conn->rxnet; _enter("{%d,u=%d}", conn->debug_id, refcount_read(&conn->ref)); trace_rxrpc_conn(conn->debug_id, refcount_read(&conn->ref), rxrpc_conn_free); kfree(conn); if (atomic_dec_and_test(&rxnet->nr_conns)) wake_up_var(&rxnet->nr_conns); } /* * Clean up a dead connection. 
*/ static void rxrpc_clean_up_connection(struct work_struct *work) { struct rxrpc_connection *conn = container_of(work, struct rxrpc_connection, destructor); struct rxrpc_net *rxnet = conn->rxnet; ASSERT(!conn->channels[0].call && !conn->channels[1].call && !conn->channels[2].call && !conn->channels[3].call); ASSERT(list_empty(&conn->cache_link)); timer_delete_sync(&conn->timer); cancel_work_sync(&conn->processor); /* Processing may restart the timer */ timer_delete_sync(&conn->timer); write_lock(&rxnet->conn_lock); list_del_init(&conn->proc_link); write_unlock(&rxnet->conn_lock); if (conn->pmtud_probe) { trace_rxrpc_pmtud_lost(conn, 0); conn->peer->pmtud_probing = false; conn->peer->pmtud_pending = true; } rxrpc_purge_queue(&conn->rx_queue); rxrpc_free_skb(conn->tx_response, rxrpc_skb_put_response); rxrpc_kill_client_conn(conn); conn->security->clear(conn); key_put(conn->key); rxrpc_put_bundle(conn->bundle, rxrpc_bundle_put_conn); rxrpc_put_peer(conn->peer, rxrpc_peer_put_conn); rxrpc_put_local(conn->local, rxrpc_local_put_kill_conn); /* Drain the Rx queue. Note that even though we've unpublished, an * incoming packet could still be being added to our Rx queue, so we * will need to drain it again in the RCU cleanup handler. */ rxrpc_purge_queue(&conn->rx_queue); page_frag_cache_drain(&conn->tx_data_alloc); call_rcu(&conn->rcu, rxrpc_rcu_free_connection); } /* * Drop a ref on a connection. */ void rxrpc_put_connection(struct rxrpc_connection *conn, enum rxrpc_conn_trace why) { unsigned int debug_id; bool dead; int r; if (!conn) return; debug_id = conn->debug_id; dead = __refcount_dec_and_test(&conn->ref, &r); trace_rxrpc_conn(debug_id, r - 1, why); if (dead) { timer_delete(&conn->timer); cancel_work(&conn->processor); if (in_softirq() || work_busy(&conn->processor) || timer_pending(&conn->timer)) /* Can't use the rxrpc workqueue as we need to cancel/flush * something that may be running/waiting there. */ schedule_work(&conn->destructor); else rxrpc_clean_up_connection(&conn->destructor); } } /* * reap dead service connections */ void rxrpc_service_connection_reaper(struct work_struct *work) { struct rxrpc_connection *conn, *_p; struct rxrpc_net *rxnet = container_of(work, struct rxrpc_net, service_conn_reaper); unsigned long expire_at, earliest, idle_timestamp, now; int active; LIST_HEAD(graveyard); _enter(""); now = jiffies; earliest = now + MAX_JIFFY_OFFSET; write_lock(&rxnet->conn_lock); list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) { ASSERTCMP(atomic_read(&conn->active), >=, 0); if (likely(atomic_read(&conn->active) > 0)) continue; if (conn->state == RXRPC_CONN_SERVICE_PREALLOC) continue; if (rxnet->live && !conn->local->dead) { idle_timestamp = READ_ONCE(conn->idle_timestamp); expire_at = idle_timestamp + rxrpc_connection_expiry * HZ; if (conn->local->service_closed) expire_at = idle_timestamp + rxrpc_closed_conn_expiry * HZ; _debug("reap CONN %d { a=%d,t=%ld }", conn->debug_id, atomic_read(&conn->active), (long)expire_at - (long)now); if (time_before(now, expire_at)) { if (time_before(expire_at, earliest)) earliest = expire_at; continue; } } /* The activity count sits at 0 whilst the conn is unused on * the list; we reduce that to -1 to make the conn unavailable. 
*/ active = 0; if (!atomic_try_cmpxchg(&conn->active, &active, -1)) continue; rxrpc_see_connection(conn, rxrpc_conn_see_reap_service); if (rxrpc_conn_is_client(conn)) BUG(); else rxrpc_unpublish_service_conn(conn); list_move_tail(&conn->link, &graveyard); } write_unlock(&rxnet->conn_lock); if (earliest != now + MAX_JIFFY_OFFSET) { _debug("reschedule reaper %ld", (long)earliest - (long)now); ASSERT(time_after(earliest, now)); rxrpc_set_service_reap_timer(rxnet, earliest); } while (!list_empty(&graveyard)) { conn = list_entry(graveyard.next, struct rxrpc_connection, link); list_del_init(&conn->link); ASSERTCMP(atomic_read(&conn->active), ==, -1); rxrpc_put_connection(conn, rxrpc_conn_put_service_reaped); } _leave(""); } /* * preemptively destroy all the service connection records rather than * waiting for them to time out */ void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet) { struct rxrpc_connection *conn, *_p; bool leak = false; _enter(""); atomic_dec(&rxnet->nr_conns); timer_delete_sync(&rxnet->service_conn_reap_timer); rxrpc_queue_work(&rxnet->service_conn_reaper); flush_workqueue(rxrpc_workqueue); write_lock(&rxnet->conn_lock); list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) { pr_err("AF_RXRPC: Leaked conn %p {%d}\n", conn, refcount_read(&conn->ref)); leak = true; } write_unlock(&rxnet->conn_lock); BUG_ON(leak); ASSERT(list_empty(&rxnet->conn_proc_list)); /* We need to wait for the connections to be destroyed by RCU as they * pin things that we still need to get rid of. */ wait_var_event(&rxnet->nr_conns, !atomic_read(&rxnet->nr_conns)); _leave(""); } |
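/*
 * Illustrative sketch (not part of the file above): the 0 -> -1 claim that
 * the service-connection reaper performs with atomic_try_cmpxchg().  The
 * helper name is invented; the point is that only one CPU can win the
 * transition, and any user that has already raised the count above 0 makes
 * the claim fail, so the object cannot be reaped while in use.
 */
static bool example_try_claim_for_reap(atomic_t *active)
{
	int expected = 0;

	/* Succeeds only if *active is still 0; on failure, "expected" is
	 * updated to the value actually observed. */
	return atomic_try_cmpxchg(active, &expected, -1);
}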
/* * CoreChip-sz SR9700 one chip USB 1.1 Ethernet Devices * * Author : Liu Junliang <liujunliang_ljl@163.com> * * Based on dm9601.c * * This file is licensed under the terms of the GNU General Public License * version 2. This program is licensed "as is" without any warranty of any * kind, whether express or implied.
*/ #include <linux/module.h> #include <linux/sched.h> #include <linux/stddef.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/ethtool.h> #include <linux/mii.h> #include <linux/usb.h> #include <linux/crc32.h> #include <linux/usb/usbnet.h> #include "sr9700.h" static int sr_read(struct usbnet *dev, u8 reg, u16 length, void *data) { int err; err = usbnet_read_cmd(dev, SR_RD_REGS, SR_REQ_RD_REG, 0, reg, data, length); if ((err != length) && (err >= 0)) err = -EINVAL; return err; } static int sr_write(struct usbnet *dev, u8 reg, u16 length, void *data) { int err; err = usbnet_write_cmd(dev, SR_WR_REGS, SR_REQ_WR_REG, 0, reg, data, length); if ((err >= 0) && (err < length)) err = -EINVAL; return err; } static int sr_read_reg(struct usbnet *dev, u8 reg, u8 *value) { return sr_read(dev, reg, 1, value); } static int sr_write_reg(struct usbnet *dev, u8 reg, u8 value) { return usbnet_write_cmd(dev, SR_WR_REGS, SR_REQ_WR_REG, value, reg, NULL, 0); } static void sr_write_async(struct usbnet *dev, u8 reg, u16 length, const void *data) { usbnet_write_cmd_async(dev, SR_WR_REGS, SR_REQ_WR_REG, 0, reg, data, length); } static void sr_write_reg_async(struct usbnet *dev, u8 reg, u8 value) { usbnet_write_cmd_async(dev, SR_WR_REGS, SR_REQ_WR_REG, value, reg, NULL, 0); } static int wait_phy_eeprom_ready(struct usbnet *dev, int phy) { int i; for (i = 0; i < SR_SHARE_TIMEOUT; i++) { u8 tmp = 0; int ret; udelay(1); ret = sr_read_reg(dev, SR_EPCR, &tmp); if (ret < 0) return ret; /* ready */ if (!(tmp & EPCR_ERRE)) return 0; } netdev_err(dev->net, "%s write timed out!\n", phy ? "phy" : "eeprom"); return -EIO; } static int sr_share_read_word(struct usbnet *dev, int phy, u8 reg, __le16 *value) { int ret; mutex_lock(&dev->phy_mutex); sr_write_reg(dev, SR_EPAR, phy ? (reg | EPAR_PHY_ADR) : reg); sr_write_reg(dev, SR_EPCR, phy ? (EPCR_EPOS | EPCR_ERPRR) : EPCR_ERPRR); ret = wait_phy_eeprom_ready(dev, phy); if (ret < 0) goto out_unlock; sr_write_reg(dev, SR_EPCR, 0x0); ret = sr_read(dev, SR_EPDR, 2, value); netdev_dbg(dev->net, "read shared %d 0x%02x returned 0x%04x, %d\n", phy, reg, *value, ret); out_unlock: mutex_unlock(&dev->phy_mutex); return ret; } static int sr_share_write_word(struct usbnet *dev, int phy, u8 reg, __le16 value) { int ret; mutex_lock(&dev->phy_mutex); ret = sr_write(dev, SR_EPDR, 2, &value); if (ret < 0) goto out_unlock; sr_write_reg(dev, SR_EPAR, phy ? (reg | EPAR_PHY_ADR) : reg); sr_write_reg(dev, SR_EPCR, phy ? 
(EPCR_WEP | EPCR_EPOS | EPCR_ERPRW) : (EPCR_WEP | EPCR_ERPRW)); ret = wait_phy_eeprom_ready(dev, phy); if (ret < 0) goto out_unlock; sr_write_reg(dev, SR_EPCR, 0x0); out_unlock: mutex_unlock(&dev->phy_mutex); return ret; } static int sr_read_eeprom_word(struct usbnet *dev, u8 offset, void *value) { return sr_share_read_word(dev, 0, offset, value); } static int sr9700_get_eeprom_len(struct net_device *netdev) { return SR_EEPROM_LEN; } static int sr9700_get_eeprom(struct net_device *netdev, struct ethtool_eeprom *eeprom, u8 *data) { struct usbnet *dev = netdev_priv(netdev); __le16 *buf = (__le16 *)data; int ret = 0; int i; /* access is 16bit */ if ((eeprom->offset & 0x01) || (eeprom->len & 0x01)) return -EINVAL; for (i = 0; i < eeprom->len / 2; i++) { ret = sr_read_eeprom_word(dev, eeprom->offset / 2 + i, buf + i); if (ret < 0) break; } return ret; } static int sr_mdio_read(struct net_device *netdev, int phy_id, int loc) { struct usbnet *dev = netdev_priv(netdev); int err, res; __le16 word; int rc = 0; if (phy_id) { netdev_dbg(netdev, "Only internal phy supported\n"); return 0; } /* Access NSR_LINKST bit for link status instead of MII_BMSR */ if (loc == MII_BMSR) { u8 value; err = sr_read_reg(dev, SR_NSR, &value); if (err < 0) return err; if (value & NSR_LINKST) rc = 1; } err = sr_share_read_word(dev, 1, loc, &word); if (err < 0) return err; if (rc == 1) res = le16_to_cpu(word) | BMSR_LSTATUS; else res = le16_to_cpu(word) & ~BMSR_LSTATUS; netdev_dbg(netdev, "sr_mdio_read() phy_id=0x%02x, loc=0x%02x, returns=0x%04x\n", phy_id, loc, res); return res; } static void sr_mdio_write(struct net_device *netdev, int phy_id, int loc, int val) { struct usbnet *dev = netdev_priv(netdev); __le16 res = cpu_to_le16(val); if (phy_id) { netdev_dbg(netdev, "Only internal phy supported\n"); return; } netdev_dbg(netdev, "sr_mdio_write() phy_id=0x%02x, loc=0x%02x, val=0x%04x\n", phy_id, loc, val); sr_share_write_word(dev, 1, loc, res); } static u32 sr9700_get_link(struct net_device *netdev) { struct usbnet *dev = netdev_priv(netdev); u8 value = 0; int rc = 0; /* Get the Link Status directly */ sr_read_reg(dev, SR_NSR, &value); if (value & NSR_LINKST) rc = 1; return rc; } static int sr9700_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd) { struct usbnet *dev = netdev_priv(netdev); return generic_mii_ioctl(&dev->mii, if_mii(rq), cmd, NULL); } static const struct ethtool_ops sr9700_ethtool_ops = { .get_drvinfo = usbnet_get_drvinfo, .get_link = sr9700_get_link, .get_msglevel = usbnet_get_msglevel, .set_msglevel = usbnet_set_msglevel, .get_eeprom_len = sr9700_get_eeprom_len, .get_eeprom = sr9700_get_eeprom, .nway_reset = usbnet_nway_reset, .get_link_ksettings = usbnet_get_link_ksettings_mii, .set_link_ksettings = usbnet_set_link_ksettings_mii, }; static void sr9700_set_multicast(struct net_device *netdev) { struct usbnet *dev = netdev_priv(netdev); /* We use the 20 byte dev->data for our 8 byte filter buffer * to avoid allocating memory that is tricky to free later */ u8 *hashes = (u8 *)&dev->data; /* rx_ctl setting : enable, disable_long, disable_crc */ u8 rx_ctl = RCR_RXEN | RCR_DIS_CRC | RCR_DIS_LONG; memset(hashes, 0x00, SR_MCAST_SIZE); /* broadcast address */ hashes[SR_MCAST_SIZE - 1] |= SR_MCAST_ADDR_FLAG; if (netdev->flags & IFF_PROMISC) { rx_ctl |= RCR_PRMSC; } else if (netdev->flags & IFF_ALLMULTI || netdev_mc_count(netdev) > SR_MCAST_MAX) { rx_ctl |= RCR_RUNT; } else if (!netdev_mc_empty(netdev)) { struct netdev_hw_addr *ha; netdev_for_each_mc_addr(ha, netdev) { u32 crc = ether_crc(ETH_ALEN, 
ha->addr) >> 26; hashes[crc >> 3] |= 1 << (crc & 0x7); } } sr_write_async(dev, SR_MAR, SR_MCAST_SIZE, hashes); sr_write_reg_async(dev, SR_RCR, rx_ctl); } static int sr9700_set_mac_address(struct net_device *netdev, void *p) { struct usbnet *dev = netdev_priv(netdev); struct sockaddr *addr = p; if (!is_valid_ether_addr(addr->sa_data)) { netdev_err(netdev, "not setting invalid mac address %pM\n", addr->sa_data); return -EINVAL; } eth_hw_addr_set(netdev, addr->sa_data); sr_write_async(dev, SR_PAR, 6, netdev->dev_addr); return 0; } static const struct net_device_ops sr9700_netdev_ops = { .ndo_open = usbnet_open, .ndo_stop = usbnet_stop, .ndo_start_xmit = usbnet_start_xmit, .ndo_tx_timeout = usbnet_tx_timeout, .ndo_change_mtu = usbnet_change_mtu, .ndo_get_stats64 = dev_get_tstats64, .ndo_validate_addr = eth_validate_addr, .ndo_eth_ioctl = sr9700_ioctl, .ndo_set_rx_mode = sr9700_set_multicast, .ndo_set_mac_address = sr9700_set_mac_address, }; static int sr9700_bind(struct usbnet *dev, struct usb_interface *intf) { struct net_device *netdev; struct mii_if_info *mii; u8 addr[ETH_ALEN]; int ret; ret = usbnet_get_endpoints(dev, intf); if (ret) goto out; netdev = dev->net; netdev->netdev_ops = &sr9700_netdev_ops; netdev->ethtool_ops = &sr9700_ethtool_ops; netdev->hard_header_len += SR_TX_OVERHEAD; dev->hard_mtu = netdev->mtu + netdev->hard_header_len; /* bulkin buffer is preferably not less than 3K */ dev->rx_urb_size = 3072; mii = &dev->mii; mii->dev = netdev; mii->mdio_read = sr_mdio_read; mii->mdio_write = sr_mdio_write; mii->phy_id_mask = 0x1f; mii->reg_num_mask = 0x1f; sr_write_reg(dev, SR_NCR, NCR_RST); udelay(20); /* read MAC * After Chip Power on, the Chip will reload the MAC from * EEPROM automatically to PAR. In case there is no EEPROM externally, * a default MAC address is stored in PAR for making chip work properly. */ if (sr_read(dev, SR_PAR, ETH_ALEN, addr) < 0) { netdev_err(netdev, "Error reading MAC address\n"); ret = -ENODEV; goto out; } eth_hw_addr_set(netdev, addr); /* power up and reset phy */ sr_write_reg(dev, SR_PRR, PRR_PHY_RST); /* at least 10ms, here 20ms for safe */ msleep(20); sr_write_reg(dev, SR_PRR, 0); /* at least 1ms, here 2ms for reading right register */ udelay(2 * 1000); /* receive broadcast packets */ sr9700_set_multicast(netdev); sr_mdio_write(netdev, mii->phy_id, MII_BMCR, BMCR_RESET); sr_mdio_write(netdev, mii->phy_id, MII_ADVERTISE, ADVERTISE_ALL | ADVERTISE_CSMA | ADVERTISE_PAUSE_CAP); mii_nway_restart(mii); out: return ret; } static int sr9700_rx_fixup(struct usbnet *dev, struct sk_buff *skb) { struct sk_buff *sr_skb; int len; /* skb content (packets) format : * p0 p1 p2 ...... pm * / \ * / \ * / \ * / \ * p0b0 p0b1 p0b2 p0b3 ...... 
p0b(n-4) p0b(n-3)...p0bn * * p0 : packet 0 * p0b0 : packet 0 byte 0 * * b0: rx status * b1: packet length (incl crc) low * b2: packet length (incl crc) high * b3..n-4: packet data * bn-3..bn: ethernet packet crc */ if (unlikely(skb->len < SR_RX_OVERHEAD)) { netdev_err(dev->net, "unexpected tiny rx frame\n"); return 0; } /* one skb may contains multiple packets */ while (skb->len > SR_RX_OVERHEAD) { if (skb->data[0] != 0x40) return 0; /* ignore the CRC length */ len = (skb->data[1] | (skb->data[2] << 8)) - 4; if (len > ETH_FRAME_LEN || len > skb->len || len < 0) return 0; /* the last packet of current skb */ if (skb->len == (len + SR_RX_OVERHEAD)) { skb_pull(skb, 3); skb->len = len; skb_set_tail_pointer(skb, len); return 2; } sr_skb = netdev_alloc_skb_ip_align(dev->net, len); if (!sr_skb) return 0; skb_put(sr_skb, len); memcpy(sr_skb->data, skb->data + 3, len); usbnet_skb_return(dev, sr_skb); skb_pull(skb, len + SR_RX_OVERHEAD); } return 0; } static struct sk_buff *sr9700_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags) { int len; /* SR9700 can only send out one ethernet packet at once. * * b0 b1 b2 b3 ...... b(n-4) b(n-3)...bn * * b0: rx status * b1: packet length (incl crc) low * b2: packet length (incl crc) high * b3..n-4: packet data * bn-3..bn: ethernet packet crc */ len = skb->len; if (skb_cow_head(skb, SR_TX_OVERHEAD)) { dev_kfree_skb_any(skb); return NULL; } __skb_push(skb, SR_TX_OVERHEAD); /* usbnet adds padding if length is a multiple of packet size * if so, adjust length value in header */ if ((skb->len % dev->maxpacket) == 0) len++; skb->data[0] = len; skb->data[1] = len >> 8; return skb; } static void sr9700_status(struct usbnet *dev, struct urb *urb) { int link; u8 *buf; /* format: b0: net status b1: tx status 1 b2: tx status 2 b3: rx status b4: rx overflow b5: rx count b6: tx count b7: gpr */ if (urb->actual_length < 8) return; buf = urb->transfer_buffer; link = !!(buf[0] & 0x40); if (netif_carrier_ok(dev->net) != link) { usbnet_link_change(dev, link, 1); netdev_dbg(dev->net, "Link Status is: %d\n", link); } } static int sr9700_link_reset(struct usbnet *dev) { struct ethtool_cmd ecmd; mii_check_media(&dev->mii, 1, 1); mii_ethtool_gset(&dev->mii, &ecmd); netdev_dbg(dev->net, "link_reset() speed: %d duplex: %d\n", ecmd.speed, ecmd.duplex); return 0; } static const struct driver_info sr9700_driver_info = { .description = "CoreChip SR9700 USB Ethernet", .flags = FLAG_ETHER, .bind = sr9700_bind, .rx_fixup = sr9700_rx_fixup, .tx_fixup = sr9700_tx_fixup, .status = sr9700_status, .link_reset = sr9700_link_reset, .reset = sr9700_link_reset, }; static const struct usb_device_id products[] = { { USB_DEVICE(0x0fe6, 0x9700), /* SR9700 device */ .driver_info = (unsigned long)&sr9700_driver_info, }, {}, /* END */ }; MODULE_DEVICE_TABLE(usb, products); static struct usb_driver sr9700_usb_driver = { .name = "sr9700", .id_table = products, .probe = usbnet_probe, .disconnect = usbnet_disconnect, .suspend = usbnet_suspend, .resume = usbnet_resume, .disable_hub_initiated_lpm = 1, }; module_usb_driver(sr9700_usb_driver); MODULE_AUTHOR("liujl <liujunliang_ljl@163.com>"); MODULE_DESCRIPTION("SR9700 one chip USB 1.1 USB to Ethernet device from http://www.corechip-sz.com/"); MODULE_LICENSE("GPL"); |
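/*
 * Illustrative sketch (not part of the driver above): the 3-byte per-packet
 * header layout that sr9700_rx_fixup()/sr9700_tx_fixup() work with
 * (b0 = status, b1/b2 = little-endian length including the 4-byte CRC).
 * The struct and helper names are invented for the example.
 */
struct example_sr9700_hdr {
	u8 status;	/* 0x40 is expected for a good RX frame */
	u8 len_lo;	/* packet length (incl. CRC), low byte */
	u8 len_hi;	/* packet length (incl. CRC), high byte */
} __packed;

static int example_payload_len(const struct example_sr9700_hdr *hdr)
{
	/* Strip the trailing Ethernet CRC, as sr9700_rx_fixup() does. */
	return (hdr->len_lo | (hdr->len_hi << 8)) - 4;
}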
// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/lib/kasprintf.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/stdarg.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/string.h>

/* Simplified asprintf. */
char *kvasprintf(gfp_t gfp, const char *fmt, va_list ap)
{
	unsigned int first, second;
	char *p;
	va_list aq;

	va_copy(aq, ap);
	first = vsnprintf(NULL, 0, fmt, aq);
	va_end(aq);

	p = kmalloc_track_caller(first+1, gfp);
	if (!p)
		return NULL;

	second = vsnprintf(p, first+1, fmt, ap);
	WARN(first != second, "different return values (%u and %u) from vsnprintf(\"%s\", ...)",
	     first, second, fmt);

	return p;
}
EXPORT_SYMBOL(kvasprintf);

/*
 * If fmt contains no % (or is exactly %s), use kstrdup_const. If fmt
 * (or the sole vararg) points to rodata, we will then save a memory
 * allocation and string copy. In any case, the return value should be
 * freed using kfree_const().
 */
const char *kvasprintf_const(gfp_t gfp, const char *fmt, va_list ap)
{
	if (!strchr(fmt, '%'))
		return kstrdup_const(fmt, gfp);
	if (!strcmp(fmt, "%s"))
		return kstrdup_const(va_arg(ap, const char*), gfp);
	return kvasprintf(gfp, fmt, ap);
}
EXPORT_SYMBOL(kvasprintf_const);

char *kasprintf(gfp_t gfp, const char *fmt, ...)
{
	va_list ap;
	char *p;

	va_start(ap, fmt);
	p = kvasprintf(gfp, fmt, ap);
	va_end(ap);

	return p;
}
EXPORT_SYMBOL(kasprintf);
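/*
 * Illustrative sketch (editor's addition): typical call sites for the
 * helpers above, in kernel context. The demo_* names are made up for the
 * example. The contract being shown: kasprintf()/kvasprintf() results are
 * released with kfree(), while kvasprintf_const()/kstrdup_const() results
 * may point into rodata and must be released with kfree_const().
 */
#include <linux/kernel.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/string.h>

static char *demo_label_for_port(int port, gfp_t gfp)
{
	/* one allocation, sized by the hidden first vsnprintf() pass */
	return kasprintf(gfp, "port%d-rx", port);
}

static void demo_use_labels(void)
{
	char *name = demo_label_for_port(3, GFP_KERNEL);
	const char *alias = kstrdup_const("default", GFP_KERNEL);

	if (name)
		pr_info("allocated label: %s\n", name);

	kfree(name);		/* kasprintf() pairs with kfree() */
	kfree_const(alias);	/* *_const() helpers pair with kfree_const() */
}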
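/*
 * Illustrative userspace analogue (editor's addition; assumes plain C with
 * a C99-conforming vsnprintf, nothing kernel-specific): the same two-pass
 * pattern kvasprintf() uses above -- measure with vsnprintf(NULL, 0, ...),
 * then allocate and format into the exact-sized buffer. demo_asprintf() is
 * a hypothetical name for this sketch.
 */
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>

static char *demo_asprintf(const char *fmt, ...)
{
	va_list ap, aq;
	char *p;
	int len;

	va_start(ap, fmt);
	va_copy(aq, ap);
	len = vsnprintf(NULL, 0, fmt, aq);	/* first pass: just measure */
	va_end(aq);
	if (len < 0) {
		va_end(ap);
		return NULL;
	}
	p = malloc((size_t)len + 1);
	if (p)
		vsnprintf(p, (size_t)len + 1, fmt, ap);	/* second pass: fill */
	va_end(ap);
	return p;
}

int main(void)
{
	char *s = demo_asprintf("%s-%d", "eth", 42);

	if (s)
		printf("%s\n", s);	/* prints "eth-42" */
	free(s);
	return 0;
}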
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
 *                         Patrick Schaaf <bof@bof.de>
 * Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org>
 */

/* Kernel module for IP set management */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/ip.h>
#include <linux/skbuff.h>
#include <linux/spinlock.h>
#include <linux/rculist.h>
#include <net/netlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <linux/netfilter.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/ipset/ip_set.h>

static LIST_HEAD(ip_set_type_list);		/* all registered set types */
static DEFINE_MUTEX(ip_set_type_mutex);		/* protects ip_set_type_list */
static DEFINE_RWLOCK(ip_set_ref_lock);		/* protects the set refs */

struct ip_set_net {
	struct ip_set * __rcu *ip_set_list;	/* all individual sets */
	ip_set_id_t	ip_set_max;		/* max number of sets */
	bool		is_deleted;		/* deleted by ip_set_net_exit */
	bool		is_destroyed;		/* all sets are destroyed */
};

static unsigned int ip_set_net_id __read_mostly;

static struct ip_set_net *ip_set_pernet(struct net *net)
{
	return net_generic(net, ip_set_net_id);
}

#define IP_SET_INC	64
#define STRNCMP(a, b)	(strncmp(a, b, IPSET_MAXNAMELEN) == 0)

static unsigned int max_sets;

module_param(max_sets, int, 0600);
MODULE_PARM_DESC(max_sets, "maximal number of sets");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
MODULE_DESCRIPTION("core IP set support");
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);

/* When the nfnl mutex or ip_set_ref_lock is held: */
#define ip_set_dereference(inst)	\
	rcu_dereference_protected((inst)->ip_set_list,	\
		lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET) ||	\
		lockdep_is_held(&ip_set_ref_lock) ||	\
		(inst)->is_deleted)
#define ip_set(inst, id)	\
	ip_set_dereference(inst)[id]
#define ip_set_ref_netlink(inst,id)	\
	rcu_dereference_raw((inst)->ip_set_list)[id]
#define ip_set_dereference_nfnl(p)	\
rcu_dereference_check(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET)) /* The set types are implemented in modules and registered set types * can be found in ip_set_type_list. Adding/deleting types is * serialized by ip_set_type_mutex. */ static void ip_set_type_lock(void) { mutex_lock(&ip_set_type_mutex); } static void ip_set_type_unlock(void) { mutex_unlock(&ip_set_type_mutex); } /* Register and deregister settype */ static struct ip_set_type * find_set_type(const char *name, u8 family, u8 revision) { struct ip_set_type *type; list_for_each_entry_rcu(type, &ip_set_type_list, list, lockdep_is_held(&ip_set_type_mutex)) if (STRNCMP(type->name, name) && (type->family == family || type->family == NFPROTO_UNSPEC) && revision >= type->revision_min && revision <= type->revision_max) return type; return NULL; } /* Unlock, try to load a set type module and lock again */ static bool load_settype(const char *name) { if (!try_module_get(THIS_MODULE)) return false; nfnl_unlock(NFNL_SUBSYS_IPSET); pr_debug("try to load ip_set_%s\n", name); if (request_module("ip_set_%s", name) < 0) { pr_warn("Can't find ip_set type %s\n", name); nfnl_lock(NFNL_SUBSYS_IPSET); module_put(THIS_MODULE); return false; } nfnl_lock(NFNL_SUBSYS_IPSET); module_put(THIS_MODULE); return true; } /* Find a set type and reference it */ #define find_set_type_get(name, family, revision, found) \ __find_set_type_get(name, family, revision, found, false) static int __find_set_type_get(const char *name, u8 family, u8 revision, struct ip_set_type **found, bool retry) { struct ip_set_type *type; int err; if (retry && !load_settype(name)) return -IPSET_ERR_FIND_TYPE; rcu_read_lock(); *found = find_set_type(name, family, revision); if (*found) { err = !try_module_get((*found)->me) ? -EFAULT : 0; goto unlock; } /* Make sure the type is already loaded * but we don't support the revision */ list_for_each_entry_rcu(type, &ip_set_type_list, list) if (STRNCMP(type->name, name)) { err = -IPSET_ERR_FIND_TYPE; goto unlock; } rcu_read_unlock(); return retry ? -IPSET_ERR_FIND_TYPE : __find_set_type_get(name, family, revision, found, true); unlock: rcu_read_unlock(); return err; } /* Find a given set type by name and family. * If we succeeded, the supported minimal and maximum revisions are * filled out. */ #define find_set_type_minmax(name, family, min, max) \ __find_set_type_minmax(name, family, min, max, false) static int __find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max, bool retry) { struct ip_set_type *type; bool found = false; if (retry && !load_settype(name)) return -IPSET_ERR_FIND_TYPE; *min = 255; *max = 0; rcu_read_lock(); list_for_each_entry_rcu(type, &ip_set_type_list, list) if (STRNCMP(type->name, name) && (type->family == family || type->family == NFPROTO_UNSPEC)) { found = true; if (type->revision_min < *min) *min = type->revision_min; if (type->revision_max > *max) *max = type->revision_max; } rcu_read_unlock(); if (found) return 0; return retry ? -IPSET_ERR_FIND_TYPE : __find_set_type_minmax(name, family, min, max, true); } #define family_name(f) ((f) == NFPROTO_IPV4 ? "inet" : \ (f) == NFPROTO_IPV6 ? "inet6" : "any") /* Register a set type structure. The type is identified by * the unique triple of name, family and revision. 
*/ int ip_set_type_register(struct ip_set_type *type) { int ret = 0; if (type->protocol != IPSET_PROTOCOL) { pr_warn("ip_set type %s, family %s, revision %u:%u uses wrong protocol version %u (want %u)\n", type->name, family_name(type->family), type->revision_min, type->revision_max, type->protocol, IPSET_PROTOCOL); return -EINVAL; } ip_set_type_lock(); if (find_set_type(type->name, type->family, type->revision_min)) { /* Duplicate! */ pr_warn("ip_set type %s, family %s with revision min %u already registered!\n", type->name, family_name(type->family), type->revision_min); ip_set_type_unlock(); return -EINVAL; } list_add_rcu(&type->list, &ip_set_type_list); pr_debug("type %s, family %s, revision %u:%u registered.\n", type->name, family_name(type->family), type->revision_min, type->revision_max); ip_set_type_unlock(); return ret; } EXPORT_SYMBOL_GPL(ip_set_type_register); /* Unregister a set type. There's a small race with ip_set_create */ void ip_set_type_unregister(struct ip_set_type *type) { ip_set_type_lock(); if (!find_set_type(type->name, type->family, type->revision_min)) { pr_warn("ip_set type %s, family %s with revision min %u not registered\n", type->name, family_name(type->family), type->revision_min); ip_set_type_unlock(); return; } list_del_rcu(&type->list); pr_debug("type %s, family %s with revision min %u unregistered.\n", type->name, family_name(type->family), type->revision_min); ip_set_type_unlock(); synchronize_rcu(); } EXPORT_SYMBOL_GPL(ip_set_type_unregister); /* Utility functions */ void * ip_set_alloc(size_t size) { return kvzalloc(size, GFP_KERNEL_ACCOUNT); } EXPORT_SYMBOL_GPL(ip_set_alloc); void ip_set_free(void *members) { pr_debug("%p: free with %s\n", members, is_vmalloc_addr(members) ? "vfree" : "kfree"); kvfree(members); } EXPORT_SYMBOL_GPL(ip_set_free); static bool flag_nested(const struct nlattr *nla) { return nla->nla_type & NLA_F_NESTED; } static const struct nla_policy ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] = { [IPSET_ATTR_IPADDR_IPV4] = { .type = NLA_U32 }, [IPSET_ATTR_IPADDR_IPV6] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)), }; int ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr) { struct nlattr *tb[IPSET_ATTR_IPADDR_MAX + 1]; if (unlikely(!flag_nested(nla))) return -IPSET_ERR_PROTOCOL; if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy, NULL)) return -IPSET_ERR_PROTOCOL; if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV4))) return -IPSET_ERR_PROTOCOL; *ipaddr = nla_get_be32(tb[IPSET_ATTR_IPADDR_IPV4]); return 0; } EXPORT_SYMBOL_GPL(ip_set_get_ipaddr4); int ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr) { struct nlattr *tb[IPSET_ATTR_IPADDR_MAX + 1]; if (unlikely(!flag_nested(nla))) return -IPSET_ERR_PROTOCOL; if (nla_parse_nested(tb, IPSET_ATTR_IPADDR_MAX, nla, ipaddr_policy, NULL)) return -IPSET_ERR_PROTOCOL; if (unlikely(!ip_set_attr_netorder(tb, IPSET_ATTR_IPADDR_IPV6))) return -IPSET_ERR_PROTOCOL; memcpy(ipaddr, nla_data(tb[IPSET_ATTR_IPADDR_IPV6]), sizeof(struct in6_addr)); return 0; } EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6); static u32 ip_set_timeout_get(const unsigned long *timeout) { u32 t; if (*timeout == IPSET_ELEM_PERMANENT) return 0; t = jiffies_to_msecs(*timeout - jiffies) / MSEC_PER_SEC; /* Zero value in userspace means no timeout */ return t == 0 ? 1 : t; } static char * ip_set_comment_uget(struct nlattr *tb) { return nla_data(tb); } /* Called from uadd only, protected by the set spinlock. * The kadt functions don't use the comment extensions in any way. 
*/ void ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment, const struct ip_set_ext *ext) { struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1); size_t len = ext->comment ? strlen(ext->comment) : 0; if (unlikely(c)) { set->ext_size -= sizeof(*c) + strlen(c->str) + 1; kfree_rcu(c, rcu); rcu_assign_pointer(comment->c, NULL); } if (!len) return; if (unlikely(len > IPSET_MAX_COMMENT_SIZE)) len = IPSET_MAX_COMMENT_SIZE; c = kmalloc(sizeof(*c) + len + 1, GFP_ATOMIC); if (unlikely(!c)) return; strscpy(c->str, ext->comment, len + 1); set->ext_size += sizeof(*c) + strlen(c->str) + 1; rcu_assign_pointer(comment->c, c); } EXPORT_SYMBOL_GPL(ip_set_init_comment); /* Used only when dumping a set, protected by rcu_read_lock() */ static int ip_set_put_comment(struct sk_buff *skb, const struct ip_set_comment *comment) { struct ip_set_comment_rcu *c = rcu_dereference(comment->c); if (!c) return 0; return nla_put_string(skb, IPSET_ATTR_COMMENT, c->str); } /* Called from uadd/udel, flush or the garbage collectors protected * by the set spinlock. * Called when the set is destroyed and when there can't be any user * of the set data anymore. */ static void ip_set_comment_free(struct ip_set *set, void *ptr) { struct ip_set_comment *comment = ptr; struct ip_set_comment_rcu *c; c = rcu_dereference_protected(comment->c, 1); if (unlikely(!c)) return; set->ext_size -= sizeof(*c) + strlen(c->str) + 1; kfree_rcu(c, rcu); rcu_assign_pointer(comment->c, NULL); } typedef void (*destroyer)(struct ip_set *, void *); /* ipset data extension types, in size order */ const struct ip_set_ext_type ip_set_extensions[] = { [IPSET_EXT_ID_COUNTER] = { .type = IPSET_EXT_COUNTER, .flag = IPSET_FLAG_WITH_COUNTERS, .len = sizeof(struct ip_set_counter), .align = __alignof__(struct ip_set_counter), }, [IPSET_EXT_ID_TIMEOUT] = { .type = IPSET_EXT_TIMEOUT, .len = sizeof(unsigned long), .align = __alignof__(unsigned long), }, [IPSET_EXT_ID_SKBINFO] = { .type = IPSET_EXT_SKBINFO, .flag = IPSET_FLAG_WITH_SKBINFO, .len = sizeof(struct ip_set_skbinfo), .align = __alignof__(struct ip_set_skbinfo), }, [IPSET_EXT_ID_COMMENT] = { .type = IPSET_EXT_COMMENT | IPSET_EXT_DESTROY, .flag = IPSET_FLAG_WITH_COMMENT, .len = sizeof(struct ip_set_comment), .align = __alignof__(struct ip_set_comment), .destroy = ip_set_comment_free, }, }; EXPORT_SYMBOL_GPL(ip_set_extensions); static bool add_extension(enum ip_set_ext_id id, u32 flags, struct nlattr *tb[]) { return ip_set_extensions[id].flag ? 
(flags & ip_set_extensions[id].flag) : !!tb[IPSET_ATTR_TIMEOUT]; } size_t ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], size_t len, size_t align) { enum ip_set_ext_id id; u32 cadt_flags = 0; if (tb[IPSET_ATTR_CADT_FLAGS]) cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); if (cadt_flags & IPSET_FLAG_WITH_FORCEADD) set->flags |= IPSET_CREATE_FLAG_FORCEADD; if (!align) align = 1; for (id = 0; id < IPSET_EXT_ID_MAX; id++) { if (!add_extension(id, cadt_flags, tb)) continue; if (align < ip_set_extensions[id].align) align = ip_set_extensions[id].align; len = ALIGN(len, ip_set_extensions[id].align); set->offset[id] = len; set->extensions |= ip_set_extensions[id].type; len += ip_set_extensions[id].len; } return ALIGN(len, align); } EXPORT_SYMBOL_GPL(ip_set_elem_len); int ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], struct ip_set_ext *ext) { u64 fullmark; if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_PACKETS) || !ip_set_optattr_netorder(tb, IPSET_ATTR_BYTES) || !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBMARK) || !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBPRIO) || !ip_set_optattr_netorder(tb, IPSET_ATTR_SKBQUEUE))) return -IPSET_ERR_PROTOCOL; if (tb[IPSET_ATTR_TIMEOUT]) { if (!SET_WITH_TIMEOUT(set)) return -IPSET_ERR_TIMEOUT; ext->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); } if (tb[IPSET_ATTR_BYTES] || tb[IPSET_ATTR_PACKETS]) { if (!SET_WITH_COUNTER(set)) return -IPSET_ERR_COUNTER; if (tb[IPSET_ATTR_BYTES]) ext->bytes = be64_to_cpu(nla_get_be64( tb[IPSET_ATTR_BYTES])); if (tb[IPSET_ATTR_PACKETS]) ext->packets = be64_to_cpu(nla_get_be64( tb[IPSET_ATTR_PACKETS])); } if (tb[IPSET_ATTR_COMMENT]) { if (!SET_WITH_COMMENT(set)) return -IPSET_ERR_COMMENT; ext->comment = ip_set_comment_uget(tb[IPSET_ATTR_COMMENT]); } if (tb[IPSET_ATTR_SKBMARK]) { if (!SET_WITH_SKBINFO(set)) return -IPSET_ERR_SKBINFO; fullmark = be64_to_cpu(nla_get_be64(tb[IPSET_ATTR_SKBMARK])); ext->skbinfo.skbmark = fullmark >> 32; ext->skbinfo.skbmarkmask = fullmark & 0xffffffff; } if (tb[IPSET_ATTR_SKBPRIO]) { if (!SET_WITH_SKBINFO(set)) return -IPSET_ERR_SKBINFO; ext->skbinfo.skbprio = be32_to_cpu(nla_get_be32(tb[IPSET_ATTR_SKBPRIO])); } if (tb[IPSET_ATTR_SKBQUEUE]) { if (!SET_WITH_SKBINFO(set)) return -IPSET_ERR_SKBINFO; ext->skbinfo.skbqueue = be16_to_cpu(nla_get_be16(tb[IPSET_ATTR_SKBQUEUE])); } return 0; } EXPORT_SYMBOL_GPL(ip_set_get_extensions); static u64 ip_set_get_bytes(const struct ip_set_counter *counter) { return (u64)atomic64_read(&(counter)->bytes); } static u64 ip_set_get_packets(const struct ip_set_counter *counter) { return (u64)atomic64_read(&(counter)->packets); } static bool ip_set_put_counter(struct sk_buff *skb, const struct ip_set_counter *counter) { return nla_put_net64(skb, IPSET_ATTR_BYTES, cpu_to_be64(ip_set_get_bytes(counter)), IPSET_ATTR_PAD) || nla_put_net64(skb, IPSET_ATTR_PACKETS, cpu_to_be64(ip_set_get_packets(counter)), IPSET_ATTR_PAD); } static bool ip_set_put_skbinfo(struct sk_buff *skb, const struct ip_set_skbinfo *skbinfo) { /* Send nonzero parameters only */ return ((skbinfo->skbmark || skbinfo->skbmarkmask) && nla_put_net64(skb, IPSET_ATTR_SKBMARK, cpu_to_be64((u64)skbinfo->skbmark << 32 | skbinfo->skbmarkmask), IPSET_ATTR_PAD)) || (skbinfo->skbprio && nla_put_net32(skb, IPSET_ATTR_SKBPRIO, cpu_to_be32(skbinfo->skbprio))) || (skbinfo->skbqueue && nla_put_net16(skb, IPSET_ATTR_SKBQUEUE, cpu_to_be16(skbinfo->skbqueue))); } int ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set, 
const void *e, bool active) { if (SET_WITH_TIMEOUT(set)) { unsigned long *timeout = ext_timeout(e, set); if (nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(active ? ip_set_timeout_get(timeout) : *timeout))) return -EMSGSIZE; } if (SET_WITH_COUNTER(set) && ip_set_put_counter(skb, ext_counter(e, set))) return -EMSGSIZE; if (SET_WITH_COMMENT(set) && ip_set_put_comment(skb, ext_comment(e, set))) return -EMSGSIZE; if (SET_WITH_SKBINFO(set) && ip_set_put_skbinfo(skb, ext_skbinfo(e, set))) return -EMSGSIZE; return 0; } EXPORT_SYMBOL_GPL(ip_set_put_extensions); static bool ip_set_match_counter(u64 counter, u64 match, u8 op) { switch (op) { case IPSET_COUNTER_NONE: return true; case IPSET_COUNTER_EQ: return counter == match; case IPSET_COUNTER_NE: return counter != match; case IPSET_COUNTER_LT: return counter < match; case IPSET_COUNTER_GT: return counter > match; } return false; } static void ip_set_add_bytes(u64 bytes, struct ip_set_counter *counter) { atomic64_add((long long)bytes, &(counter)->bytes); } static void ip_set_add_packets(u64 packets, struct ip_set_counter *counter) { atomic64_add((long long)packets, &(counter)->packets); } static void ip_set_update_counter(struct ip_set_counter *counter, const struct ip_set_ext *ext, u32 flags) { if (ext->packets != ULLONG_MAX && !(flags & IPSET_FLAG_SKIP_COUNTER_UPDATE)) { ip_set_add_bytes(ext->bytes, counter); ip_set_add_packets(ext->packets, counter); } } static void ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags) { mext->skbinfo = *skbinfo; } bool ip_set_match_extensions(struct ip_set *set, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags, void *data) { if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(data, set))) return false; if (SET_WITH_COUNTER(set)) { struct ip_set_counter *counter = ext_counter(data, set); ip_set_update_counter(counter, ext, flags); if (flags & IPSET_FLAG_MATCH_COUNTERS && !(ip_set_match_counter(ip_set_get_packets(counter), mext->packets, mext->packets_op) && ip_set_match_counter(ip_set_get_bytes(counter), mext->bytes, mext->bytes_op))) return false; } if (SET_WITH_SKBINFO(set)) ip_set_get_skbinfo(ext_skbinfo(data, set), ext, mext, flags); return true; } EXPORT_SYMBOL_GPL(ip_set_match_extensions); /* Creating/destroying/renaming/swapping affect the existence and * the properties of a set. All of these can be executed from userspace * only and serialized by the nfnl mutex indirectly from nfnetlink. * * Sets are identified by their index in ip_set_list and the index * is used by the external references (set/SET netfilter modules). * * The set behind an index may change by swapping only, from userspace. */ static void __ip_set_get(struct ip_set *set) { write_lock_bh(&ip_set_ref_lock); set->ref++; write_unlock_bh(&ip_set_ref_lock); } static void __ip_set_put(struct ip_set *set) { write_lock_bh(&ip_set_ref_lock); BUG_ON(set->ref == 0); set->ref--; write_unlock_bh(&ip_set_ref_lock); } /* set->ref can be swapped out by ip_set_swap, netlink events (like dump) need * a separate reference counter */ static void __ip_set_get_netlink(struct ip_set *set) { write_lock_bh(&ip_set_ref_lock); set->ref_netlink++; write_unlock_bh(&ip_set_ref_lock); } static void __ip_set_put_netlink(struct ip_set *set) { write_lock_bh(&ip_set_ref_lock); BUG_ON(set->ref_netlink == 0); set->ref_netlink--; write_unlock_bh(&ip_set_ref_lock); } /* Add, del and test set entries from kernel. 
* * The set behind the index must exist and must be referenced * so it can't be destroyed (or changed) under our foot. */ static struct ip_set * ip_set_rcu_get(struct net *net, ip_set_id_t index) { struct ip_set_net *inst = ip_set_pernet(net); /* ip_set_list and the set pointer need to be protected */ return ip_set_dereference_nfnl(inst->ip_set_list)[index]; } static inline void ip_set_lock(struct ip_set *set) { if (!set->variant->region_lock) spin_lock_bh(&set->lock); } static inline void ip_set_unlock(struct ip_set *set) { if (!set->variant->region_lock) spin_unlock_bh(&set->lock); } int ip_set_test(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { struct ip_set *set = ip_set_rcu_get(xt_net(par), index); int ret = 0; BUG_ON(!set); pr_debug("set %s, index %u\n", set->name, index); if (opt->dim < set->type->dimension || !(opt->family == set->family || set->family == NFPROTO_UNSPEC)) return 0; ret = set->variant->kadt(set, skb, par, IPSET_TEST, opt); if (ret == -EAGAIN) { /* Type requests element to be completed */ pr_debug("element must be completed, ADD is triggered\n"); ip_set_lock(set); set->variant->kadt(set, skb, par, IPSET_ADD, opt); ip_set_unlock(set); ret = 1; } else { /* --return-nomatch: invert matched element */ if ((opt->cmdflags & IPSET_FLAG_RETURN_NOMATCH) && (set->type->features & IPSET_TYPE_NOMATCH) && (ret > 0 || ret == -ENOTEMPTY)) ret = -ret; } /* Convert error codes to nomatch */ return (ret < 0 ? 0 : ret); } EXPORT_SYMBOL_GPL(ip_set_test); int ip_set_add(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { struct ip_set *set = ip_set_rcu_get(xt_net(par), index); int ret; BUG_ON(!set); pr_debug("set %s, index %u\n", set->name, index); if (opt->dim < set->type->dimension || !(opt->family == set->family || set->family == NFPROTO_UNSPEC)) return -IPSET_ERR_TYPE_MISMATCH; ip_set_lock(set); ret = set->variant->kadt(set, skb, par, IPSET_ADD, opt); ip_set_unlock(set); return ret; } EXPORT_SYMBOL_GPL(ip_set_add); int ip_set_del(ip_set_id_t index, const struct sk_buff *skb, const struct xt_action_param *par, struct ip_set_adt_opt *opt) { struct ip_set *set = ip_set_rcu_get(xt_net(par), index); int ret = 0; BUG_ON(!set); pr_debug("set %s, index %u\n", set->name, index); if (opt->dim < set->type->dimension || !(opt->family == set->family || set->family == NFPROTO_UNSPEC)) return -IPSET_ERR_TYPE_MISMATCH; ip_set_lock(set); ret = set->variant->kadt(set, skb, par, IPSET_DEL, opt); ip_set_unlock(set); return ret; } EXPORT_SYMBOL_GPL(ip_set_del); /* Find set by name, reference it once. The reference makes sure the * thing pointed to, does not go away under our feet. * */ ip_set_id_t ip_set_get_byname(struct net *net, const char *name, struct ip_set **set) { ip_set_id_t i, index = IPSET_INVALID_ID; struct ip_set *s; struct ip_set_net *inst = ip_set_pernet(net); rcu_read_lock(); for (i = 0; i < inst->ip_set_max; i++) { s = rcu_dereference(inst->ip_set_list)[i]; if (s && STRNCMP(s->name, name)) { __ip_set_get(s); index = i; *set = s; break; } } rcu_read_unlock(); return index; } EXPORT_SYMBOL_GPL(ip_set_get_byname); /* If the given set pointer points to a valid set, decrement * reference count by 1. The caller shall not assume the index * to be valid, after calling this function. 
* */ static void __ip_set_put_byindex(struct ip_set_net *inst, ip_set_id_t index) { struct ip_set *set; rcu_read_lock(); set = rcu_dereference(inst->ip_set_list)[index]; if (set) __ip_set_put(set); rcu_read_unlock(); } void ip_set_put_byindex(struct net *net, ip_set_id_t index) { struct ip_set_net *inst = ip_set_pernet(net); __ip_set_put_byindex(inst, index); } EXPORT_SYMBOL_GPL(ip_set_put_byindex); /* Get the name of a set behind a set index. * Set itself is protected by RCU, but its name isn't: to protect against * renaming, grab ip_set_ref_lock as reader (see ip_set_rename()) and copy the * name. */ void ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name) { struct ip_set *set = ip_set_rcu_get(net, index); BUG_ON(!set); read_lock_bh(&ip_set_ref_lock); strscpy_pad(name, set->name, IPSET_MAXNAMELEN); read_unlock_bh(&ip_set_ref_lock); } EXPORT_SYMBOL_GPL(ip_set_name_byindex); /* Routines to call by external subsystems, which do not * call nfnl_lock for us. */ /* Find set by index, reference it once. The reference makes sure the * thing pointed to, does not go away under our feet. * * The nfnl mutex is used in the function. */ ip_set_id_t ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index) { struct ip_set *set; struct ip_set_net *inst = ip_set_pernet(net); if (index >= inst->ip_set_max) return IPSET_INVALID_ID; nfnl_lock(NFNL_SUBSYS_IPSET); set = ip_set(inst, index); if (set) __ip_set_get(set); else index = IPSET_INVALID_ID; nfnl_unlock(NFNL_SUBSYS_IPSET); return index; } EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex); /* If the given set pointer points to a valid set, decrement * reference count by 1. The caller shall not assume the index * to be valid, after calling this function. * * The nfnl mutex is used in the function. */ void ip_set_nfnl_put(struct net *net, ip_set_id_t index) { struct ip_set *set; struct ip_set_net *inst = ip_set_pernet(net); nfnl_lock(NFNL_SUBSYS_IPSET); if (!inst->is_deleted) { /* already deleted from ip_set_net_exit() */ set = ip_set(inst, index); if (set) __ip_set_put(set); } nfnl_unlock(NFNL_SUBSYS_IPSET); } EXPORT_SYMBOL_GPL(ip_set_nfnl_put); /* Communication protocol with userspace over netlink. * * The commands are serialized by the nfnl mutex. */ static inline u8 protocol(const struct nlattr * const tb[]) { return nla_get_u8(tb[IPSET_ATTR_PROTOCOL]); } static inline bool protocol_failed(const struct nlattr * const tb[]) { return !tb[IPSET_ATTR_PROTOCOL] || protocol(tb) != IPSET_PROTOCOL; } static inline bool protocol_min_failed(const struct nlattr * const tb[]) { return !tb[IPSET_ATTR_PROTOCOL] || protocol(tb) < IPSET_PROTOCOL_MIN; } static inline u32 flag_exist(const struct nlmsghdr *nlh) { return nlh->nlmsg_flags & NLM_F_EXCL ? 
0 : IPSET_FLAG_EXIST; } static struct nlmsghdr * start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags, enum ipset_cmd cmd) { return nfnl_msg_put(skb, portid, seq, nfnl_msg_type(NFNL_SUBSYS_IPSET, cmd), flags, NFPROTO_IPV4, NFNETLINK_V0, 0); } /* Create a set */ static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] = { [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING, .len = IPSET_MAXNAMELEN - 1 }, [IPSET_ATTR_TYPENAME] = { .type = NLA_NUL_STRING, .len = IPSET_MAXNAMELEN - 1}, [IPSET_ATTR_REVISION] = { .type = NLA_U8 }, [IPSET_ATTR_FAMILY] = { .type = NLA_U8 }, [IPSET_ATTR_DATA] = { .type = NLA_NESTED }, }; static struct ip_set * find_set_and_id(struct ip_set_net *inst, const char *name, ip_set_id_t *id) { struct ip_set *set = NULL; ip_set_id_t i; *id = IPSET_INVALID_ID; for (i = 0; i < inst->ip_set_max; i++) { set = ip_set(inst, i); if (set && STRNCMP(set->name, name)) { *id = i; break; } } return (*id == IPSET_INVALID_ID ? NULL : set); } static inline struct ip_set * find_set(struct ip_set_net *inst, const char *name) { ip_set_id_t id; return find_set_and_id(inst, name, &id); } static int find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index, struct ip_set **set) { struct ip_set *s; ip_set_id_t i; *index = IPSET_INVALID_ID; for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); if (!s) { if (*index == IPSET_INVALID_ID) *index = i; } else if (STRNCMP(name, s->name)) { /* Name clash */ *set = s; return -EEXIST; } } if (*index == IPSET_INVALID_ID) /* No free slot remained */ return -IPSET_ERR_MAX_SETS; return 0; } static int ip_set_none(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const attr[]) { return -EOPNOTSUPP; } static int ip_set_create(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const attr[]) { struct ip_set_net *inst = ip_set_pernet(info->net); struct ip_set *set, *clash = NULL; ip_set_id_t index = IPSET_INVALID_ID; struct nlattr *tb[IPSET_ATTR_CREATE_MAX + 1] = {}; const char *name, *typename; u8 family, revision; u32 flags = flag_exist(info->nlh); int ret = 0; if (unlikely(protocol_min_failed(attr) || !attr[IPSET_ATTR_SETNAME] || !attr[IPSET_ATTR_TYPENAME] || !attr[IPSET_ATTR_REVISION] || !attr[IPSET_ATTR_FAMILY] || (attr[IPSET_ATTR_DATA] && !flag_nested(attr[IPSET_ATTR_DATA])))) return -IPSET_ERR_PROTOCOL; name = nla_data(attr[IPSET_ATTR_SETNAME]); typename = nla_data(attr[IPSET_ATTR_TYPENAME]); family = nla_get_u8(attr[IPSET_ATTR_FAMILY]); revision = nla_get_u8(attr[IPSET_ATTR_REVISION]); pr_debug("setname: %s, typename: %s, family: %s, revision: %u\n", name, typename, family_name(family), revision); /* First, and without any locks, allocate and initialize * a normal base set structure. */ set = kzalloc(sizeof(*set), GFP_KERNEL); if (!set) return -ENOMEM; spin_lock_init(&set->lock); strscpy(set->name, name, IPSET_MAXNAMELEN); set->family = family; set->revision = revision; /* Next, check that we know the type, and take * a reference on the type, to make sure it stays available * while constructing our new set. * * After referencing the type, we try to create the type * specific part of the set without holding any locks. */ ret = find_set_type_get(typename, family, revision, &set->type); if (ret) goto out; /* Without holding any locks, create private part. 
*/ if (attr[IPSET_ATTR_DATA] && nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA], set->type->create_policy, NULL)) { ret = -IPSET_ERR_PROTOCOL; goto put_out; } /* Set create flags depending on the type revision */ set->flags |= set->type->create_flags[revision]; ret = set->type->create(info->net, set, tb, flags); if (ret != 0) goto put_out; /* BTW, ret==0 here. */ /* Here, we have a valid, constructed set and we are protected * by the nfnl mutex. Find the first free index in ip_set_list * and check clashing. */ ret = find_free_id(inst, set->name, &index, &clash); if (ret == -EEXIST) { /* If this is the same set and requested, ignore error */ if ((flags & IPSET_FLAG_EXIST) && STRNCMP(set->type->name, clash->type->name) && set->type->family == clash->type->family && set->type->revision_min == clash->type->revision_min && set->type->revision_max == clash->type->revision_max && set->variant->same_set(set, clash)) ret = 0; goto cleanup; } else if (ret == -IPSET_ERR_MAX_SETS) { struct ip_set **list, **tmp; ip_set_id_t i = inst->ip_set_max + IP_SET_INC; if (i < inst->ip_set_max || i == IPSET_INVALID_ID) /* Wraparound */ goto cleanup; list = kvcalloc(i, sizeof(struct ip_set *), GFP_KERNEL); if (!list) goto cleanup; /* nfnl mutex is held, both lists are valid */ tmp = ip_set_dereference(inst); memcpy(list, tmp, sizeof(struct ip_set *) * inst->ip_set_max); rcu_assign_pointer(inst->ip_set_list, list); /* Make sure all current packets have passed through */ synchronize_net(); /* Use new list */ index = inst->ip_set_max; inst->ip_set_max = i; kvfree(tmp); ret = 0; } else if (ret) { goto cleanup; } /* Finally! Add our shiny new set to the list, and be done. */ pr_debug("create: '%s' created with index %u!\n", set->name, index); ip_set(inst, index) = set; return ret; cleanup: set->variant->cancel_gc(set); set->variant->destroy(set); put_out: module_put(set->type->me); out: kfree(set); return ret; } /* Destroy sets */ static const struct nla_policy ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = { [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING, .len = IPSET_MAXNAMELEN - 1 }, }; /* In order to return quickly when destroying a single set, it is split * into two stages: * - Cancel garbage collector * - Destroy the set itself via call_rcu() */ static void ip_set_destroy_set_rcu(struct rcu_head *head) { struct ip_set *set = container_of(head, struct ip_set, rcu); set->variant->destroy(set); module_put(set->type->me); kfree(set); } static void _destroy_all_sets(struct ip_set_net *inst) { struct ip_set *set; ip_set_id_t i; bool need_wait = false; /* First cancel gc's: set:list sets are flushed as well */ for (i = 0; i < inst->ip_set_max; i++) { set = ip_set(inst, i); if (set) { set->variant->cancel_gc(set); if (set->type->features & IPSET_TYPE_NAME) need_wait = true; } } /* Must wait for flush to be really finished */ if (need_wait) rcu_barrier(); for (i = 0; i < inst->ip_set_max; i++) { set = ip_set(inst, i); if (set) { ip_set(inst, i) = NULL; set->variant->destroy(set); module_put(set->type->me); kfree(set); } } } static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const attr[]) { struct ip_set_net *inst = ip_set_pernet(info->net); struct ip_set *s; ip_set_id_t i; int ret = 0; if (unlikely(protocol_min_failed(attr))) return -IPSET_ERR_PROTOCOL; /* Commands are serialized and references are * protected by the ip_set_ref_lock. * External systems (i.e. 
xt_set) must call * ip_set_nfnl_get_* functions, that way we * can safely check references here. * * list:set timer can only decrement the reference * counter, so if it's already zero, we can proceed * without holding the lock. */ if (!attr[IPSET_ATTR_SETNAME]) { read_lock_bh(&ip_set_ref_lock); for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); if (s && (s->ref || s->ref_netlink)) { ret = -IPSET_ERR_BUSY; goto out; } } inst->is_destroyed = true; read_unlock_bh(&ip_set_ref_lock); _destroy_all_sets(inst); /* Modified by ip_set_destroy() only, which is serialized */ inst->is_destroyed = false; } else { u32 flags = flag_exist(info->nlh); u16 features = 0; read_lock_bh(&ip_set_ref_lock); s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &i); if (!s) { if (!(flags & IPSET_FLAG_EXIST)) ret = -ENOENT; goto out; } else if (s->ref || s->ref_netlink) { ret = -IPSET_ERR_BUSY; goto out; } features = s->type->features; ip_set(inst, i) = NULL; read_unlock_bh(&ip_set_ref_lock); /* Must cancel garbage collectors */ s->variant->cancel_gc(s); if (features & IPSET_TYPE_NAME) { /* Must wait for flush to be really finished */ rcu_barrier(); } call_rcu(&s->rcu, ip_set_destroy_set_rcu); } return 0; out: read_unlock_bh(&ip_set_ref_lock); return ret; } /* Flush sets */ static void ip_set_flush_set(struct ip_set *set) { pr_debug("set: %s\n", set->name); ip_set_lock(set); set->variant->flush(set); ip_set_unlock(set); } static int ip_set_flush(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const attr[]) { struct ip_set_net *inst = ip_set_pernet(info->net); struct ip_set *s; ip_set_id_t i; if (unlikely(protocol_min_failed(attr))) return -IPSET_ERR_PROTOCOL; if (!attr[IPSET_ATTR_SETNAME]) { for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); if (s) ip_set_flush_set(s); } } else { s = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); if (!s) return -ENOENT; ip_set_flush_set(s); } return 0; } /* Rename a set */ static const struct nla_policy ip_set_setname2_policy[IPSET_ATTR_CMD_MAX + 1] = { [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING, .len = IPSET_MAXNAMELEN - 1 }, [IPSET_ATTR_SETNAME2] = { .type = NLA_NUL_STRING, .len = IPSET_MAXNAMELEN - 1 }, }; static int ip_set_rename(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const attr[]) { struct ip_set_net *inst = ip_set_pernet(info->net); struct ip_set *set, *s; const char *name2; ip_set_id_t i; int ret = 0; if (unlikely(protocol_min_failed(attr) || !attr[IPSET_ATTR_SETNAME] || !attr[IPSET_ATTR_SETNAME2])) return -IPSET_ERR_PROTOCOL; set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); if (!set) return -ENOENT; write_lock_bh(&ip_set_ref_lock); if (set->ref != 0 || set->ref_netlink != 0) { ret = -IPSET_ERR_REFERENCED; goto out; } name2 = nla_data(attr[IPSET_ATTR_SETNAME2]); for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); if (s && STRNCMP(s->name, name2)) { ret = -IPSET_ERR_EXIST_SETNAME2; goto out; } } strscpy_pad(set->name, name2, IPSET_MAXNAMELEN); out: write_unlock_bh(&ip_set_ref_lock); return ret; } /* Swap two sets so that name/index points to the other. * References and set names are also swapped. * * The commands are serialized by the nfnl mutex and references are * protected by the ip_set_ref_lock. The kernel interfaces * do not hold the mutex but the pointer settings are atomic * so the ip_set_list always contains valid pointers to the sets. 
*/ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const attr[]) { struct ip_set_net *inst = ip_set_pernet(info->net); struct ip_set *from, *to; ip_set_id_t from_id, to_id; char from_name[IPSET_MAXNAMELEN]; if (unlikely(protocol_min_failed(attr) || !attr[IPSET_ATTR_SETNAME] || !attr[IPSET_ATTR_SETNAME2])) return -IPSET_ERR_PROTOCOL; from = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &from_id); if (!from) return -ENOENT; to = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME2]), &to_id); if (!to) return -IPSET_ERR_EXIST_SETNAME2; /* Features must not change. * Not an artifical restriction anymore, as we must prevent * possible loops created by swapping in setlist type of sets. */ if (!(from->type->features == to->type->features && from->family == to->family)) return -IPSET_ERR_TYPE_MISMATCH; write_lock_bh(&ip_set_ref_lock); if (from->ref_netlink || to->ref_netlink) { write_unlock_bh(&ip_set_ref_lock); return -EBUSY; } strscpy_pad(from_name, from->name, IPSET_MAXNAMELEN); strscpy_pad(from->name, to->name, IPSET_MAXNAMELEN); strscpy_pad(to->name, from_name, IPSET_MAXNAMELEN); swap(from->ref, to->ref); ip_set(inst, from_id) = to; ip_set(inst, to_id) = from; write_unlock_bh(&ip_set_ref_lock); return 0; } /* List/save set data */ #define DUMP_INIT 0 #define DUMP_ALL 1 #define DUMP_ONE 2 #define DUMP_LAST 3 #define DUMP_TYPE(arg) (((u32)(arg)) & 0x0000FFFF) #define DUMP_FLAGS(arg) (((u32)(arg)) >> 16) int ip_set_put_flags(struct sk_buff *skb, struct ip_set *set) { u32 cadt_flags = 0; if (SET_WITH_TIMEOUT(set)) if (unlikely(nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(set->timeout)))) return -EMSGSIZE; if (SET_WITH_COUNTER(set)) cadt_flags |= IPSET_FLAG_WITH_COUNTERS; if (SET_WITH_COMMENT(set)) cadt_flags |= IPSET_FLAG_WITH_COMMENT; if (SET_WITH_SKBINFO(set)) cadt_flags |= IPSET_FLAG_WITH_SKBINFO; if (SET_WITH_FORCEADD(set)) cadt_flags |= IPSET_FLAG_WITH_FORCEADD; if (!cadt_flags) return 0; return nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(cadt_flags)); } EXPORT_SYMBOL_GPL(ip_set_put_flags); static int ip_set_dump_done(struct netlink_callback *cb) { if (cb->args[IPSET_CB_ARG0]) { struct ip_set_net *inst = (struct ip_set_net *)cb->args[IPSET_CB_NET]; ip_set_id_t index = (ip_set_id_t)cb->args[IPSET_CB_INDEX]; struct ip_set *set = ip_set_ref_netlink(inst, index); if (set->variant->uref) set->variant->uref(set, cb, false); pr_debug("release set %s\n", set->name); __ip_set_put_netlink(set); } return 0; } static inline void dump_attrs(struct nlmsghdr *nlh) { const struct nlattr *attr; int rem; pr_debug("dump nlmsg\n"); nlmsg_for_each_attr(attr, nlh, sizeof(struct nfgenmsg), rem) { pr_debug("type: %u, len %u\n", nla_type(attr), attr->nla_len); } } static const struct nla_policy ip_set_dump_policy[IPSET_ATTR_CMD_MAX + 1] = { [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING, .len = IPSET_MAXNAMELEN - 1 }, [IPSET_ATTR_FLAGS] = { .type = NLA_U32 }, }; static int ip_set_dump_start(struct netlink_callback *cb) { struct nlmsghdr *nlh = nlmsg_hdr(cb->skb); int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); struct nlattr *cda[IPSET_ATTR_CMD_MAX + 1]; struct nlattr *attr = (void *)nlh + min_len; struct sk_buff *skb = cb->skb; struct ip_set_net *inst = ip_set_pernet(sock_net(skb->sk)); u32 dump_type; int ret; ret = nla_parse(cda, IPSET_ATTR_CMD_MAX, attr, nlh->nlmsg_len - min_len, ip_set_dump_policy, NULL); if (ret) goto error; cb->args[IPSET_CB_PROTO] = nla_get_u8(cda[IPSET_ATTR_PROTOCOL]); if 
(cda[IPSET_ATTR_SETNAME]) { ip_set_id_t index; struct ip_set *set; set = find_set_and_id(inst, nla_data(cda[IPSET_ATTR_SETNAME]), &index); if (!set) { ret = -ENOENT; goto error; } dump_type = DUMP_ONE; cb->args[IPSET_CB_INDEX] = index; } else { dump_type = DUMP_ALL; } if (cda[IPSET_ATTR_FLAGS]) { u32 f = ip_set_get_h32(cda[IPSET_ATTR_FLAGS]); dump_type |= (f << 16); } cb->args[IPSET_CB_NET] = (unsigned long)inst; cb->args[IPSET_CB_DUMP] = dump_type; return 0; error: /* We have to create and send the error message manually :-( */ if (nlh->nlmsg_flags & NLM_F_ACK) { netlink_ack(cb->skb, nlh, ret, NULL); } return ret; } static int ip_set_dump_do(struct sk_buff *skb, struct netlink_callback *cb) { ip_set_id_t index = IPSET_INVALID_ID, max; struct ip_set *set = NULL; struct nlmsghdr *nlh = NULL; unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0; struct ip_set_net *inst = ip_set_pernet(sock_net(skb->sk)); u32 dump_type, dump_flags; bool is_destroyed; int ret = 0; if (!cb->args[IPSET_CB_DUMP]) return -EINVAL; if (cb->args[IPSET_CB_INDEX] >= inst->ip_set_max) goto out; dump_type = DUMP_TYPE(cb->args[IPSET_CB_DUMP]); dump_flags = DUMP_FLAGS(cb->args[IPSET_CB_DUMP]); max = dump_type == DUMP_ONE ? cb->args[IPSET_CB_INDEX] + 1 : inst->ip_set_max; dump_last: pr_debug("dump type, flag: %u %u index: %ld\n", dump_type, dump_flags, cb->args[IPSET_CB_INDEX]); for (; cb->args[IPSET_CB_INDEX] < max; cb->args[IPSET_CB_INDEX]++) { index = (ip_set_id_t)cb->args[IPSET_CB_INDEX]; write_lock_bh(&ip_set_ref_lock); set = ip_set(inst, index); is_destroyed = inst->is_destroyed; if (!set || is_destroyed) { write_unlock_bh(&ip_set_ref_lock); if (dump_type == DUMP_ONE) { ret = -ENOENT; goto out; } if (is_destroyed) { /* All sets are just being destroyed */ ret = 0; goto out; } continue; } /* When dumping all sets, we must dump "sorted" * so that lists (unions of sets) are dumped last. 
*/ if (dump_type != DUMP_ONE && ((dump_type == DUMP_ALL) == !!(set->type->features & IPSET_DUMP_LAST))) { write_unlock_bh(&ip_set_ref_lock); continue; } pr_debug("List set: %s\n", set->name); if (!cb->args[IPSET_CB_ARG0]) { /* Start listing: make sure set won't be destroyed */ pr_debug("reference set\n"); set->ref_netlink++; } write_unlock_bh(&ip_set_ref_lock); nlh = start_msg(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, flags, IPSET_CMD_LIST); if (!nlh) { ret = -EMSGSIZE; goto release_refcount; } if (nla_put_u8(skb, IPSET_ATTR_PROTOCOL, cb->args[IPSET_CB_PROTO]) || nla_put_string(skb, IPSET_ATTR_SETNAME, set->name)) goto nla_put_failure; if (dump_flags & IPSET_FLAG_LIST_SETNAME) goto next_set; switch (cb->args[IPSET_CB_ARG0]) { case 0: /* Core header data */ if (nla_put_string(skb, IPSET_ATTR_TYPENAME, set->type->name) || nla_put_u8(skb, IPSET_ATTR_FAMILY, set->family) || nla_put_u8(skb, IPSET_ATTR_REVISION, set->revision)) goto nla_put_failure; if (cb->args[IPSET_CB_PROTO] > IPSET_PROTOCOL_MIN && nla_put_net16(skb, IPSET_ATTR_INDEX, htons(index))) goto nla_put_failure; ret = set->variant->head(set, skb); if (ret < 0) goto release_refcount; if (dump_flags & IPSET_FLAG_LIST_HEADER) goto next_set; if (set->variant->uref) set->variant->uref(set, cb, true); fallthrough; default: ret = set->variant->list(set, skb, cb); if (!cb->args[IPSET_CB_ARG0]) /* Set is done, proceed with next one */ goto next_set; goto release_refcount; } } /* If we dump all sets, continue with dumping last ones */ if (dump_type == DUMP_ALL) { dump_type = DUMP_LAST; cb->args[IPSET_CB_DUMP] = dump_type | (dump_flags << 16); cb->args[IPSET_CB_INDEX] = 0; if (set && set->variant->uref) set->variant->uref(set, cb, false); goto dump_last; } goto out; nla_put_failure: ret = -EFAULT; next_set: if (dump_type == DUMP_ONE) cb->args[IPSET_CB_INDEX] = IPSET_INVALID_ID; else cb->args[IPSET_CB_INDEX]++; release_refcount: /* If there was an error or set is done, release set */ if (ret || !cb->args[IPSET_CB_ARG0]) { set = ip_set_ref_netlink(inst, index); if (set->variant->uref) set->variant->uref(set, cb, false); pr_debug("release set %s\n", set->name); __ip_set_put_netlink(set); cb->args[IPSET_CB_ARG0] = 0; } out: if (nlh) { nlmsg_end(skb, nlh); pr_debug("nlmsg_len: %u\n", nlh->nlmsg_len); dump_attrs(nlh); } return ret < 0 ? 
ret : skb->len; } static int ip_set_dump(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const attr[]) { if (unlikely(protocol_min_failed(attr))) return -IPSET_ERR_PROTOCOL; { struct netlink_dump_control c = { .start = ip_set_dump_start, .dump = ip_set_dump_do, .done = ip_set_dump_done, }; return netlink_dump_start(info->sk, skb, info->nlh, &c); } } /* Add, del and test */ static const struct nla_policy ip_set_adt_policy[IPSET_ATTR_CMD_MAX + 1] = { [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, [IPSET_ATTR_SETNAME] = { .type = NLA_NUL_STRING, .len = IPSET_MAXNAMELEN - 1 }, [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_DATA] = { .type = NLA_NESTED }, [IPSET_ATTR_ADT] = { .type = NLA_NESTED }, }; static int call_ad(struct net *net, struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 flags, bool use_lineno) { int ret; u32 lineno = 0; bool eexist = flags & IPSET_FLAG_EXIST, retried = false; do { if (retried) { __ip_set_get_netlink(set); nfnl_unlock(NFNL_SUBSYS_IPSET); cond_resched(); nfnl_lock(NFNL_SUBSYS_IPSET); __ip_set_put_netlink(set); } ip_set_lock(set); ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried); ip_set_unlock(set); retried = true; } while (ret == -ERANGE || (ret == -EAGAIN && set->variant->resize && (ret = set->variant->resize(set, retried)) == 0)); if (!ret || (ret == -IPSET_ERR_EXIST && eexist)) return 0; if (lineno && use_lineno) { /* Error in restore/batch mode: send back lineno */ struct nlmsghdr *rep, *nlh = nlmsg_hdr(skb); struct sk_buff *skb2; struct nlmsgerr *errmsg; size_t payload = min(SIZE_MAX, sizeof(*errmsg) + nlmsg_len(nlh)); int min_len = nlmsg_total_size(sizeof(struct nfgenmsg)); struct nlattr *cda[IPSET_ATTR_CMD_MAX + 1]; struct nlattr *cmdattr; u32 *errline; skb2 = nlmsg_new(payload, GFP_KERNEL); if (!skb2) return -ENOMEM; rep = nlmsg_put(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NLMSG_ERROR, payload, 0); errmsg = nlmsg_data(rep); errmsg->error = ret; unsafe_memcpy(&errmsg->msg, nlh, nlh->nlmsg_len, /* Bounds checked by the skb layer. */); cmdattr = (void *)&errmsg->msg + min_len; ret = nla_parse(cda, IPSET_ATTR_CMD_MAX, cmdattr, nlh->nlmsg_len - min_len, ip_set_adt_policy, NULL); if (ret) { nlmsg_free(skb2); return ret; } errline = nla_data(cda[IPSET_ATTR_LINENO]); *errline = lineno; nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); /* Signal netlink not to send its ACK/errmsg. 
*/ return -EINTR; } return ret; } static int ip_set_ad(struct net *net, struct sock *ctnl, struct sk_buff *skb, enum ipset_adt adt, const struct nlmsghdr *nlh, const struct nlattr * const attr[], struct netlink_ext_ack *extack) { struct ip_set_net *inst = ip_set_pernet(net); struct ip_set *set; struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {}; const struct nlattr *nla; u32 flags = flag_exist(nlh); bool use_lineno; int ret = 0; if (unlikely(protocol_min_failed(attr) || !attr[IPSET_ATTR_SETNAME] || !((attr[IPSET_ATTR_DATA] != NULL) ^ (attr[IPSET_ATTR_ADT] != NULL)) || (attr[IPSET_ATTR_DATA] && !flag_nested(attr[IPSET_ATTR_DATA])) || (attr[IPSET_ATTR_ADT] && (!flag_nested(attr[IPSET_ATTR_ADT]) || !attr[IPSET_ATTR_LINENO])))) return -IPSET_ERR_PROTOCOL; set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); if (!set) return -ENOENT; use_lineno = !!attr[IPSET_ATTR_LINENO]; if (attr[IPSET_ATTR_DATA]) { if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], set->type->adt_policy, NULL)) return -IPSET_ERR_PROTOCOL; ret = call_ad(net, ctnl, skb, set, tb, adt, flags, use_lineno); } else { int nla_rem; nla_for_each_nested(nla, attr[IPSET_ATTR_ADT], nla_rem) { if (nla_type(nla) != IPSET_ATTR_DATA || !flag_nested(nla) || nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, nla, set->type->adt_policy, NULL)) return -IPSET_ERR_PROTOCOL; ret = call_ad(net, ctnl, skb, set, tb, adt, flags, use_lineno); if (ret < 0) return ret; } } return ret; } static int ip_set_uadd(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const attr[]) { return ip_set_ad(info->net, info->sk, skb, IPSET_ADD, info->nlh, attr, info->extack); } static int ip_set_udel(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const attr[]) { return ip_set_ad(info->net, info->sk, skb, IPSET_DEL, info->nlh, attr, info->extack); } static int ip_set_utest(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const attr[]) { struct ip_set_net *inst = ip_set_pernet(info->net); struct ip_set *set; struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {}; int ret = 0; u32 lineno; if (unlikely(protocol_min_failed(attr) || !attr[IPSET_ATTR_SETNAME] || !attr[IPSET_ATTR_DATA] || !flag_nested(attr[IPSET_ATTR_DATA]))) return -IPSET_ERR_PROTOCOL; set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); if (!set) return -ENOENT; if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA], set->type->adt_policy, NULL)) return -IPSET_ERR_PROTOCOL; rcu_read_lock_bh(); ret = set->variant->uadt(set, tb, IPSET_TEST, &lineno, 0, 0); rcu_read_unlock_bh(); /* Userspace can't trigger element to be re-added */ if (ret == -EAGAIN) ret = 1; return ret > 0 ? 
0 : -IPSET_ERR_EXIST; } /* Get headed data of a set */ static int ip_set_header(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const attr[]) { struct ip_set_net *inst = ip_set_pernet(info->net); const struct ip_set *set; struct sk_buff *skb2; struct nlmsghdr *nlh2; if (unlikely(protocol_min_failed(attr) || !attr[IPSET_ATTR_SETNAME])) return -IPSET_ERR_PROTOCOL; set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME])); if (!set) return -ENOENT; skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb2) return -ENOMEM; nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, 0, IPSET_CMD_HEADER); if (!nlh2) goto nlmsg_failure; if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) || nla_put_string(skb2, IPSET_ATTR_SETNAME, set->name) || nla_put_string(skb2, IPSET_ATTR_TYPENAME, set->type->name) || nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) || nla_put_u8(skb2, IPSET_ATTR_REVISION, set->revision)) goto nla_put_failure; nlmsg_end(skb2, nlh2); return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid); nla_put_failure: nlmsg_cancel(skb2, nlh2); nlmsg_failure: kfree_skb(skb2); return -EMSGSIZE; } /* Get type data */ static const struct nla_policy ip_set_type_policy[IPSET_ATTR_CMD_MAX + 1] = { [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, [IPSET_ATTR_TYPENAME] = { .type = NLA_NUL_STRING, .len = IPSET_MAXNAMELEN - 1 }, [IPSET_ATTR_FAMILY] = { .type = NLA_U8 }, }; static int ip_set_type(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const attr[]) { struct sk_buff *skb2; struct nlmsghdr *nlh2; u8 family, min, max; const char *typename; int ret = 0; if (unlikely(protocol_min_failed(attr) || !attr[IPSET_ATTR_TYPENAME] || !attr[IPSET_ATTR_FAMILY])) return -IPSET_ERR_PROTOCOL; family = nla_get_u8(attr[IPSET_ATTR_FAMILY]); typename = nla_data(attr[IPSET_ATTR_TYPENAME]); ret = find_set_type_minmax(typename, family, &min, &max); if (ret) return ret; skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb2) return -ENOMEM; nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, 0, IPSET_CMD_TYPE); if (!nlh2) goto nlmsg_failure; if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) || nla_put_string(skb2, IPSET_ATTR_TYPENAME, typename) || nla_put_u8(skb2, IPSET_ATTR_FAMILY, family) || nla_put_u8(skb2, IPSET_ATTR_REVISION, max) || nla_put_u8(skb2, IPSET_ATTR_REVISION_MIN, min)) goto nla_put_failure; nlmsg_end(skb2, nlh2); pr_debug("Send TYPE, nlmsg_len: %u\n", nlh2->nlmsg_len); return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid); nla_put_failure: nlmsg_cancel(skb2, nlh2); nlmsg_failure: kfree_skb(skb2); return -EMSGSIZE; } /* Get protocol version */ static const struct nla_policy ip_set_protocol_policy[IPSET_ATTR_CMD_MAX + 1] = { [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, }; static int ip_set_protocol(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const attr[]) { struct sk_buff *skb2; struct nlmsghdr *nlh2; if (unlikely(!attr[IPSET_ATTR_PROTOCOL])) return -IPSET_ERR_PROTOCOL; skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb2) return -ENOMEM; nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, 0, IPSET_CMD_PROTOCOL); if (!nlh2) goto nlmsg_failure; if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL)) goto nla_put_failure; if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL_MIN, IPSET_PROTOCOL_MIN)) goto nla_put_failure; nlmsg_end(skb2, nlh2); return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid); nla_put_failure: nlmsg_cancel(skb2, nlh2); 
nlmsg_failure: kfree_skb(skb2); return -EMSGSIZE; } /* Get set by name or index, from userspace */ static int ip_set_byname(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const attr[]) { struct ip_set_net *inst = ip_set_pernet(info->net); struct sk_buff *skb2; struct nlmsghdr *nlh2; ip_set_id_t id = IPSET_INVALID_ID; const struct ip_set *set; if (unlikely(protocol_failed(attr) || !attr[IPSET_ATTR_SETNAME])) return -IPSET_ERR_PROTOCOL; set = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &id); if (id == IPSET_INVALID_ID) return -ENOENT; skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb2) return -ENOMEM; nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, 0, IPSET_CMD_GET_BYNAME); if (!nlh2) goto nlmsg_failure; if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) || nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) || nla_put_net16(skb2, IPSET_ATTR_INDEX, htons(id))) goto nla_put_failure; nlmsg_end(skb2, nlh2); return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid); nla_put_failure: nlmsg_cancel(skb2, nlh2); nlmsg_failure: kfree_skb(skb2); return -EMSGSIZE; } static const struct nla_policy ip_set_index_policy[IPSET_ATTR_CMD_MAX + 1] = { [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 }, [IPSET_ATTR_INDEX] = { .type = NLA_U16 }, }; static int ip_set_byindex(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const attr[]) { struct ip_set_net *inst = ip_set_pernet(info->net); struct sk_buff *skb2; struct nlmsghdr *nlh2; ip_set_id_t id = IPSET_INVALID_ID; const struct ip_set *set; if (unlikely(protocol_failed(attr) || !attr[IPSET_ATTR_INDEX])) return -IPSET_ERR_PROTOCOL; id = ip_set_get_h16(attr[IPSET_ATTR_INDEX]); if (id >= inst->ip_set_max) return -ENOENT; set = ip_set(inst, id); if (set == NULL) return -ENOENT; skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb2) return -ENOMEM; nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, info->nlh->nlmsg_seq, 0, IPSET_CMD_GET_BYINDEX); if (!nlh2) goto nlmsg_failure; if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) || nla_put_string(skb2, IPSET_ATTR_SETNAME, set->name)) goto nla_put_failure; nlmsg_end(skb2, nlh2); return nfnetlink_unicast(skb2, info->net, NETLINK_CB(skb).portid); nla_put_failure: nlmsg_cancel(skb2, nlh2); nlmsg_failure: kfree_skb(skb2); return -EMSGSIZE; } static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = { [IPSET_CMD_NONE] = { .call = ip_set_none, .type = NFNL_CB_MUTEX, .attr_count = IPSET_ATTR_CMD_MAX, }, [IPSET_CMD_CREATE] = { .call = ip_set_create, .type = NFNL_CB_MUTEX, .attr_count = IPSET_ATTR_CMD_MAX, .policy = ip_set_create_policy, }, [IPSET_CMD_DESTROY] = { .call = ip_set_destroy, .type = NFNL_CB_MUTEX, .attr_count = IPSET_ATTR_CMD_MAX, .policy = ip_set_setname_policy, }, [IPSET_CMD_FLUSH] = { .call = ip_set_flush, .type = NFNL_CB_MUTEX, .attr_count = IPSET_ATTR_CMD_MAX, .policy = ip_set_setname_policy, }, [IPSET_CMD_RENAME] = { .call = ip_set_rename, .type = NFNL_CB_MUTEX, .attr_count = IPSET_ATTR_CMD_MAX, .policy = ip_set_setname2_policy, }, [IPSET_CMD_SWAP] = { .call = ip_set_swap, .type = NFNL_CB_MUTEX, .attr_count = IPSET_ATTR_CMD_MAX, .policy = ip_set_setname2_policy, }, [IPSET_CMD_LIST] = { .call = ip_set_dump, .type = NFNL_CB_MUTEX, .attr_count = IPSET_ATTR_CMD_MAX, .policy = ip_set_dump_policy, }, [IPSET_CMD_SAVE] = { .call = ip_set_dump, .type = NFNL_CB_MUTEX, .attr_count = IPSET_ATTR_CMD_MAX, .policy = ip_set_setname_policy, }, [IPSET_CMD_ADD] = { .call = ip_set_uadd, .type = 
NFNL_CB_MUTEX, .attr_count = IPSET_ATTR_CMD_MAX, .policy = ip_set_adt_policy, }, [IPSET_CMD_DEL] = { .call = ip_set_udel, .type = NFNL_CB_MUTEX, .attr_count = IPSET_ATTR_CMD_MAX, .policy = ip_set_adt_policy, }, [IPSET_CMD_TEST] = { .call = ip_set_utest, .type = NFNL_CB_MUTEX, .attr_count = IPSET_ATTR_CMD_MAX, .policy = ip_set_adt_policy, }, [IPSET_CMD_HEADER] = { .call = ip_set_header, .type = NFNL_CB_MUTEX, .attr_count = IPSET_ATTR_CMD_MAX, .policy = ip_set_setname_policy, }, [IPSET_CMD_TYPE] = { .call = ip_set_type, .type = NFNL_CB_MUTEX, .attr_count = IPSET_ATTR_CMD_MAX, .policy = ip_set_type_policy, }, [IPSET_CMD_PROTOCOL] = { .call = ip_set_protocol, .type = NFNL_CB_MUTEX, .attr_count = IPSET_ATTR_CMD_MAX, .policy = ip_set_protocol_policy, }, [IPSET_CMD_GET_BYNAME] = { .call = ip_set_byname, .type = NFNL_CB_MUTEX, .attr_count = IPSET_ATTR_CMD_MAX, .policy = ip_set_setname_policy, }, [IPSET_CMD_GET_BYINDEX] = { .call = ip_set_byindex, .type = NFNL_CB_MUTEX, .attr_count = IPSET_ATTR_CMD_MAX, .policy = ip_set_index_policy, }, }; static struct nfnetlink_subsystem ip_set_netlink_subsys __read_mostly = { .name = "ip_set", .subsys_id = NFNL_SUBSYS_IPSET, .cb_count = IPSET_MSG_MAX, .cb = ip_set_netlink_subsys_cb, }; /* Interface to iptables/ip6tables */ static int ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) { unsigned int *op; void *data; int copylen = *len, ret = 0; struct net *net = sock_net(sk); struct ip_set_net *inst = ip_set_pernet(net); if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (optval != SO_IP_SET) return -EBADF; if (*len < sizeof(unsigned int)) return -EINVAL; data = vmalloc(*len); if (!data) return -ENOMEM; if (copy_from_user(data, user, *len) != 0) { ret = -EFAULT; goto done; } op = data; if (*op < IP_SET_OP_VERSION) { /* Check the version at the beginning of operations */ struct ip_set_req_version *req_version = data; if (*len < sizeof(struct ip_set_req_version)) { ret = -EINVAL; goto done; } if (req_version->version < IPSET_PROTOCOL_MIN) { ret = -EPROTO; goto done; } } switch (*op) { case IP_SET_OP_VERSION: { struct ip_set_req_version *req_version = data; if (*len != sizeof(struct ip_set_req_version)) { ret = -EINVAL; goto done; } req_version->version = IPSET_PROTOCOL; if (copy_to_user(user, req_version, sizeof(struct ip_set_req_version))) ret = -EFAULT; goto done; } case IP_SET_OP_GET_BYNAME: { struct ip_set_req_get_set *req_get = data; ip_set_id_t id; if (*len != sizeof(struct ip_set_req_get_set)) { ret = -EINVAL; goto done; } req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0'; nfnl_lock(NFNL_SUBSYS_IPSET); find_set_and_id(inst, req_get->set.name, &id); req_get->set.index = id; nfnl_unlock(NFNL_SUBSYS_IPSET); goto copy; } case IP_SET_OP_GET_FNAME: { struct ip_set_req_get_set_family *req_get = data; ip_set_id_t id; if (*len != sizeof(struct ip_set_req_get_set_family)) { ret = -EINVAL; goto done; } req_get->set.name[IPSET_MAXNAMELEN - 1] = '\0'; nfnl_lock(NFNL_SUBSYS_IPSET); find_set_and_id(inst, req_get->set.name, &id); req_get->set.index = id; if (id != IPSET_INVALID_ID) req_get->family = ip_set(inst, id)->family; nfnl_unlock(NFNL_SUBSYS_IPSET); goto copy; } case IP_SET_OP_GET_BYINDEX: { struct ip_set_req_get_set *req_get = data; struct ip_set *set; if (*len != sizeof(struct ip_set_req_get_set) || req_get->set.index >= inst->ip_set_max) { ret = -EINVAL; goto done; } nfnl_lock(NFNL_SUBSYS_IPSET); set = ip_set(inst, req_get->set.index); ret = strscpy(req_get->set.name, set ? 
set->name : "", IPSET_MAXNAMELEN); nfnl_unlock(NFNL_SUBSYS_IPSET); if (ret < 0) goto done; goto copy; } default: ret = -EBADMSG; goto done; } /* end of switch(op) */ copy: if (copy_to_user(user, data, copylen)) ret = -EFAULT; done: vfree(data); if (ret > 0) ret = 0; return ret; } static struct nf_sockopt_ops so_set __read_mostly = { .pf = PF_INET, .get_optmin = SO_IP_SET, .get_optmax = SO_IP_SET + 1, .get = ip_set_sockfn_get, .owner = THIS_MODULE, }; static int __net_init ip_set_net_init(struct net *net) { struct ip_set_net *inst = ip_set_pernet(net); struct ip_set **list; inst->ip_set_max = max_sets ? max_sets : CONFIG_IP_SET_MAX; if (inst->ip_set_max >= IPSET_INVALID_ID) inst->ip_set_max = IPSET_INVALID_ID - 1; list = kvcalloc(inst->ip_set_max, sizeof(struct ip_set *), GFP_KERNEL); if (!list) return -ENOMEM; inst->is_deleted = false; inst->is_destroyed = false; rcu_assign_pointer(inst->ip_set_list, list); return 0; } static void __net_exit ip_set_net_pre_exit(struct net *net) { struct ip_set_net *inst = ip_set_pernet(net); inst->is_deleted = true; /* flag for ip_set_nfnl_put */ } static void __net_exit ip_set_net_exit(struct net *net) { struct ip_set_net *inst = ip_set_pernet(net); _destroy_all_sets(inst); kvfree(rcu_dereference_protected(inst->ip_set_list, 1)); } static struct pernet_operations ip_set_net_ops = { .init = ip_set_net_init, .pre_exit = ip_set_net_pre_exit, .exit = ip_set_net_exit, .id = &ip_set_net_id, .size = sizeof(struct ip_set_net), }; static int __init ip_set_init(void) { int ret = register_pernet_subsys(&ip_set_net_ops); if (ret) { pr_err("ip_set: cannot register pernet_subsys.\n"); return ret; } ret = nfnetlink_subsys_register(&ip_set_netlink_subsys); if (ret != 0) { pr_err("ip_set: cannot register with nfnetlink.\n"); unregister_pernet_subsys(&ip_set_net_ops); return ret; } ret = nf_register_sockopt(&so_set); if (ret != 0) { pr_err("SO_SET registry failed: %d\n", ret); nfnetlink_subsys_unregister(&ip_set_netlink_subsys); unregister_pernet_subsys(&ip_set_net_ops); return ret; } return 0; } static void __exit ip_set_fini(void) { nf_unregister_sockopt(&so_set); nfnetlink_subsys_unregister(&ip_set_netlink_subsys); unregister_pernet_subsys(&ip_set_net_ops); /* Wait for call_rcu() in destroy */ rcu_barrier(); pr_debug("these are the famous last words\n"); } module_init(ip_set_init); module_exit(ip_set_fini); MODULE_DESCRIPTION("ip_set: protocol " __stringify(IPSET_PROTOCOL)); |
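The ip_set_sockfn_get() handler above is the legacy getsockopt() interface that iptables/ip6tables and the ipset userspace tool use for simple queries. As a rough illustration of the IP_SET_OP_VERSION branch, here is a minimal userspace sketch; it assumes the uapi definitions (SO_IP_SET, IP_SET_OP_VERSION, struct ip_set_req_version) from <linux/netfilter/ipset/ip_set.h>, and the caller needs CAP_NET_ADMIN plus the privileges to open a raw socket.

/* Hypothetical userspace sketch: query the ipset protocol version through
 * the sockopt interface served by ip_set_sockfn_get() above.
 */
#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/netfilter/ipset/ip_set.h>

int main(void)
{
        struct ip_set_req_version req = { .op = IP_SET_OP_VERSION };
        socklen_t len = sizeof(req);
        int fd = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);

        if (fd < 0)
                return 1;
        /* The handler checks *len == sizeof(req) and fills in IPSET_PROTOCOL. */
        if (getsockopt(fd, SOL_IP, SO_IP_SET, &req, &len) == 0)
                printf("ipset protocol: %u\n", req.version);
        close(fd);
        return 0;
}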
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Definitions for the IP router. * * Version: @(#)route.h 1.0.4 05/27/93 * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Fixes: * Alan Cox : Reformatted. Added ip_rt_local() * Alan Cox : Support for TCP parameters. * Alexey Kuznetsov: Major changes for new routing code.
* Mike McLagan : Routing by source * Robert Olsson : Added rt_cache statistics */ #ifndef _ROUTE_H #define _ROUTE_H #include <net/dst.h> #include <net/inetpeer.h> #include <net/flow.h> #include <net/inet_sock.h> #include <net/ip_fib.h> #include <net/arp.h> #include <net/ndisc.h> #include <net/inet_dscp.h> #include <net/sock.h> #include <linux/in_route.h> #include <linux/rtnetlink.h> #include <linux/rcupdate.h> #include <linux/route.h> #include <linux/ip.h> #include <linux/cache.h> #include <linux/security.h> static inline __u8 ip_sock_rt_scope(const struct sock *sk) { if (sock_flag(sk, SOCK_LOCALROUTE)) return RT_SCOPE_LINK; return RT_SCOPE_UNIVERSE; } static inline __u8 ip_sock_rt_tos(const struct sock *sk) { return READ_ONCE(inet_sk(sk)->tos) & INET_DSCP_MASK; } struct ip_tunnel_info; struct fib_nh; struct fib_info; struct uncached_list; struct rtable { struct dst_entry dst; int rt_genid; unsigned int rt_flags; __u16 rt_type; __u8 rt_is_input; __u8 rt_uses_gateway; int rt_iif; u8 rt_gw_family; /* Info on neighbour */ union { __be32 rt_gw4; struct in6_addr rt_gw6; }; /* Miscellaneous cached information */ u32 rt_mtu_locked:1, rt_pmtu:31; }; #define dst_rtable(_ptr) container_of_const(_ptr, struct rtable, dst) /** * skb_rtable - Returns the skb &rtable * @skb: buffer */ static inline struct rtable *skb_rtable(const struct sk_buff *skb) { return dst_rtable(skb_dst(skb)); } static inline bool rt_is_input_route(const struct rtable *rt) { return rt->rt_is_input != 0; } static inline bool rt_is_output_route(const struct rtable *rt) { return rt->rt_is_input == 0; } static inline __be32 rt_nexthop(const struct rtable *rt, __be32 daddr) { if (rt->rt_gw_family == AF_INET) return rt->rt_gw4; return daddr; } struct ip_rt_acct { __u32 o_bytes; __u32 o_packets; __u32 i_bytes; __u32 i_packets; }; struct rt_cache_stat { unsigned int in_slow_tot; unsigned int in_slow_mc; unsigned int in_no_route; unsigned int in_brd; unsigned int in_martian_dst; unsigned int in_martian_src; unsigned int out_slow_tot; unsigned int out_slow_mc; }; extern struct ip_rt_acct __percpu *ip_rt_acct; struct in_device; int ip_rt_init(void); void rt_cache_flush(struct net *net); void rt_flush_dev(struct net_device *dev); static inline void inet_sk_init_flowi4(const struct inet_sock *inet, struct flowi4 *fl4) { const struct ip_options_rcu *ip4_opt; const struct sock *sk; __be32 daddr; rcu_read_lock(); ip4_opt = rcu_dereference(inet->inet_opt); /* Source routing option overrides the socket destination address */ if (ip4_opt && ip4_opt->opt.srr) daddr = ip4_opt->opt.faddr; else daddr = inet->inet_daddr; rcu_read_unlock(); sk = &inet->sk; flowi4_init_output(fl4, sk->sk_bound_dev_if, READ_ONCE(sk->sk_mark), ip_sock_rt_tos(sk), ip_sock_rt_scope(sk), sk->sk_protocol, inet_sk_flowi_flags(sk), daddr, inet->inet_saddr, inet->inet_dport, inet->inet_sport, sk->sk_uid); security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); } struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *flp, const struct sk_buff *skb); struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *flp, struct fib_result *res, const struct sk_buff *skb); static inline struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *flp) { return ip_route_output_key_hash(net, flp, NULL); } struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp, const struct sock *sk); struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig); static inline struct rtable *ip_route_output_key(struct net *net, 
struct flowi4 *flp) { return ip_route_output_flow(net, flp, NULL); } /* Simplistic IPv4 route lookup function. * This is only suitable for some particular use cases: since the flowi4 * structure is only partially set, it may bypass some fib-rules. */ static inline struct rtable *ip_route_output(struct net *net, __be32 daddr, __be32 saddr, dscp_t dscp, int oif, __u8 scope) { struct flowi4 fl4 = { .flowi4_oif = oif, .flowi4_tos = inet_dscp_to_dsfield(dscp), .flowi4_scope = scope, .daddr = daddr, .saddr = saddr, }; return ip_route_output_key(net, &fl4); } static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi4 *fl4, const struct sock *sk, __be32 daddr, __be32 saddr, __be16 dport, __be16 sport, __u8 proto, __u8 tos, int oif) { flowi4_init_output(fl4, oif, sk ? READ_ONCE(sk->sk_mark) : 0, tos, sk ? ip_sock_rt_scope(sk) : RT_SCOPE_UNIVERSE, proto, sk ? inet_sk_flowi_flags(sk) : 0, daddr, saddr, dport, sport, sock_net_uid(net, sk)); if (sk) security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); return ip_route_output_flow(net, fl4, sk); } enum skb_drop_reason ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, dscp_t dscp, struct net_device *dev, struct in_device *in_dev, u32 *itag); enum skb_drop_reason ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, dscp_t dscp, struct net_device *dev); enum skb_drop_reason ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, dscp_t dscp, struct net_device *dev, const struct sk_buff *hint); static inline enum skb_drop_reason ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, dscp_t dscp, struct net_device *devin) { enum skb_drop_reason reason; rcu_read_lock(); reason = ip_route_input_noref(skb, dst, src, dscp, devin); if (!reason) { skb_dst_force(skb); if (!skb_dst(skb)) reason = SKB_DROP_REASON_NOT_SPECIFIED; } rcu_read_unlock(); return reason; } void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, int oif, u8 protocol); void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu); void ipv4_redirect(struct sk_buff *skb, struct net *net, int oif, u8 protocol); void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk); void ip_rt_send_redirect(struct sk_buff *skb); unsigned int inet_addr_type(struct net *net, __be32 addr); unsigned int inet_addr_type_table(struct net *net, __be32 addr, u32 tb_id); unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, __be32 addr); unsigned int inet_addr_type_dev_table(struct net *net, const struct net_device *dev, __be32 addr); void ip_rt_multicast_event(struct in_device *); int ip_rt_ioctl(struct net *, unsigned int cmd, struct rtentry *rt); void ip_rt_get_source(u8 *src, struct sk_buff *skb, struct rtable *rt); struct rtable *rt_dst_alloc(struct net_device *dev, unsigned int flags, u16 type, bool noxfrm); struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt); struct in_ifaddr; void fib_add_ifaddr(struct in_ifaddr *); void fib_del_ifaddr(struct in_ifaddr *, struct in_ifaddr *); void fib_modify_prefix_metric(struct in_ifaddr *ifa, u32 new_metric); void rt_add_uncached_list(struct rtable *rt); void rt_del_uncached_list(struct rtable *rt); int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb, u32 table_id, struct fib_info *fi, int *fa_index, int fa_start, unsigned int flags); static inline void ip_rt_put(struct rtable *rt) { /* dst_release() accepts a NULL parameter. 
* We rely on dst being first structure in struct rtable */ BUILD_BUG_ON(offsetof(struct rtable, dst) != 0); dst_release(&rt->dst); } extern const __u8 ip_tos2prio[16]; static inline char rt_tos2priority(u8 tos) { return ip_tos2prio[IPTOS_TOS(tos)>>1]; } /* ip_route_connect() and ip_route_newports() work in tandem whilst * binding a socket for a new outgoing connection. * * In order to use IPSEC properly, we must, in the end, have a * route that was looked up using all available keys including source * and destination ports. * * However, if a source port needs to be allocated (the user specified * a wildcard source port) we need to obtain addressing information * in order to perform that allocation. * * So ip_route_connect() looks up a route using wildcarded source and * destination ports in the key, simply so that we can get a pair of * addresses to use for port allocation. * * Later, once the ports are allocated, ip_route_newports() will make * another route lookup if needed to make sure we catch any IPSEC * rules keyed on the port information. * * The callers allocate the flow key on their stack, and must pass in * the same flowi4 object to both the ip_route_connect() and the * ip_route_newports() calls. */ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32 src, int oif, u8 protocol, __be16 sport, __be16 dport, const struct sock *sk) { __u8 flow_flags = 0; if (inet_test_bit(TRANSPARENT, sk)) flow_flags |= FLOWI_FLAG_ANYSRC; if (IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) && !sport) flow_flags |= FLOWI_FLAG_ANY_SPORT; flowi4_init_output(fl4, oif, READ_ONCE(sk->sk_mark), ip_sock_rt_tos(sk), ip_sock_rt_scope(sk), protocol, flow_flags, dst, src, dport, sport, sk->sk_uid); } static inline struct rtable *ip_route_connect(struct flowi4 *fl4, __be32 dst, __be32 src, int oif, u8 protocol, __be16 sport, __be16 dport, const struct sock *sk) { struct net *net = sock_net(sk); struct rtable *rt; ip_route_connect_init(fl4, dst, src, oif, protocol, sport, dport, sk); if (!dst || !src) { rt = __ip_route_output_key(net, fl4); if (IS_ERR(rt)) return rt; ip_rt_put(rt); flowi4_update_output(fl4, oif, fl4->daddr, fl4->saddr); } security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); return ip_route_output_flow(net, fl4, sk); } static inline struct rtable *ip_route_newports(struct flowi4 *fl4, struct rtable *rt, __be16 orig_sport, __be16 orig_dport, __be16 sport, __be16 dport, const struct sock *sk) { if (sport != orig_sport || dport != orig_dport) { fl4->fl4_dport = dport; fl4->fl4_sport = sport; ip_rt_put(rt); flowi4_update_output(fl4, sk->sk_bound_dev_if, fl4->daddr, fl4->saddr); security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); return ip_route_output_flow(sock_net(sk), fl4, sk); } return rt; } static inline int inet_iif(const struct sk_buff *skb) { struct rtable *rt = skb_rtable(skb); if (rt && rt->rt_iif) return rt->rt_iif; return skb->skb_iif; } static inline int ip4_dst_hoplimit(const struct dst_entry *dst) { int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); if (hoplimit == 0) { const struct net *net; rcu_read_lock(); net = dev_net_rcu(dst->dev); hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl); rcu_read_unlock(); } return hoplimit; } static inline struct neighbour *ip_neigh_gw4(struct net_device *dev, __be32 daddr) { struct neighbour *neigh; neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)daddr); if (unlikely(!neigh)) neigh = __neigh_create(&arp_tbl, &daddr, dev, false); return neigh; } static inline struct neighbour *ip_neigh_for_gw(struct rtable *rt, 
struct sk_buff *skb, bool *is_v6gw) { struct net_device *dev = rt->dst.dev; struct neighbour *neigh; if (likely(rt->rt_gw_family == AF_INET)) { neigh = ip_neigh_gw4(dev, rt->rt_gw4); } else if (rt->rt_gw_family == AF_INET6) { neigh = ip_neigh_gw6(dev, &rt->rt_gw6); *is_v6gw = true; } else { neigh = ip_neigh_gw4(dev, ip_hdr(skb)->daddr); } return neigh; } #endif /* _ROUTE_H */
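The helpers declared above are the common entry points for resolving IPv4 output routes inside the kernel. The following is a minimal caller sketch (example_resolve() is a hypothetical function, not part of route.h): it looks up a route with ip_route_output(), derives the next hop with rt_nexthop(), and releases the dst reference with ip_rt_put().

/* Hypothetical caller sketch: resolve an IPv4 output route with the
 * wrappers declared in this header.
 */
#include <linux/err.h>
#include <linux/printk.h>
#include <net/route.h>
#include <net/inet_dscp.h>

static int example_resolve(struct net *net, __be32 daddr)
{
        struct rtable *rt;
        __be32 nexthop;

        /* No source hint, default DSCP, any interface, universe scope. */
        rt = ip_route_output(net, daddr, 0, inet_dsfield_to_dscp(0), 0,
                             RT_SCOPE_UNIVERSE);
        if (IS_ERR(rt))
                return PTR_ERR(rt);

        nexthop = rt_nexthop(rt, daddr);        /* gateway if set, else daddr */
        pr_info("route via %pI4 dev %s\n", &nexthop, rt->dst.dev->name);

        ip_rt_put(rt);  /* drop the dst reference taken by the lookup */
        return 0;
}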
// SPDX-License-Identifier: GPL-2.0-or-later /* * inet fragments management * * Authors: Pavel Emelyanov <xemul@openvz.org> * Started as consolidation of ipv4/ip_fragment.c, * ipv6/reassembly. and ipv6 nf conntrack reassembly */ #include <linux/list.h> #include <linux/spinlock.h> #include <linux/module.h> #include <linux/timer.h> #include <linux/mm.h> #include <linux/random.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/slab.h> #include <linux/rhashtable.h> #include <net/sock.h> #include <net/inet_frag.h> #include <net/inet_ecn.h> #include <net/ip.h> #include <net/ipv6.h> #include "../core/sock_destructor.h" /* Use skb->cb to track consecutive/adjacent fragments coming at * the end of the queue.
Nodes in the rb-tree queue will * contain "runs" of one or more adjacent fragments. * * Invariants: * - next_frag is NULL at the tail of a "run"; * - the head of a "run" has the sum of all fragment lengths in frag_run_len. */ struct ipfrag_skb_cb { union { struct inet_skb_parm h4; struct inet6_skb_parm h6; }; struct sk_buff *next_frag; int frag_run_len; int ip_defrag_offset; }; #define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) static void fragcb_clear(struct sk_buff *skb) { RB_CLEAR_NODE(&skb->rbnode); FRAG_CB(skb)->next_frag = NULL; FRAG_CB(skb)->frag_run_len = skb->len; } /* Append skb to the last "run". */ static void fragrun_append_to_last(struct inet_frag_queue *q, struct sk_buff *skb) { fragcb_clear(skb); FRAG_CB(q->last_run_head)->frag_run_len += skb->len; FRAG_CB(q->fragments_tail)->next_frag = skb; q->fragments_tail = skb; } /* Create a new "run" with the skb. */ static void fragrun_create(struct inet_frag_queue *q, struct sk_buff *skb) { BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb)); fragcb_clear(skb); if (q->last_run_head) rb_link_node(&skb->rbnode, &q->last_run_head->rbnode, &q->last_run_head->rbnode.rb_right); else rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node); rb_insert_color(&skb->rbnode, &q->rb_fragments); q->fragments_tail = skb; q->last_run_head = skb; } /* Given the OR values of all fragments, apply RFC 3168 5.3 requirements * Value : 0xff if frame should be dropped. * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field */ const u8 ip_frag_ecn_table[16] = { /* at least one fragment had CE, and others ECT_0 or ECT_1 */ [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = INET_ECN_CE, [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = INET_ECN_CE, [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = INET_ECN_CE, /* invalid combinations : drop frame */ [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff, [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff, [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff, [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff, [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff, [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, }; EXPORT_SYMBOL(ip_frag_ecn_table); int inet_frags_init(struct inet_frags *f) { f->frags_cachep = kmem_cache_create(f->frags_cache_name, f->qsize, 0, 0, NULL); if (!f->frags_cachep) return -ENOMEM; refcount_set(&f->refcnt, 1); init_completion(&f->completion); return 0; } EXPORT_SYMBOL(inet_frags_init); void inet_frags_fini(struct inet_frags *f) { if (refcount_dec_and_test(&f->refcnt)) complete(&f->completion); wait_for_completion(&f->completion); kmem_cache_destroy(f->frags_cachep); f->frags_cachep = NULL; } EXPORT_SYMBOL(inet_frags_fini); /* called from rhashtable_free_and_destroy() at netns_frags dismantle */ static void inet_frags_free_cb(void *ptr, void *arg) { struct inet_frag_queue *fq = ptr; int count; count = timer_delete_sync(&fq->timer) ? 
1 : 0; spin_lock_bh(&fq->lock); fq->flags |= INET_FRAG_DROP; if (!(fq->flags & INET_FRAG_COMPLETE)) { fq->flags |= INET_FRAG_COMPLETE; count++; } else if (fq->flags & INET_FRAG_HASH_DEAD) { count++; } spin_unlock_bh(&fq->lock); inet_frag_putn(fq, count); } static LLIST_HEAD(fqdir_free_list); static void fqdir_free_fn(struct work_struct *work) { struct llist_node *kill_list; struct fqdir *fqdir, *tmp; struct inet_frags *f; /* Atomically snapshot the list of fqdirs to free */ kill_list = llist_del_all(&fqdir_free_list); /* We need to make sure all ongoing call_rcu(..., inet_frag_destroy_rcu) * have completed, since they need to dereference fqdir. * Would it not be nice to have kfree_rcu_barrier() ? :) */ rcu_barrier(); llist_for_each_entry_safe(fqdir, tmp, kill_list, free_list) { f = fqdir->f; if (refcount_dec_and_test(&f->refcnt)) complete(&f->completion); kfree(fqdir); } } static DECLARE_DELAYED_WORK(fqdir_free_work, fqdir_free_fn); static void fqdir_work_fn(struct work_struct *work) { struct fqdir *fqdir = container_of(work, struct fqdir, destroy_work); rhashtable_free_and_destroy(&fqdir->rhashtable, inet_frags_free_cb, NULL); if (llist_add(&fqdir->free_list, &fqdir_free_list)) queue_delayed_work(system_wq, &fqdir_free_work, HZ); } int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net) { struct fqdir *fqdir = kzalloc(sizeof(*fqdir), GFP_KERNEL); int res; if (!fqdir) return -ENOMEM; fqdir->f = f; fqdir->net = net; res = rhashtable_init(&fqdir->rhashtable, &fqdir->f->rhash_params); if (res < 0) { kfree(fqdir); return res; } refcount_inc(&f->refcnt); *fqdirp = fqdir; return 0; } EXPORT_SYMBOL(fqdir_init); static struct workqueue_struct *inet_frag_wq; static int __init inet_frag_wq_init(void) { inet_frag_wq = create_workqueue("inet_frag_wq"); if (!inet_frag_wq) panic("Could not create inet frag workq"); return 0; } pure_initcall(inet_frag_wq_init); void fqdir_exit(struct fqdir *fqdir) { INIT_WORK(&fqdir->destroy_work, fqdir_work_fn); queue_work(inet_frag_wq, &fqdir->destroy_work); } EXPORT_SYMBOL(fqdir_exit); void inet_frag_kill(struct inet_frag_queue *fq, int *refs) { if (timer_delete(&fq->timer)) (*refs)++; if (!(fq->flags & INET_FRAG_COMPLETE)) { struct fqdir *fqdir = fq->fqdir; fq->flags |= INET_FRAG_COMPLETE; rcu_read_lock(); /* The RCU read lock provides a memory barrier * guaranteeing that if fqdir->dead is false then * the hash table destruction will not start until * after we unlock. Paired with fqdir_pre_exit(). 
*/ if (!READ_ONCE(fqdir->dead)) { rhashtable_remove_fast(&fqdir->rhashtable, &fq->node, fqdir->f->rhash_params); (*refs)++; } else { fq->flags |= INET_FRAG_HASH_DEAD; } rcu_read_unlock(); } } EXPORT_SYMBOL(inet_frag_kill); static void inet_frag_destroy_rcu(struct rcu_head *head) { struct inet_frag_queue *q = container_of(head, struct inet_frag_queue, rcu); struct inet_frags *f = q->fqdir->f; if (f->destructor) f->destructor(q); kmem_cache_free(f->frags_cachep, q); } unsigned int inet_frag_rbtree_purge(struct rb_root *root, enum skb_drop_reason reason) { struct rb_node *p = rb_first(root); unsigned int sum = 0; while (p) { struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode); p = rb_next(p); rb_erase(&skb->rbnode, root); while (skb) { struct sk_buff *next = FRAG_CB(skb)->next_frag; sum += skb->truesize; kfree_skb_reason(skb, reason); skb = next; } } return sum; } EXPORT_SYMBOL(inet_frag_rbtree_purge); void inet_frag_destroy(struct inet_frag_queue *q) { unsigned int sum, sum_truesize = 0; enum skb_drop_reason reason; struct inet_frags *f; struct fqdir *fqdir; WARN_ON(!(q->flags & INET_FRAG_COMPLETE)); reason = (q->flags & INET_FRAG_DROP) ? SKB_DROP_REASON_FRAG_REASM_TIMEOUT : SKB_CONSUMED; WARN_ON(timer_delete(&q->timer) != 0); /* Release all fragment data. */ fqdir = q->fqdir; f = fqdir->f; sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments, reason); sum = sum_truesize + f->qsize; call_rcu(&q->rcu, inet_frag_destroy_rcu); sub_frag_mem_limit(fqdir, sum); } EXPORT_SYMBOL(inet_frag_destroy); static struct inet_frag_queue *inet_frag_alloc(struct fqdir *fqdir, struct inet_frags *f, void *arg) { struct inet_frag_queue *q; q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC); if (!q) return NULL; q->fqdir = fqdir; f->constructor(q, arg); add_frag_mem_limit(fqdir, f->qsize); timer_setup(&q->timer, f->frag_expire, 0); spin_lock_init(&q->lock); /* One reference for the timer, one for the hash table. */ refcount_set(&q->refcnt, 2); return q; } static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir, void *arg, struct inet_frag_queue **prev) { struct inet_frags *f = fqdir->f; struct inet_frag_queue *q; q = inet_frag_alloc(fqdir, f, arg); if (!q) { *prev = ERR_PTR(-ENOMEM); return NULL; } mod_timer(&q->timer, jiffies + fqdir->timeout); *prev = rhashtable_lookup_get_insert_key(&fqdir->rhashtable, &q->key, &q->node, f->rhash_params); if (*prev) { /* We could not insert in the hash table, * we need to cancel what inet_frag_alloc() * anticipated. */ int refs = 1; q->flags |= INET_FRAG_COMPLETE; inet_frag_kill(q, &refs); inet_frag_putn(q, refs); return NULL; } return q; } struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key) { /* This pairs with WRITE_ONCE() in fqdir_pre_exit(). */ long high_thresh = READ_ONCE(fqdir->high_thresh); struct inet_frag_queue *fq = NULL, *prev; if (!high_thresh || frag_mem_limit(fqdir) > high_thresh) return NULL; prev = rhashtable_lookup(&fqdir->rhashtable, key, fqdir->f->rhash_params); if (!prev) fq = inet_frag_create(fqdir, key, &prev); if (!IS_ERR_OR_NULL(prev)) fq = prev; return fq; } EXPORT_SYMBOL(inet_frag_find); int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, int offset, int end) { struct sk_buff *last = q->fragments_tail; /* RFC5722, Section 4, amended by Errata ID : 3089 * When reassembling an IPv6 datagram, if * one or more its constituent fragments is determined to be an * overlapping fragment, the entire datagram (and any constituent * fragments) MUST be silently discarded. 
* * Duplicates, however, should be ignored (i.e. skb dropped, but the * queue/fragments kept for later reassembly). */ if (!last) fragrun_create(q, skb); /* First fragment. */ else if (FRAG_CB(last)->ip_defrag_offset + last->len < end) { /* This is the common case: skb goes to the end. */ /* Detect and discard overlaps. */ if (offset < FRAG_CB(last)->ip_defrag_offset + last->len) return IPFRAG_OVERLAP; if (offset == FRAG_CB(last)->ip_defrag_offset + last->len) fragrun_append_to_last(q, skb); else fragrun_create(q, skb); } else { /* Binary search. Note that skb can become the first fragment, * but not the last (covered above). */ struct rb_node **rbn, *parent; rbn = &q->rb_fragments.rb_node; do { struct sk_buff *curr; int curr_run_end; parent = *rbn; curr = rb_to_skb(parent); curr_run_end = FRAG_CB(curr)->ip_defrag_offset + FRAG_CB(curr)->frag_run_len; if (end <= FRAG_CB(curr)->ip_defrag_offset) rbn = &parent->rb_left; else if (offset >= curr_run_end) rbn = &parent->rb_right; else if (offset >= FRAG_CB(curr)->ip_defrag_offset && end <= curr_run_end) return IPFRAG_DUP; else return IPFRAG_OVERLAP; } while (*rbn); /* Here we have parent properly set, and rbn pointing to * one of its NULL left/right children. Insert skb. */ fragcb_clear(skb); rb_link_node(&skb->rbnode, parent, rbn); rb_insert_color(&skb->rbnode, &q->rb_fragments); } FRAG_CB(skb)->ip_defrag_offset = offset; return IPFRAG_OK; } EXPORT_SYMBOL(inet_frag_queue_insert); void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, struct sk_buff *parent) { struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments); void (*destructor)(struct sk_buff *); unsigned int orig_truesize = 0; struct sk_buff **nextp = NULL; struct sock *sk = skb->sk; int delta; if (sk && is_skb_wmem(skb)) { /* TX: skb->sk might have been passed as argument to * dst->output and must remain valid until tx completes. * * Move sk to reassembled skb and fix up wmem accounting. */ orig_truesize = skb->truesize; destructor = skb->destructor; } if (head != skb) { fp = skb_clone(skb, GFP_ATOMIC); if (!fp) { head = skb; goto out_restore_sk; } FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag; if (RB_EMPTY_NODE(&skb->rbnode)) FRAG_CB(parent)->next_frag = fp; else rb_replace_node(&skb->rbnode, &fp->rbnode, &q->rb_fragments); if (q->fragments_tail == skb) q->fragments_tail = fp; if (orig_truesize) { /* prevent skb_morph from releasing sk */ skb->sk = NULL; skb->destructor = NULL; } skb_morph(skb, head); FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag; rb_replace_node(&head->rbnode, &skb->rbnode, &q->rb_fragments); consume_skb(head); head = skb; } WARN_ON(FRAG_CB(head)->ip_defrag_offset != 0); delta = -head->truesize; /* Head of list must not be cloned. */ if (skb_unclone(head, GFP_ATOMIC)) goto out_restore_sk; delta += head->truesize; if (delta) add_frag_mem_limit(q->fqdir, delta); /* If the first fragment is fragmented itself, we split * it to two chunks: the first with data and paged part * and the second, holding only fragments. 
*/ if (skb_has_frag_list(head)) { struct sk_buff *clone; int i, plen = 0; clone = alloc_skb(0, GFP_ATOMIC); if (!clone) goto out_restore_sk; skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; skb_frag_list_init(head); for (i = 0; i < skb_shinfo(head)->nr_frags; i++) plen += skb_frag_size(&skb_shinfo(head)->frags[i]); clone->data_len = head->data_len - plen; clone->len = clone->data_len; head->truesize += clone->truesize; clone->csum = 0; clone->ip_summed = head->ip_summed; add_frag_mem_limit(q->fqdir, clone->truesize); skb_shinfo(head)->frag_list = clone; nextp = &clone->next; } else { nextp = &skb_shinfo(head)->frag_list; } out_restore_sk: if (orig_truesize) { int ts_delta = head->truesize - orig_truesize; /* if this reassembled skb is fragmented later, * fraglist skbs will get skb->sk assigned from head->sk, * and each frag skb will be released via sock_wfree. * * Update sk_wmem_alloc. */ head->sk = sk; head->destructor = destructor; refcount_add(ts_delta, &sk->sk_wmem_alloc); } return nextp; } EXPORT_SYMBOL(inet_frag_reasm_prepare); void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, void *reasm_data, bool try_coalesce) { struct sock *sk = is_skb_wmem(head) ? head->sk : NULL; const unsigned int head_truesize = head->truesize; struct sk_buff **nextp = reasm_data; struct rb_node *rbn; struct sk_buff *fp; int sum_truesize; skb_push(head, head->data - skb_network_header(head)); /* Traverse the tree in order, to build frag_list. */ fp = FRAG_CB(head)->next_frag; rbn = rb_next(&head->rbnode); rb_erase(&head->rbnode, &q->rb_fragments); sum_truesize = head->truesize; while (rbn || fp) { /* fp points to the next sk_buff in the current run; * rbn points to the next run. */ /* Go through the current run. */ while (fp) { struct sk_buff *next_frag = FRAG_CB(fp)->next_frag; bool stolen; int delta; sum_truesize += fp->truesize; if (head->ip_summed != fp->ip_summed) head->ip_summed = CHECKSUM_NONE; else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); if (try_coalesce && skb_try_coalesce(head, fp, &stolen, &delta)) { kfree_skb_partial(fp, stolen); } else { fp->prev = NULL; memset(&fp->rbnode, 0, sizeof(fp->rbnode)); fp->sk = NULL; head->data_len += fp->len; head->len += fp->len; head->truesize += fp->truesize; *nextp = fp; nextp = &fp->next; } fp = next_frag; } /* Move to the next run. */ if (rbn) { struct rb_node *rbnext = rb_next(rbn); fp = rb_to_skb(rbn); rb_erase(rbn, &q->rb_fragments); rbn = rbnext; } } sub_frag_mem_limit(q->fqdir, sum_truesize); *nextp = NULL; skb_mark_not_on_list(head); head->prev = NULL; head->tstamp = q->stamp; head->tstamp_type = q->tstamp_type; if (sk) refcount_add(sum_truesize - head_truesize, &sk->sk_wmem_alloc); } EXPORT_SYMBOL(inet_frag_reasm_finish); struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q) { struct sk_buff *head, *skb; head = skb_rb_first(&q->rb_fragments); if (!head) return NULL; skb = FRAG_CB(head)->next_frag; if (skb) rb_replace_node(&head->rbnode, &skb->rbnode, &q->rb_fragments); else rb_erase(&head->rbnode, &q->rb_fragments); memset(&head->rbnode, 0, sizeof(head->rbnode)); barrier(); if (head == q->fragments_tail) q->fragments_tail = NULL; sub_frag_mem_limit(q->fqdir, head->truesize); return head; } EXPORT_SYMBOL(inet_frag_pull_head); |
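ip_frag_ecn_table[] above encodes the RFC 3168 section 5.3 rules for recombining the ECN marks seen on individual fragments. The sketch below shows how a reassembler might consume it once all fragments are queued; example_finalize_ecn() is an assumed helper modelled on net/ipv4/ip_fragment.c, not a function defined in this file.

/* Assumed consumer sketch of ip_frag_ecn_table[], modelled on the IPv4
 * reassembler: each fragment contributes the bit 1 << (tos & INET_ECN_MASK),
 * the queue ORs those IPFRAG_ECN_* bits together, and the table maps the
 * mix to a drop verdict (0xff) or to the bits ORed into the final iph->tos.
 */
#include <linux/errno.h>
#include <linux/ip.h>
#include <net/inet_frag.h>
#include <net/inet_ecn.h>

/* @ecn_bits: OR of the IPFRAG_ECN_* values seen on all fragments (<= 0xf) */
static int example_finalize_ecn(u8 ecn_bits, struct iphdr *iph)
{
        u8 ecn = ip_frag_ecn_table[ecn_bits];

        if (unlikely(ecn == 0xff))      /* invalid NOT-ECT/ECT mix: drop */
                return -EINVAL;
        iph->tos |= ecn;                /* 0 or INET_ECN_CE */
        return 0;
}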
// SPDX-License-Identifier: GPL-2.0-only /* * Transparent proxy support for Linux/iptables * * Copyright (C) 2007-2008 BalaBit IT Ltd. * Author: Krisztian Kovacs */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/skbuff.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <net/tcp.h> #include <net/udp.h> #include <net/icmp.h> #include <net/sock.h> #include <net/inet_sock.h> #include <net/netfilter/ipv4/nf_defrag_ipv4.h> #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) #include <linux/netfilter_ipv6/ip6_tables.h> #include <net/inet6_hashtables.h> #include <net/netfilter/ipv6/nf_defrag_ipv6.h> #endif #include <net/netfilter/nf_socket.h> #include <linux/netfilter/xt_socket.h> /* "socket" match based redirection (no specific rule) * =================================================== * * There are connections with dynamic endpoints (e.g. FTP data * connection) that the user is unable to add explicit rules * for. These are taken care of by a generic "socket" rule. It is * assumed that the proxy application is trusted to open such * connections without explicit iptables rule (except of course the * generic 'socket' rule). In this case the following sockets are * matched in preference order: * * - match: if there's a fully established connection matching the * _packet_ tuple * * - match: if there's a non-zero bound listener (possibly with a * non-local address) We don't accept zero-bound listeners, since * then local services could intercept traffic going through the * box.
*/ static bool socket_match(const struct sk_buff *skb, struct xt_action_param *par, const struct xt_socket_mtinfo1 *info) { struct sk_buff *pskb = (struct sk_buff *)skb; struct sock *sk = skb->sk; if (sk && !net_eq(xt_net(par), sock_net(sk))) sk = NULL; if (!sk) sk = nf_sk_lookup_slow_v4(xt_net(par), skb, xt_in(par)); if (sk) { bool wildcard; bool transparent = true; /* Ignore sockets listening on INADDR_ANY, * unless XT_SOCKET_NOWILDCARD is set */ wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) && sk_fullsock(sk) && inet_sk(sk)->inet_rcv_saddr == 0); /* Ignore non-transparent sockets, * if XT_SOCKET_TRANSPARENT is used */ if (info->flags & XT_SOCKET_TRANSPARENT) transparent = inet_sk_transparent(sk); if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && transparent && sk_fullsock(sk)) pskb->mark = READ_ONCE(sk->sk_mark); if (sk != skb->sk) sock_gen_put(sk); if (wildcard || !transparent) sk = NULL; } return sk != NULL; } static bool socket_mt4_v0(const struct sk_buff *skb, struct xt_action_param *par) { static struct xt_socket_mtinfo1 xt_info_v0 = { .flags = 0, }; return socket_match(skb, par, &xt_info_v0); } static bool socket_mt4_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) { return socket_match(skb, par, par->matchinfo); } #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) static bool socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo; struct sk_buff *pskb = (struct sk_buff *)skb; struct sock *sk = skb->sk; if (sk && !net_eq(xt_net(par), sock_net(sk))) sk = NULL; if (!sk) sk = nf_sk_lookup_slow_v6(xt_net(par), skb, xt_in(par)); if (sk) { bool wildcard; bool transparent = true; /* Ignore sockets listening on INADDR_ANY * unless XT_SOCKET_NOWILDCARD is set */ wildcard = (!(info->flags & XT_SOCKET_NOWILDCARD) && sk_fullsock(sk) && ipv6_addr_any(&sk->sk_v6_rcv_saddr)); /* Ignore non-transparent sockets, * if XT_SOCKET_TRANSPARENT is used */ if (info->flags & XT_SOCKET_TRANSPARENT) transparent = inet_sk_transparent(sk); if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && transparent && sk_fullsock(sk)) pskb->mark = READ_ONCE(sk->sk_mark); if (sk != skb->sk) sock_gen_put(sk); if (wildcard || !transparent) sk = NULL; } return sk != NULL; } #endif static int socket_mt_enable_defrag(struct net *net, int family) { switch (family) { case NFPROTO_IPV4: return nf_defrag_ipv4_enable(net); #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) case NFPROTO_IPV6: return nf_defrag_ipv6_enable(net); #endif } WARN_ONCE(1, "Unknown family %d\n", family); return 0; } static int socket_mt_v1_check(const struct xt_mtchk_param *par) { const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo; int err; err = socket_mt_enable_defrag(par->net, par->family); if (err) return err; if (info->flags & ~XT_SOCKET_FLAGS_V1) { pr_info_ratelimited("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V1); return -EINVAL; } return 0; } static int socket_mt_v2_check(const struct xt_mtchk_param *par) { const struct xt_socket_mtinfo2 *info = (struct xt_socket_mtinfo2 *) par->matchinfo; int err; err = socket_mt_enable_defrag(par->net, par->family); if (err) return err; if (info->flags & ~XT_SOCKET_FLAGS_V2) { pr_info_ratelimited("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V2); return -EINVAL; } return 0; } static int socket_mt_v3_check(const struct xt_mtchk_param *par) { const struct xt_socket_mtinfo3 *info = (struct xt_socket_mtinfo3 *)par->matchinfo; int err; err = 
socket_mt_enable_defrag(par->net, par->family); if (err) return err; if (info->flags & ~XT_SOCKET_FLAGS_V3) { pr_info_ratelimited("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V3); return -EINVAL; } return 0; } static void socket_mt_destroy(const struct xt_mtdtor_param *par) { if (par->family == NFPROTO_IPV4) nf_defrag_ipv4_disable(par->net); #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) else if (par->family == NFPROTO_IPV6) nf_defrag_ipv6_disable(par->net); #endif } static struct xt_match socket_mt_reg[] __read_mostly = { { .name = "socket", .revision = 0, .family = NFPROTO_IPV4, .match = socket_mt4_v0, .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, }, { .name = "socket", .revision = 1, .family = NFPROTO_IPV4, .match = socket_mt4_v1_v2_v3, .destroy = socket_mt_destroy, .checkentry = socket_mt_v1_check, .matchsize = sizeof(struct xt_socket_mtinfo1), .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, }, #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "socket", .revision = 1, .family = NFPROTO_IPV6, .match = socket_mt6_v1_v2_v3, .checkentry = socket_mt_v1_check, .matchsize = sizeof(struct xt_socket_mtinfo1), .destroy = socket_mt_destroy, .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, }, #endif { .name = "socket", .revision = 2, .family = NFPROTO_IPV4, .match = socket_mt4_v1_v2_v3, .checkentry = socket_mt_v2_check, .destroy = socket_mt_destroy, .matchsize = sizeof(struct xt_socket_mtinfo1), .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, }, #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "socket", .revision = 2, .family = NFPROTO_IPV6, .match = socket_mt6_v1_v2_v3, .checkentry = socket_mt_v2_check, .destroy = socket_mt_destroy, .matchsize = sizeof(struct xt_socket_mtinfo1), .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, }, #endif { .name = "socket", .revision = 3, .family = NFPROTO_IPV4, .match = socket_mt4_v1_v2_v3, .checkentry = socket_mt_v3_check, .destroy = socket_mt_destroy, .matchsize = sizeof(struct xt_socket_mtinfo1), .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, }, #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "socket", .revision = 3, .family = NFPROTO_IPV6, .match = socket_mt6_v1_v2_v3, .checkentry = socket_mt_v3_check, .destroy = socket_mt_destroy, .matchsize = sizeof(struct xt_socket_mtinfo1), .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN), .me = THIS_MODULE, }, #endif }; static int __init socket_mt_init(void) { return xt_register_matches(socket_mt_reg, ARRAY_SIZE(socket_mt_reg)); } static void __exit socket_mt_exit(void) { xt_unregister_matches(socket_mt_reg, ARRAY_SIZE(socket_mt_reg)); } module_init(socket_mt_init); module_exit(socket_mt_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler"); MODULE_DESCRIPTION("x_tables socket match module"); MODULE_ALIAS("ipt_socket"); MODULE_ALIAS("ip6t_socket"); |
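socket_match() above boils down to one pattern: reuse the socket already attached to the skb if it belongs to the right network namespace, otherwise do the slow lookup with nf_sk_lookup_slow_v4(), and release the extra reference with sock_gen_put() afterwards. The sketch below shows that pattern in isolation as a hypothetical netfilter hook (example_hook() is not part of this module) and omits the wildcard and mark-restore handling.

/* Hypothetical netfilter hook sketch: find the socket owning an IPv4 skb
 * the same way socket_match() does, then check for a transparent socket.
 * A real hook would base its verdict (or a packet mark) on the result.
 */
#include <linux/netfilter.h>
#include <linux/printk.h>
#include <linux/skbuff.h>
#include <net/netfilter/nf_socket.h>
#include <net/sock.h>
#include <net/inet_sock.h>

static unsigned int example_hook(void *priv, struct sk_buff *skb,
                                 const struct nf_hook_state *state)
{
        struct sock *sk = skb->sk;
        bool transparent = false;

        if (sk && !net_eq(state->net, sock_net(sk)))
                sk = NULL;
        if (!sk)
                sk = nf_sk_lookup_slow_v4(state->net, skb, state->in);
        if (sk) {
                transparent = inet_sk_transparent(sk);
                if (sk != skb->sk)
                        sock_gen_put(sk);       /* release the lookup reference */
        }
        if (transparent)
                pr_debug("skb owned by a transparent socket\n");
        return NF_ACCEPT;
}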
| 28 28 28 28 28 28 28 28 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 | // SPDX-License-Identifier: GPL-2.0-or-later /* mpi-sub-ui.c - Subtract an unsigned integer from an MPI. * * Copyright 1991, 1993, 1994, 1996, 1999-2002, 2004, 2012, 2013, 2015 * Free Software Foundation, Inc. * * This file was based on the GNU MP Library source file: * https://gmplib.org/repo/gmp-6.2/file/510b83519d1c/mpz/aors_ui.h * * The GNU MP Library is free software; you can redistribute it and/or modify * it under the terms of either: * * * the GNU Lesser General Public License as published by the Free * Software Foundation; either version 3 of the License, or (at your * option) any later version. * * or * * * the GNU General Public License as published by the Free Software * Foundation; either version 2 of the License, or (at your option) any * later version. * * or both in parallel, as here. * * The GNU MP Library is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * for more details. * * You should have received copies of the GNU General Public License and the * GNU Lesser General Public License along with the GNU MP Library. If not, * see https://www.gnu.org/licenses/. */ #include "mpi-internal.h" int mpi_sub_ui(MPI w, MPI u, unsigned long vval) { if (u->nlimbs == 0) { if (mpi_resize(w, 1) < 0) return -ENOMEM; w->d[0] = vval; w->nlimbs = (vval != 0); w->sign = (vval != 0); return 0; } /* If not space for W (and possible carry), increase space. */ if (mpi_resize(w, u->nlimbs + 1)) return -ENOMEM; if (u->sign) { mpi_limb_t cy; cy = mpihelp_add_1(w->d, u->d, u->nlimbs, (mpi_limb_t) vval); w->d[u->nlimbs] = cy; w->nlimbs = u->nlimbs + cy; w->sign = 1; } else { /* The signs are different. Need exact comparison to determine * which operand to subtract from which. */ if (u->nlimbs == 1 && u->d[0] < vval) { w->d[0] = vval - u->d[0]; w->nlimbs = 1; w->sign = 1; } else { mpihelp_sub_1(w->d, u->d, u->nlimbs, (mpi_limb_t) vval); /* Size can decrease with at most one limb. */ w->nlimbs = (u->nlimbs - (w->d[u->nlimbs - 1] == 0)); w->sign = 0; } } mpi_normalize(w); return 0; } EXPORT_SYMBOL_GPL(mpi_sub_ui); |
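/*
 * Worked illustration, not kernel code: mpi_sub_ui() computes w = u - v
 * where v is an ordinary unsigned word, so the sign handling above reduces
 * to three cases. The toy function below reproduces that case split on a
 * single-"limb" value purely to make the branches easier to follow; the
 * struct and names are invented for the example, and the real code also
 * propagates a carry limb and normalizes the result.
 */
#include <stdbool.h>

struct toy_mpi {
	unsigned long mag;	/* magnitude, one limb only          */
	bool neg;		/* sign flag, mirrors the MPI ->sign */
};

static struct toy_mpi toy_sub_ui(struct toy_mpi u, unsigned long v)
{
	struct toy_mpi w;

	if (u.mag == 0) {		/* 0 - v = -v                     */
		w.mag = v;
		w.neg = (v != 0);
	} else if (u.neg) {		/* (-u) - v = -(u + v)            */
		w.mag = u.mag + v;	/* real code keeps the carry limb */
		w.neg = true;
	} else if (u.mag < v) {		/* u - v < 0: subtract the other way */
		w.mag = v - u.mag;
		w.neg = true;
	} else {			/* u - v >= 0                     */
		w.mag = u.mag - v;
		w.neg = false;
	}
	return w;
}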
| 9 85 83 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 | /* SPDX-License-Identifier: GPL-2.0-only */ #ifndef _INPUT_MT_H #define _INPUT_MT_H /* * Input Multitouch Library * * Copyright (c) 2010 Henrik Rydberg */ #include <linux/input.h> #define TRKID_MAX 0xffff #define INPUT_MT_POINTER 0x0001 /* pointer device, e.g. trackpad */ #define INPUT_MT_DIRECT 0x0002 /* direct device, e.g. touchscreen */ #define INPUT_MT_DROP_UNUSED 0x0004 /* drop contacts not seen in frame */ #define INPUT_MT_TRACK 0x0008 /* use in-kernel tracking */ #define INPUT_MT_SEMI_MT 0x0010 /* semi-mt device, finger count handled manually */ /** * struct input_mt_slot - represents the state of an input MT slot * @abs: holds current values of ABS_MT axes for this slot * @frame: last frame at which input_mt_report_slot_state() was called * @key: optional driver designation of this slot */ struct input_mt_slot { int abs[ABS_MT_LAST - ABS_MT_FIRST + 1]; unsigned int frame; unsigned int key; }; /** * struct input_mt - state of tracked contacts * @trkid: stores MT tracking ID for the next contact * @num_slots: number of MT slots the device uses * @slot: MT slot currently being transmitted * @flags: input_mt operation flags * @frame: increases every time input_mt_sync_frame() is called * @red: reduced cost matrix for in-kernel tracking * @slots: array of slots holding current values of tracked contacts */ struct input_mt { int trkid; int num_slots; int slot; unsigned int flags; unsigned int frame; int *red; struct input_mt_slot slots[] __counted_by(num_slots); }; static inline void input_mt_set_value(struct input_mt_slot *slot, unsigned code, int value) { slot->abs[code - ABS_MT_FIRST] = value; } static inline int input_mt_get_value(const struct input_mt_slot *slot, unsigned code) { return slot->abs[code - ABS_MT_FIRST]; } static inline bool input_mt_is_active(const struct input_mt_slot *slot) { return input_mt_get_value(slot, ABS_MT_TRACKING_ID) >= 0; } static inline bool input_mt_is_used(const struct input_mt *mt, const struct input_mt_slot *slot) { return slot->frame == mt->frame; } int input_mt_init_slots(struct input_dev *dev, unsigned int num_slots, unsigned int flags); void input_mt_destroy_slots(struct input_dev *dev); static inline int input_mt_new_trkid(struct input_mt *mt) { return mt->trkid++ & TRKID_MAX; } static inline void input_mt_slot(struct input_dev *dev, int slot) { input_event(dev, EV_ABS, ABS_MT_SLOT, slot); } static inline bool input_is_mt_value(int axis) { return axis >= ABS_MT_FIRST && axis <= ABS_MT_LAST; } static inline bool input_is_mt_axis(int axis) { return axis == ABS_MT_SLOT || input_is_mt_value(axis); } bool input_mt_report_slot_state(struct input_dev *dev, unsigned int tool_type, bool active); static inline void input_mt_report_slot_inactive(struct input_dev *dev) { input_mt_report_slot_state(dev, 0, false); } void input_mt_report_finger_count(struct input_dev *dev, int count); void input_mt_report_pointer_emulation(struct input_dev *dev, bool use_count); void input_mt_drop_unused(struct input_dev *dev); void input_mt_sync_frame(struct input_dev *dev); /** * struct input_mt_pos - contact position * @x: horizontal coordinate * @y: vertical 
coordinate */ struct input_mt_pos { s16 x, y; }; int input_mt_assign_slots(struct input_dev *dev, int *slots, const struct input_mt_pos *pos, int num_pos, int dmax); int input_mt_get_slot_by_key(struct input_dev *dev, int key); #endif |
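/*
 * Usage sketch, not part of mt.h: a typical driver consumes the slot API
 * declared above by allocating slots at probe time and then, for each
 * hardware frame, selecting a slot, reporting its state and coordinates,
 * and closing the frame. The contact array, helper function and
 * EXAMPLE_MAX_CONTACTS below are hypothetical; only the input_* calls are
 * the real API from this header and <linux/input.h>.
 */
#include <linux/input.h>
#include <linux/input/mt.h>

#define EXAMPLE_MAX_CONTACTS 5	/* invented for the example */

struct example_contact {
	bool active;
	int x, y;
};

static void example_report_frame(struct input_dev *dev,
				 const struct example_contact *c)
{
	int i;

	for (i = 0; i < EXAMPLE_MAX_CONTACTS; i++) {
		input_mt_slot(dev, i);
		input_mt_report_slot_state(dev, MT_TOOL_FINGER, c[i].active);
		if (c[i].active) {
			input_report_abs(dev, ABS_MT_POSITION_X, c[i].x);
			input_report_abs(dev, ABS_MT_POSITION_Y, c[i].y);
		}
	}
	/* Drops unused contacts and does pointer emulation as configured. */
	input_mt_sync_frame(dev);
	input_sync(dev);
}

/*
 * At probe time the driver would first declare the axes and slots, e.g.:
 *	input_set_abs_params(dev, ABS_MT_POSITION_X, 0, max_x, 0, 0);
 *	input_set_abs_params(dev, ABS_MT_POSITION_Y, 0, max_y, 0, 0);
 *	input_mt_init_slots(dev, EXAMPLE_MAX_CONTACTS,
 *			    INPUT_MT_DIRECT | INPUT_MT_DROP_UNUSED);
 */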
1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 | // SPDX-License-Identifier: GPL-2.0+ /* * HID driver for Valve Steam Controller * * Copyright (c) 2018 Rodrigo Rivas Costa <rodrigorivascosta@gmail.com> * Copyright (c) 2022 Valve Software * * Supports both the wired and wireless interfaces. * * This controller has a builtin emulation of mouse and keyboard: the right pad * can be used as a mouse, the shoulder buttons are mouse buttons, A and B * buttons are ENTER and ESCAPE, and so on. This is implemented as additional * HID interfaces. * * This is known as the "lizard mode", because apparently lizards like to use * the computer from the coach, without a proper mouse and keyboard. * * This driver will disable the lizard mode when the input device is opened * and re-enable it when the input device is closed, so as not to break user * mode behaviour. The lizard_mode parameter can be used to change that. * * There are a few user space applications (notably Steam Client) that use * the hidraw interface directly to create input devices (XTest, uinput...). * In order to avoid breaking them this driver creates a layered hidraw device, * so it can detect when the client is running and then: * - it will not send any command to the controller. * - this input device will be removed, to avoid double input of the same * user action. * When the client is closed, this input device will be created again. 
* * For additional functions, such as changing the right-pad margin or switching * the led, you can use the user-space tool at: * * https://github.com/rodrigorc/steamctrl */ #include <linux/device.h> #include <linux/input.h> #include <linux/hid.h> #include <linux/module.h> #include <linux/workqueue.h> #include <linux/mutex.h> #include <linux/rcupdate.h> #include <linux/delay.h> #include <linux/power_supply.h> #include "hid-ids.h" MODULE_DESCRIPTION("HID driver for Valve Steam Controller"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Rodrigo Rivas Costa <rodrigorivascosta@gmail.com>"); static bool lizard_mode = true; static DEFINE_MUTEX(steam_devices_lock); static LIST_HEAD(steam_devices); #define STEAM_QUIRK_WIRELESS BIT(0) #define STEAM_QUIRK_DECK BIT(1) /* Touch pads are 40 mm in diameter and 65535 units */ #define STEAM_PAD_RESOLUTION 1638 /* Trigger runs are about 5 mm and 256 units */ #define STEAM_TRIGGER_RESOLUTION 51 /* Joystick runs are about 5 mm and 256 units */ #define STEAM_JOYSTICK_RESOLUTION 51 /* Trigger runs are about 6 mm and 32768 units */ #define STEAM_DECK_TRIGGER_RESOLUTION 5461 /* Joystick runs are about 5 mm and 32768 units */ #define STEAM_DECK_JOYSTICK_RESOLUTION 6553 /* Accelerometer has 16 bit resolution and a range of +/- 2g */ #define STEAM_DECK_ACCEL_RES_PER_G 16384 #define STEAM_DECK_ACCEL_RANGE 32768 #define STEAM_DECK_ACCEL_FUZZ 32 /* Gyroscope has 16 bit resolution and a range of +/- 2000 dps */ #define STEAM_DECK_GYRO_RES_PER_DPS 16 #define STEAM_DECK_GYRO_RANGE 32768 #define STEAM_DECK_GYRO_FUZZ 1 #define STEAM_PAD_FUZZ 256 /* * Commands that can be sent in a feature report. * Thanks to Valve and SDL for the names. */ enum { ID_SET_DIGITAL_MAPPINGS = 0x80, ID_CLEAR_DIGITAL_MAPPINGS = 0x81, ID_GET_DIGITAL_MAPPINGS = 0x82, ID_GET_ATTRIBUTES_VALUES = 0x83, ID_GET_ATTRIBUTE_LABEL = 0x84, ID_SET_DEFAULT_DIGITAL_MAPPINGS = 0x85, ID_FACTORY_RESET = 0x86, ID_SET_SETTINGS_VALUES = 0x87, ID_CLEAR_SETTINGS_VALUES = 0x88, ID_GET_SETTINGS_VALUES = 0x89, ID_GET_SETTING_LABEL = 0x8A, ID_GET_SETTINGS_MAXS = 0x8B, ID_GET_SETTINGS_DEFAULTS = 0x8C, ID_SET_CONTROLLER_MODE = 0x8D, ID_LOAD_DEFAULT_SETTINGS = 0x8E, ID_TRIGGER_HAPTIC_PULSE = 0x8F, ID_TURN_OFF_CONTROLLER = 0x9F, ID_GET_DEVICE_INFO = 0xA1, ID_CALIBRATE_TRACKPADS = 0xA7, ID_RESERVED_0 = 0xA8, ID_SET_SERIAL_NUMBER = 0xA9, ID_GET_TRACKPAD_CALIBRATION = 0xAA, ID_GET_TRACKPAD_FACTORY_CALIBRATION = 0xAB, ID_GET_TRACKPAD_RAW_DATA = 0xAC, ID_ENABLE_PAIRING = 0xAD, ID_GET_STRING_ATTRIBUTE = 0xAE, ID_RADIO_ERASE_RECORDS = 0xAF, ID_RADIO_WRITE_RECORD = 0xB0, ID_SET_DONGLE_SETTING = 0xB1, ID_DONGLE_DISCONNECT_DEVICE = 0xB2, ID_DONGLE_COMMIT_DEVICE = 0xB3, ID_DONGLE_GET_WIRELESS_STATE = 0xB4, ID_CALIBRATE_GYRO = 0xB5, ID_PLAY_AUDIO = 0xB6, ID_AUDIO_UPDATE_START = 0xB7, ID_AUDIO_UPDATE_DATA = 0xB8, ID_AUDIO_UPDATE_COMPLETE = 0xB9, ID_GET_CHIPID = 0xBA, ID_CALIBRATE_JOYSTICK = 0xBF, ID_CALIBRATE_ANALOG_TRIGGERS = 0xC0, ID_SET_AUDIO_MAPPING = 0xC1, ID_CHECK_GYRO_FW_LOAD = 0xC2, ID_CALIBRATE_ANALOG = 0xC3, ID_DONGLE_GET_CONNECTED_SLOTS = 0xC4, ID_RESET_IMU = 0xCE, ID_TRIGGER_HAPTIC_CMD = 0xEA, ID_TRIGGER_RUMBLE_CMD = 0xEB, }; /* Settings IDs */ enum { /* 0 */ SETTING_MOUSE_SENSITIVITY, SETTING_MOUSE_ACCELERATION, SETTING_TRACKBALL_ROTATION_ANGLE, SETTING_HAPTIC_INTENSITY_UNUSED, SETTING_LEFT_GAMEPAD_STICK_ENABLED, SETTING_RIGHT_GAMEPAD_STICK_ENABLED, SETTING_USB_DEBUG_MODE, SETTING_LEFT_TRACKPAD_MODE, SETTING_RIGHT_TRACKPAD_MODE, SETTING_MOUSE_POINTER_ENABLED, /* 10 */ SETTING_DPAD_DEADZONE, SETTING_MINIMUM_MOMENTUM_VEL, 
SETTING_MOMENTUM_DECAY_AMMOUNT, SETTING_TRACKPAD_RELATIVE_MODE_TICKS_PER_PIXEL, SETTING_HAPTIC_INCREMENT, SETTING_DPAD_ANGLE_SIN, SETTING_DPAD_ANGLE_COS, SETTING_MOMENTUM_VERTICAL_DIVISOR, SETTING_MOMENTUM_MAXIMUM_VELOCITY, SETTING_TRACKPAD_Z_ON, /* 20 */ SETTING_TRACKPAD_Z_OFF, SETTING_SENSITIVY_SCALE_AMMOUNT, SETTING_LEFT_TRACKPAD_SECONDARY_MODE, SETTING_RIGHT_TRACKPAD_SECONDARY_MODE, SETTING_SMOOTH_ABSOLUTE_MOUSE, SETTING_STEAMBUTTON_POWEROFF_TIME, SETTING_UNUSED_1, SETTING_TRACKPAD_OUTER_RADIUS, SETTING_TRACKPAD_Z_ON_LEFT, SETTING_TRACKPAD_Z_OFF_LEFT, /* 30 */ SETTING_TRACKPAD_OUTER_SPIN_VEL, SETTING_TRACKPAD_OUTER_SPIN_RADIUS, SETTING_TRACKPAD_OUTER_SPIN_HORIZONTAL_ONLY, SETTING_TRACKPAD_RELATIVE_MODE_DEADZONE, SETTING_TRACKPAD_RELATIVE_MODE_MAX_VEL, SETTING_TRACKPAD_RELATIVE_MODE_INVERT_Y, SETTING_TRACKPAD_DOUBLE_TAP_BEEP_ENABLED, SETTING_TRACKPAD_DOUBLE_TAP_BEEP_PERIOD, SETTING_TRACKPAD_DOUBLE_TAP_BEEP_COUNT, SETTING_TRACKPAD_OUTER_RADIUS_RELEASE_ON_TRANSITION, /* 40 */ SETTING_RADIAL_MODE_ANGLE, SETTING_HAPTIC_INTENSITY_MOUSE_MODE, SETTING_LEFT_DPAD_REQUIRES_CLICK, SETTING_RIGHT_DPAD_REQUIRES_CLICK, SETTING_LED_BASELINE_BRIGHTNESS, SETTING_LED_USER_BRIGHTNESS, SETTING_ENABLE_RAW_JOYSTICK, SETTING_ENABLE_FAST_SCAN, SETTING_IMU_MODE, SETTING_WIRELESS_PACKET_VERSION, /* 50 */ SETTING_SLEEP_INACTIVITY_TIMEOUT, SETTING_TRACKPAD_NOISE_THRESHOLD, SETTING_LEFT_TRACKPAD_CLICK_PRESSURE, SETTING_RIGHT_TRACKPAD_CLICK_PRESSURE, SETTING_LEFT_BUMPER_CLICK_PRESSURE, SETTING_RIGHT_BUMPER_CLICK_PRESSURE, SETTING_LEFT_GRIP_CLICK_PRESSURE, SETTING_RIGHT_GRIP_CLICK_PRESSURE, SETTING_LEFT_GRIP2_CLICK_PRESSURE, SETTING_RIGHT_GRIP2_CLICK_PRESSURE, /* 60 */ SETTING_PRESSURE_MODE, SETTING_CONTROLLER_TEST_MODE, SETTING_TRIGGER_MODE, SETTING_TRACKPAD_Z_THRESHOLD, SETTING_FRAME_RATE, SETTING_TRACKPAD_FILT_CTRL, SETTING_TRACKPAD_CLIP, SETTING_DEBUG_OUTPUT_SELECT, SETTING_TRIGGER_THRESHOLD_PERCENT, SETTING_TRACKPAD_FREQUENCY_HOPPING, /* 70 */ SETTING_HAPTICS_ENABLED, SETTING_STEAM_WATCHDOG_ENABLE, SETTING_TIMP_TOUCH_THRESHOLD_ON, SETTING_TIMP_TOUCH_THRESHOLD_OFF, SETTING_FREQ_HOPPING, SETTING_TEST_CONTROL, SETTING_HAPTIC_MASTER_GAIN_DB, SETTING_THUMB_TOUCH_THRESH, SETTING_DEVICE_POWER_STATUS, SETTING_HAPTIC_INTENSITY, /* 80 */ SETTING_STABILIZER_ENABLED, SETTING_TIMP_MODE_MTE, }; /* Input report identifiers */ enum { ID_CONTROLLER_STATE = 1, ID_CONTROLLER_DEBUG = 2, ID_CONTROLLER_WIRELESS = 3, ID_CONTROLLER_STATUS = 4, ID_CONTROLLER_DEBUG2 = 5, ID_CONTROLLER_SECONDARY_STATE = 6, ID_CONTROLLER_BLE_STATE = 7, ID_CONTROLLER_DECK_STATE = 9 }; /* String attribute identifiers */ enum { ATTRIB_STR_BOARD_SERIAL, ATTRIB_STR_UNIT_SERIAL, }; /* Values for GYRO_MODE (bitmask) */ enum { SETTING_GYRO_MODE_OFF = 0, SETTING_GYRO_MODE_STEERING = BIT(0), SETTING_GYRO_MODE_TILT = BIT(1), SETTING_GYRO_MODE_SEND_ORIENTATION = BIT(2), SETTING_GYRO_MODE_SEND_RAW_ACCEL = BIT(3), SETTING_GYRO_MODE_SEND_RAW_GYRO = BIT(4), }; /* Trackpad modes */ enum { TRACKPAD_ABSOLUTE_MOUSE, TRACKPAD_RELATIVE_MOUSE, TRACKPAD_DPAD_FOUR_WAY_DISCRETE, TRACKPAD_DPAD_FOUR_WAY_OVERLAP, TRACKPAD_DPAD_EIGHT_WAY, TRACKPAD_RADIAL_MODE, TRACKPAD_ABSOLUTE_DPAD, TRACKPAD_NONE, TRACKPAD_GESTURE_KEYBOARD, }; /* Pad identifiers for the deck */ #define STEAM_PAD_LEFT 0 #define STEAM_PAD_RIGHT 1 #define STEAM_PAD_BOTH 2 /* Other random constants */ #define STEAM_SERIAL_LEN 0x15 struct steam_device { struct list_head list; spinlock_t lock; struct hid_device *hdev, *client_hdev; struct mutex report_mutex; unsigned long client_opened; struct input_dev __rcu *input; struct 
input_dev __rcu *sensors; unsigned long quirks; struct work_struct work_connect; bool connected; char serial_no[STEAM_SERIAL_LEN + 1]; struct power_supply_desc battery_desc; struct power_supply __rcu *battery; u8 battery_charge; u16 voltage; struct delayed_work mode_switch; bool did_mode_switch; bool gamepad_mode; struct work_struct rumble_work; u16 rumble_left; u16 rumble_right; unsigned int sensor_timestamp_us; struct work_struct unregister_work; }; static int steam_recv_report(struct steam_device *steam, u8 *data, int size) { struct hid_report *r; u8 *buf; int ret; r = steam->hdev->report_enum[HID_FEATURE_REPORT].report_id_hash[0]; if (!r) { hid_err(steam->hdev, "No HID_FEATURE_REPORT submitted - nothing to read\n"); return -EINVAL; } if (hid_report_len(r) < 64) return -EINVAL; buf = hid_alloc_report_buf(r, GFP_KERNEL); if (!buf) return -ENOMEM; /* * The report ID is always 0, so strip the first byte from the output. * hid_report_len() is not counting the report ID, so +1 to the length * or else we get a EOVERFLOW. We are safe from a buffer overflow * because hid_alloc_report_buf() allocates +7 bytes. */ ret = hid_hw_raw_request(steam->hdev, 0x00, buf, hid_report_len(r) + 1, HID_FEATURE_REPORT, HID_REQ_GET_REPORT); if (ret > 0) memcpy(data, buf + 1, min(size, ret - 1)); kfree(buf); return ret; } static int steam_send_report(struct steam_device *steam, u8 *cmd, int size) { struct hid_report *r; u8 *buf; unsigned int retries = 50; int ret; r = steam->hdev->report_enum[HID_FEATURE_REPORT].report_id_hash[0]; if (!r) { hid_err(steam->hdev, "No HID_FEATURE_REPORT submitted - nothing to read\n"); return -EINVAL; } if (hid_report_len(r) < 64) return -EINVAL; buf = hid_alloc_report_buf(r, GFP_KERNEL); if (!buf) return -ENOMEM; /* The report ID is always 0 */ memcpy(buf + 1, cmd, size); /* * Sometimes the wireless controller fails with EPIPE * when sending a feature report. * Doing a HID_REQ_GET_REPORT and waiting for a while * seems to fix that. */ do { ret = hid_hw_raw_request(steam->hdev, 0, buf, max(size, 64) + 1, HID_FEATURE_REPORT, HID_REQ_SET_REPORT); if (ret != -EPIPE) break; msleep(20); } while (--retries); kfree(buf); if (ret < 0) hid_err(steam->hdev, "%s: error %d (%*ph)\n", __func__, ret, size, cmd); return ret; } static inline int steam_send_report_byte(struct steam_device *steam, u8 cmd) { return steam_send_report(steam, &cmd, 1); } static int steam_write_settings(struct steam_device *steam, /* u8 reg, u16 val */...) 
{ /* Send: 0x87 len (reg valLo valHi)* */ u8 reg; u16 val; u8 cmd[64] = {ID_SET_SETTINGS_VALUES, 0x00}; int ret; va_list args; va_start(args, steam); for (;;) { reg = va_arg(args, int); if (reg == 0) break; val = va_arg(args, int); cmd[cmd[1] + 2] = reg; cmd[cmd[1] + 3] = val & 0xff; cmd[cmd[1] + 4] = val >> 8; cmd[1] += 3; } va_end(args); ret = steam_send_report(steam, cmd, 2 + cmd[1]); if (ret < 0) return ret; /* * Sometimes a lingering report for this command can * get read back instead of the last set report if * this isn't explicitly queried */ return steam_recv_report(steam, cmd, 2 + cmd[1]); } static int steam_get_serial(struct steam_device *steam) { /* * Send: 0xae 0x15 0x01 * Recv: 0xae 0x15 0x01 serialnumber */ int ret = 0; u8 cmd[] = {ID_GET_STRING_ATTRIBUTE, sizeof(steam->serial_no), ATTRIB_STR_UNIT_SERIAL}; u8 reply[3 + STEAM_SERIAL_LEN + 1]; mutex_lock(&steam->report_mutex); ret = steam_send_report(steam, cmd, sizeof(cmd)); if (ret < 0) goto out; ret = steam_recv_report(steam, reply, sizeof(reply)); if (ret < 0) goto out; if (reply[0] != ID_GET_STRING_ATTRIBUTE || reply[1] < 1 || reply[1] > sizeof(steam->serial_no) || reply[2] != ATTRIB_STR_UNIT_SERIAL) { ret = -EIO; goto out; } reply[3 + STEAM_SERIAL_LEN] = 0; strscpy(steam->serial_no, reply + 3, reply[1]); out: mutex_unlock(&steam->report_mutex); return ret; } /* * This command requests the wireless adaptor to post an event * with the connection status. Useful if this driver is loaded when * the controller is already connected. */ static inline int steam_request_conn_status(struct steam_device *steam) { int ret; mutex_lock(&steam->report_mutex); ret = steam_send_report_byte(steam, ID_DONGLE_GET_WIRELESS_STATE); mutex_unlock(&steam->report_mutex); return ret; } /* * Send a haptic pulse to the trackpads * Duration and interval are measured in microseconds, count is the number * of pulses to send for duration time with interval microseconds between them * and gain is measured in decibels, ranging from -24 to +6 */ static inline int steam_haptic_pulse(struct steam_device *steam, u8 pad, u16 duration, u16 interval, u16 count, u8 gain) { int ret; u8 report[10] = {ID_TRIGGER_HAPTIC_PULSE, 8}; /* Left and right are swapped on this report for legacy reasons */ if (pad < STEAM_PAD_BOTH) pad ^= 1; report[2] = pad; report[3] = duration & 0xFF; report[4] = duration >> 8; report[5] = interval & 0xFF; report[6] = interval >> 8; report[7] = count & 0xFF; report[8] = count >> 8; report[9] = gain; mutex_lock(&steam->report_mutex); ret = steam_send_report(steam, report, sizeof(report)); mutex_unlock(&steam->report_mutex); return ret; } static inline int steam_haptic_rumble(struct steam_device *steam, u16 intensity, u16 left_speed, u16 right_speed, u8 left_gain, u8 right_gain) { int ret; u8 report[11] = {ID_TRIGGER_RUMBLE_CMD, 9}; report[3] = intensity & 0xFF; report[4] = intensity >> 8; report[5] = left_speed & 0xFF; report[6] = left_speed >> 8; report[7] = right_speed & 0xFF; report[8] = right_speed >> 8; report[9] = left_gain; report[10] = right_gain; mutex_lock(&steam->report_mutex); ret = steam_send_report(steam, report, sizeof(report)); mutex_unlock(&steam->report_mutex); return ret; } static void steam_haptic_rumble_cb(struct work_struct *work) { struct steam_device *steam = container_of(work, struct steam_device, rumble_work); steam_haptic_rumble(steam, 0, steam->rumble_left, steam->rumble_right, 2, 0); } #ifdef CONFIG_STEAM_FF static int steam_play_effect(struct input_dev *dev, void *data, struct ff_effect *effect) { struct 
steam_device *steam = input_get_drvdata(dev); steam->rumble_left = effect->u.rumble.strong_magnitude; steam->rumble_right = effect->u.rumble.weak_magnitude; return schedule_work(&steam->rumble_work); } #endif static void steam_set_lizard_mode(struct steam_device *steam, bool enable) { if (steam->gamepad_mode) enable = false; mutex_lock(&steam->report_mutex); if (enable) { /* enable esc, enter, cursors */ steam_send_report_byte(steam, ID_SET_DEFAULT_DIGITAL_MAPPINGS); /* reset settings */ steam_send_report_byte(steam, ID_LOAD_DEFAULT_SETTINGS); } else { /* disable esc, enter, cursor */ steam_send_report_byte(steam, ID_CLEAR_DIGITAL_MAPPINGS); if (steam->quirks & STEAM_QUIRK_DECK) { steam_write_settings(steam, SETTING_LEFT_TRACKPAD_MODE, TRACKPAD_NONE, /* disable mouse */ SETTING_RIGHT_TRACKPAD_MODE, TRACKPAD_NONE, /* disable mouse */ SETTING_LEFT_TRACKPAD_CLICK_PRESSURE, 0xFFFF, /* disable haptic click */ SETTING_RIGHT_TRACKPAD_CLICK_PRESSURE, 0xFFFF, /* disable haptic click */ SETTING_STEAM_WATCHDOG_ENABLE, 0, /* disable watchdog that tests if Steam is active */ 0); } else { steam_write_settings(steam, SETTING_LEFT_TRACKPAD_MODE, TRACKPAD_NONE, /* disable mouse */ SETTING_RIGHT_TRACKPAD_MODE, TRACKPAD_NONE, /* disable mouse */ 0); } } mutex_unlock(&steam->report_mutex); } static int steam_input_open(struct input_dev *dev) { struct steam_device *steam = input_get_drvdata(dev); unsigned long flags; bool set_lizard_mode; /* * Disabling lizard mode automatically is only done on the Steam * Controller. On the Steam Deck, this is toggled manually by holding * the options button instead, handled by steam_mode_switch_cb. */ if (!(steam->quirks & STEAM_QUIRK_DECK)) { spin_lock_irqsave(&steam->lock, flags); set_lizard_mode = !steam->client_opened && lizard_mode; spin_unlock_irqrestore(&steam->lock, flags); if (set_lizard_mode) steam_set_lizard_mode(steam, false); } return 0; } static void steam_input_close(struct input_dev *dev) { struct steam_device *steam = input_get_drvdata(dev); unsigned long flags; bool set_lizard_mode; if (!(steam->quirks & STEAM_QUIRK_DECK)) { spin_lock_irqsave(&steam->lock, flags); set_lizard_mode = !steam->client_opened && lizard_mode; spin_unlock_irqrestore(&steam->lock, flags); if (set_lizard_mode) steam_set_lizard_mode(steam, true); } } static enum power_supply_property steam_battery_props[] = { POWER_SUPPLY_PROP_PRESENT, POWER_SUPPLY_PROP_SCOPE, POWER_SUPPLY_PROP_VOLTAGE_NOW, POWER_SUPPLY_PROP_CAPACITY, }; static int steam_battery_get_property(struct power_supply *psy, enum power_supply_property psp, union power_supply_propval *val) { struct steam_device *steam = power_supply_get_drvdata(psy); unsigned long flags; s16 volts; u8 batt; int ret = 0; spin_lock_irqsave(&steam->lock, flags); volts = steam->voltage; batt = steam->battery_charge; spin_unlock_irqrestore(&steam->lock, flags); switch (psp) { case POWER_SUPPLY_PROP_PRESENT: val->intval = 1; break; case POWER_SUPPLY_PROP_SCOPE: val->intval = POWER_SUPPLY_SCOPE_DEVICE; break; case POWER_SUPPLY_PROP_VOLTAGE_NOW: val->intval = volts * 1000; /* mV -> uV */ break; case POWER_SUPPLY_PROP_CAPACITY: val->intval = batt; break; default: ret = -EINVAL; break; } return ret; } static int steam_battery_register(struct steam_device *steam) { struct power_supply *battery; struct power_supply_config battery_cfg = { .drv_data = steam, }; unsigned long flags; int ret; steam->battery_desc.type = POWER_SUPPLY_TYPE_BATTERY; steam->battery_desc.properties = steam_battery_props; steam->battery_desc.num_properties = 
ARRAY_SIZE(steam_battery_props); steam->battery_desc.get_property = steam_battery_get_property; steam->battery_desc.name = devm_kasprintf(&steam->hdev->dev, GFP_KERNEL, "steam-controller-%s-battery", steam->serial_no); if (!steam->battery_desc.name) return -ENOMEM; /* avoid the warning of 0% battery while waiting for the first info */ spin_lock_irqsave(&steam->lock, flags); steam->voltage = 3000; steam->battery_charge = 100; spin_unlock_irqrestore(&steam->lock, flags); battery = power_supply_register(&steam->hdev->dev, &steam->battery_desc, &battery_cfg); if (IS_ERR(battery)) { ret = PTR_ERR(battery); hid_err(steam->hdev, "%s:power_supply_register failed with error %d\n", __func__, ret); return ret; } rcu_assign_pointer(steam->battery, battery); power_supply_powers(battery, &steam->hdev->dev); return 0; } static int steam_input_register(struct steam_device *steam) { struct hid_device *hdev = steam->hdev; struct input_dev *input; int ret; rcu_read_lock(); input = rcu_dereference(steam->input); rcu_read_unlock(); if (input) { dbg_hid("%s: already connected\n", __func__); return 0; } input = input_allocate_device(); if (!input) return -ENOMEM; input_set_drvdata(input, steam); input->dev.parent = &hdev->dev; input->open = steam_input_open; input->close = steam_input_close; input->name = (steam->quirks & STEAM_QUIRK_WIRELESS) ? "Wireless Steam Controller" : (steam->quirks & STEAM_QUIRK_DECK) ? "Steam Deck" : "Steam Controller"; input->phys = hdev->phys; input->uniq = steam->serial_no; input->id.bustype = hdev->bus; input->id.vendor = hdev->vendor; input->id.product = hdev->product; input->id.version = hdev->version; input_set_capability(input, EV_KEY, BTN_TR2); input_set_capability(input, EV_KEY, BTN_TL2); input_set_capability(input, EV_KEY, BTN_TR); input_set_capability(input, EV_KEY, BTN_TL); input_set_capability(input, EV_KEY, BTN_Y); input_set_capability(input, EV_KEY, BTN_B); input_set_capability(input, EV_KEY, BTN_X); input_set_capability(input, EV_KEY, BTN_A); input_set_capability(input, EV_KEY, BTN_DPAD_UP); input_set_capability(input, EV_KEY, BTN_DPAD_RIGHT); input_set_capability(input, EV_KEY, BTN_DPAD_LEFT); input_set_capability(input, EV_KEY, BTN_DPAD_DOWN); input_set_capability(input, EV_KEY, BTN_SELECT); input_set_capability(input, EV_KEY, BTN_MODE); input_set_capability(input, EV_KEY, BTN_START); input_set_capability(input, EV_KEY, BTN_THUMBR); input_set_capability(input, EV_KEY, BTN_THUMBL); input_set_capability(input, EV_KEY, BTN_THUMB); input_set_capability(input, EV_KEY, BTN_THUMB2); if (steam->quirks & STEAM_QUIRK_DECK) { input_set_capability(input, EV_KEY, BTN_BASE); input_set_capability(input, EV_KEY, BTN_TRIGGER_HAPPY1); input_set_capability(input, EV_KEY, BTN_TRIGGER_HAPPY2); input_set_capability(input, EV_KEY, BTN_TRIGGER_HAPPY3); input_set_capability(input, EV_KEY, BTN_TRIGGER_HAPPY4); } else { input_set_capability(input, EV_KEY, BTN_GEAR_DOWN); input_set_capability(input, EV_KEY, BTN_GEAR_UP); } input_set_abs_params(input, ABS_X, -32767, 32767, 0, 0); input_set_abs_params(input, ABS_Y, -32767, 32767, 0, 0); input_set_abs_params(input, ABS_HAT0X, -32767, 32767, STEAM_PAD_FUZZ, 0); input_set_abs_params(input, ABS_HAT0Y, -32767, 32767, STEAM_PAD_FUZZ, 0); if (steam->quirks & STEAM_QUIRK_DECK) { input_set_abs_params(input, ABS_HAT2Y, 0, 32767, 0, 0); input_set_abs_params(input, ABS_HAT2X, 0, 32767, 0, 0); input_set_abs_params(input, ABS_RX, -32767, 32767, 0, 0); input_set_abs_params(input, ABS_RY, -32767, 32767, 0, 0); input_set_abs_params(input, ABS_HAT1X, -32767, 32767, 
STEAM_PAD_FUZZ, 0); input_set_abs_params(input, ABS_HAT1Y, -32767, 32767, STEAM_PAD_FUZZ, 0); input_abs_set_res(input, ABS_X, STEAM_DECK_JOYSTICK_RESOLUTION); input_abs_set_res(input, ABS_Y, STEAM_DECK_JOYSTICK_RESOLUTION); input_abs_set_res(input, ABS_RX, STEAM_DECK_JOYSTICK_RESOLUTION); input_abs_set_res(input, ABS_RY, STEAM_DECK_JOYSTICK_RESOLUTION); input_abs_set_res(input, ABS_HAT1X, STEAM_PAD_RESOLUTION); input_abs_set_res(input, ABS_HAT1Y, STEAM_PAD_RESOLUTION); input_abs_set_res(input, ABS_HAT2Y, STEAM_DECK_TRIGGER_RESOLUTION); input_abs_set_res(input, ABS_HAT2X, STEAM_DECK_TRIGGER_RESOLUTION); } else { input_set_abs_params(input, ABS_HAT2Y, 0, 255, 0, 0); input_set_abs_params(input, ABS_HAT2X, 0, 255, 0, 0); input_set_abs_params(input, ABS_RX, -32767, 32767, STEAM_PAD_FUZZ, 0); input_set_abs_params(input, ABS_RY, -32767, 32767, STEAM_PAD_FUZZ, 0); input_abs_set_res(input, ABS_X, STEAM_JOYSTICK_RESOLUTION); input_abs_set_res(input, ABS_Y, STEAM_JOYSTICK_RESOLUTION); input_abs_set_res(input, ABS_RX, STEAM_PAD_RESOLUTION); input_abs_set_res(input, ABS_RY, STEAM_PAD_RESOLUTION); input_abs_set_res(input, ABS_HAT2Y, STEAM_TRIGGER_RESOLUTION); input_abs_set_res(input, ABS_HAT2X, STEAM_TRIGGER_RESOLUTION); } input_abs_set_res(input, ABS_HAT0X, STEAM_PAD_RESOLUTION); input_abs_set_res(input, ABS_HAT0Y, STEAM_PAD_RESOLUTION); #ifdef CONFIG_STEAM_FF if (steam->quirks & STEAM_QUIRK_DECK) { input_set_capability(input, EV_FF, FF_RUMBLE); ret = input_ff_create_memless(input, NULL, steam_play_effect); if (ret) goto input_register_fail; } #endif ret = input_register_device(input); if (ret) goto input_register_fail; rcu_assign_pointer(steam->input, input); return 0; input_register_fail: input_free_device(input); return ret; } static int steam_sensors_register(struct steam_device *steam) { struct hid_device *hdev = steam->hdev; struct input_dev *sensors; int ret; if (!(steam->quirks & STEAM_QUIRK_DECK)) return 0; rcu_read_lock(); sensors = rcu_dereference(steam->sensors); rcu_read_unlock(); if (sensors) { dbg_hid("%s: already connected\n", __func__); return 0; } sensors = input_allocate_device(); if (!sensors) return -ENOMEM; input_set_drvdata(sensors, steam); sensors->dev.parent = &hdev->dev; sensors->name = "Steam Deck Motion Sensors"; sensors->phys = hdev->phys; sensors->uniq = steam->serial_no; sensors->id.bustype = hdev->bus; sensors->id.vendor = hdev->vendor; sensors->id.product = hdev->product; sensors->id.version = hdev->version; __set_bit(INPUT_PROP_ACCELEROMETER, sensors->propbit); __set_bit(EV_MSC, sensors->evbit); __set_bit(MSC_TIMESTAMP, sensors->mscbit); input_set_abs_params(sensors, ABS_X, -STEAM_DECK_ACCEL_RANGE, STEAM_DECK_ACCEL_RANGE, STEAM_DECK_ACCEL_FUZZ, 0); input_set_abs_params(sensors, ABS_Y, -STEAM_DECK_ACCEL_RANGE, STEAM_DECK_ACCEL_RANGE, STEAM_DECK_ACCEL_FUZZ, 0); input_set_abs_params(sensors, ABS_Z, -STEAM_DECK_ACCEL_RANGE, STEAM_DECK_ACCEL_RANGE, STEAM_DECK_ACCEL_FUZZ, 0); input_abs_set_res(sensors, ABS_X, STEAM_DECK_ACCEL_RES_PER_G); input_abs_set_res(sensors, ABS_Y, STEAM_DECK_ACCEL_RES_PER_G); input_abs_set_res(sensors, ABS_Z, STEAM_DECK_ACCEL_RES_PER_G); input_set_abs_params(sensors, ABS_RX, -STEAM_DECK_GYRO_RANGE, STEAM_DECK_GYRO_RANGE, STEAM_DECK_GYRO_FUZZ, 0); input_set_abs_params(sensors, ABS_RY, -STEAM_DECK_GYRO_RANGE, STEAM_DECK_GYRO_RANGE, STEAM_DECK_GYRO_FUZZ, 0); input_set_abs_params(sensors, ABS_RZ, -STEAM_DECK_GYRO_RANGE, STEAM_DECK_GYRO_RANGE, STEAM_DECK_GYRO_FUZZ, 0); input_abs_set_res(sensors, ABS_RX, STEAM_DECK_GYRO_RES_PER_DPS); 
input_abs_set_res(sensors, ABS_RY, STEAM_DECK_GYRO_RES_PER_DPS); input_abs_set_res(sensors, ABS_RZ, STEAM_DECK_GYRO_RES_PER_DPS); ret = input_register_device(sensors); if (ret) goto sensors_register_fail; rcu_assign_pointer(steam->sensors, sensors); return 0; sensors_register_fail: input_free_device(sensors); return ret; } static void steam_input_unregister(struct steam_device *steam) { struct input_dev *input; rcu_read_lock(); input = rcu_dereference(steam->input); rcu_read_unlock(); if (!input) return; RCU_INIT_POINTER(steam->input, NULL); synchronize_rcu(); input_unregister_device(input); } static void steam_sensors_unregister(struct steam_device *steam) { struct input_dev *sensors; if (!(steam->quirks & STEAM_QUIRK_DECK)) return; rcu_read_lock(); sensors = rcu_dereference(steam->sensors); rcu_read_unlock(); if (!sensors) return; RCU_INIT_POINTER(steam->sensors, NULL); synchronize_rcu(); input_unregister_device(sensors); } static void steam_battery_unregister(struct steam_device *steam) { struct power_supply *battery; rcu_read_lock(); battery = rcu_dereference(steam->battery); rcu_read_unlock(); if (!battery) return; RCU_INIT_POINTER(steam->battery, NULL); synchronize_rcu(); power_supply_unregister(battery); } static int steam_register(struct steam_device *steam) { int ret; unsigned long client_opened; unsigned long flags; /* * This function can be called several times in a row with the * wireless adaptor, without steam_unregister() between them, because * another client send a get_connection_status command, for example. * The battery and serial number are set just once per device. */ if (!steam->serial_no[0]) { /* * Unlikely, but getting the serial could fail, and it is not so * important, so make up a serial number and go on. */ if (steam_get_serial(steam) < 0) strscpy(steam->serial_no, "XXXXXXXXXX", sizeof(steam->serial_no)); hid_info(steam->hdev, "Steam Controller '%s' connected", steam->serial_no); /* ignore battery errors, we can live without it */ if (steam->quirks & STEAM_QUIRK_WIRELESS) steam_battery_register(steam); mutex_lock(&steam_devices_lock); if (list_empty(&steam->list)) list_add(&steam->list, &steam_devices); mutex_unlock(&steam_devices_lock); } spin_lock_irqsave(&steam->lock, flags); client_opened = steam->client_opened; spin_unlock_irqrestore(&steam->lock, flags); if (!client_opened) { steam_set_lizard_mode(steam, lizard_mode); ret = steam_input_register(steam); if (ret != 0) goto steam_register_input_fail; ret = steam_sensors_register(steam); if (ret != 0) goto steam_register_sensors_fail; } return 0; steam_register_sensors_fail: steam_input_unregister(steam); steam_register_input_fail: return ret; } static void steam_unregister(struct steam_device *steam) { steam_battery_unregister(steam); steam_sensors_unregister(steam); steam_input_unregister(steam); if (steam->serial_no[0]) { hid_info(steam->hdev, "Steam Controller '%s' disconnected", steam->serial_no); mutex_lock(&steam_devices_lock); list_del_init(&steam->list); mutex_unlock(&steam_devices_lock); steam->serial_no[0] = 0; } } static void steam_work_connect_cb(struct work_struct *work) { struct steam_device *steam = container_of(work, struct steam_device, work_connect); unsigned long flags; bool connected; int ret; spin_lock_irqsave(&steam->lock, flags); connected = steam->connected; spin_unlock_irqrestore(&steam->lock, flags); if (connected) { ret = steam_register(steam); if (ret) { hid_err(steam->hdev, "%s:steam_register failed with error %d\n", __func__, ret); } } else { steam_unregister(steam); } } static 
void steam_mode_switch_cb(struct work_struct *work) { struct steam_device *steam = container_of(to_delayed_work(work), struct steam_device, mode_switch); unsigned long flags; bool client_opened; if (!lizard_mode) return; steam->gamepad_mode = !steam->gamepad_mode; if (steam->gamepad_mode) steam_set_lizard_mode(steam, false); else { spin_lock_irqsave(&steam->lock, flags); client_opened = steam->client_opened; spin_unlock_irqrestore(&steam->lock, flags); if (!client_opened) steam_set_lizard_mode(steam, lizard_mode); } steam_haptic_pulse(steam, STEAM_PAD_RIGHT, 0x190, 0, 1, 0); if (steam->gamepad_mode) { steam_haptic_pulse(steam, STEAM_PAD_LEFT, 0x14D, 0x14D, 0x2D, 0); } else { steam_haptic_pulse(steam, STEAM_PAD_LEFT, 0x1F4, 0x1F4, 0x1E, 0); } } static void steam_work_unregister_cb(struct work_struct *work) { struct steam_device *steam = container_of(work, struct steam_device, unregister_work); unsigned long flags; bool connected; bool opened; spin_lock_irqsave(&steam->lock, flags); opened = steam->client_opened; connected = steam->connected; spin_unlock_irqrestore(&steam->lock, flags); if (connected) { if (opened) { steam_sensors_unregister(steam); steam_input_unregister(steam); } else { steam_set_lizard_mode(steam, lizard_mode); steam_input_register(steam); steam_sensors_register(steam); } } } static bool steam_is_valve_interface(struct hid_device *hdev) { struct hid_report_enum *rep_enum; /* * The wired device creates 3 interfaces: * 0: emulated mouse. * 1: emulated keyboard. * 2: the real game pad. * The wireless device creates 5 interfaces: * 0: emulated keyboard. * 1-4: slots where up to 4 real game pads will be connected to. * We know which one is the real gamepad interface because they are the * only ones with a feature report. */ rep_enum = &hdev->report_enum[HID_FEATURE_REPORT]; return !list_empty(&rep_enum->report_list); } static int steam_client_ll_parse(struct hid_device *hdev) { struct steam_device *steam = hdev->driver_data; return hid_parse_report(hdev, steam->hdev->dev_rdesc, steam->hdev->dev_rsize); } static int steam_client_ll_start(struct hid_device *hdev) { return 0; } static void steam_client_ll_stop(struct hid_device *hdev) { } static int steam_client_ll_open(struct hid_device *hdev) { struct steam_device *steam = hdev->driver_data; unsigned long flags; spin_lock_irqsave(&steam->lock, flags); steam->client_opened++; spin_unlock_irqrestore(&steam->lock, flags); schedule_work(&steam->unregister_work); return 0; } static void steam_client_ll_close(struct hid_device *hdev) { struct steam_device *steam = hdev->driver_data; unsigned long flags; spin_lock_irqsave(&steam->lock, flags); steam->client_opened--; spin_unlock_irqrestore(&steam->lock, flags); schedule_work(&steam->unregister_work); } static int steam_client_ll_raw_request(struct hid_device *hdev, unsigned char reportnum, u8 *buf, size_t count, unsigned char report_type, int reqtype) { struct steam_device *steam = hdev->driver_data; return hid_hw_raw_request(steam->hdev, reportnum, buf, count, report_type, reqtype); } static const struct hid_ll_driver steam_client_ll_driver = { .parse = steam_client_ll_parse, .start = steam_client_ll_start, .stop = steam_client_ll_stop, .open = steam_client_ll_open, .close = steam_client_ll_close, .raw_request = steam_client_ll_raw_request, }; static struct hid_device *steam_create_client_hid(struct hid_device *hdev) { struct hid_device *client_hdev; client_hdev = hid_allocate_device(); if (IS_ERR(client_hdev)) return client_hdev; client_hdev->ll_driver = &steam_client_ll_driver; 
client_hdev->dev.parent = hdev->dev.parent; client_hdev->bus = hdev->bus; client_hdev->vendor = hdev->vendor; client_hdev->product = hdev->product; client_hdev->version = hdev->version; client_hdev->type = hdev->type; client_hdev->country = hdev->country; strscpy(client_hdev->name, hdev->name, sizeof(client_hdev->name)); strscpy(client_hdev->phys, hdev->phys, sizeof(client_hdev->phys)); /* * Since we use the same device info than the real interface to * trick userspace, we will be calling steam_probe recursively. * We need to recognize the client interface somehow. */ client_hdev->group = HID_GROUP_STEAM; return client_hdev; } static int steam_probe(struct hid_device *hdev, const struct hid_device_id *id) { struct steam_device *steam; int ret; ret = hid_parse(hdev); if (ret) { hid_err(hdev, "%s:parse of hid interface failed\n", __func__); return ret; } /* * The virtual client_dev is only used for hidraw. * Also avoid the recursive probe. */ if (hdev->group == HID_GROUP_STEAM) return hid_hw_start(hdev, HID_CONNECT_HIDRAW); /* * The non-valve interfaces (mouse and keyboard emulation) are * connected without changes. */ if (!steam_is_valve_interface(hdev)) return hid_hw_start(hdev, HID_CONNECT_DEFAULT); steam = devm_kzalloc(&hdev->dev, sizeof(*steam), GFP_KERNEL); if (!steam) return -ENOMEM; steam->hdev = hdev; hid_set_drvdata(hdev, steam); spin_lock_init(&steam->lock); mutex_init(&steam->report_mutex); steam->quirks = id->driver_data; INIT_WORK(&steam->work_connect, steam_work_connect_cb); INIT_DELAYED_WORK(&steam->mode_switch, steam_mode_switch_cb); INIT_LIST_HEAD(&steam->list); INIT_WORK(&steam->rumble_work, steam_haptic_rumble_cb); steam->sensor_timestamp_us = 0; INIT_WORK(&steam->unregister_work, steam_work_unregister_cb); /* * With the real steam controller interface, do not connect hidraw. * Instead, create the client_hid and connect that. 
*/ ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT & ~HID_CONNECT_HIDRAW); if (ret) goto err_cancel_work; ret = hid_hw_open(hdev); if (ret) { hid_err(hdev, "%s:hid_hw_open\n", __func__); goto err_hw_stop; } if (steam->quirks & STEAM_QUIRK_WIRELESS) { hid_info(hdev, "Steam wireless receiver connected"); /* If using a wireless adaptor ask for connection status */ steam->connected = false; steam_request_conn_status(steam); } else { /* A wired connection is always present */ steam->connected = true; ret = steam_register(steam); if (ret) { hid_err(hdev, "%s:steam_register failed with error %d\n", __func__, ret); goto err_hw_close; } } steam->client_hdev = steam_create_client_hid(hdev); if (IS_ERR(steam->client_hdev)) { ret = PTR_ERR(steam->client_hdev); goto err_steam_unregister; } steam->client_hdev->driver_data = steam; ret = hid_add_device(steam->client_hdev); if (ret) goto err_destroy; return 0; err_destroy: hid_destroy_device(steam->client_hdev); err_steam_unregister: if (steam->connected) steam_unregister(steam); err_hw_close: hid_hw_close(hdev); err_hw_stop: hid_hw_stop(hdev); err_cancel_work: cancel_work_sync(&steam->work_connect); cancel_delayed_work_sync(&steam->mode_switch); cancel_work_sync(&steam->rumble_work); cancel_work_sync(&steam->unregister_work); return ret; } static void steam_remove(struct hid_device *hdev) { struct steam_device *steam = hid_get_drvdata(hdev); if (!steam || hdev->group == HID_GROUP_STEAM) { hid_hw_stop(hdev); return; } hid_destroy_device(steam->client_hdev); cancel_delayed_work_sync(&steam->mode_switch); cancel_work_sync(&steam->work_connect); cancel_work_sync(&steam->rumble_work); cancel_work_sync(&steam->unregister_work); steam->client_hdev = NULL; steam->client_opened = 0; if (steam->quirks & STEAM_QUIRK_WIRELESS) { hid_info(hdev, "Steam wireless receiver disconnected"); } hid_hw_close(hdev); hid_hw_stop(hdev); steam_unregister(steam); } static void steam_do_connect_event(struct steam_device *steam, bool connected) { unsigned long flags; bool changed; spin_lock_irqsave(&steam->lock, flags); changed = steam->connected != connected; steam->connected = connected; spin_unlock_irqrestore(&steam->lock, flags); if (changed && schedule_work(&steam->work_connect) == 0) dbg_hid("%s: connected=%d event already queued\n", __func__, connected); } /* * Some input data in the protocol has the opposite sign. * Clamp the values to 32767..-32767 so that the range is * symmetrical and can be negated safely. */ static inline s16 steam_le16(u8 *data) { s16 x = (s16) le16_to_cpup((__le16 *)data); return x == -32768 ? -32767 : x; } /* * The size for this message payload is 60. 
* The known values are: * (* values are not sent through wireless) * (* accelerator/gyro is disabled by default) * Offset| Type | Mapped to |Meaning * -------+-------+-----------+-------------------------- * 4-7 | u32 | -- | sequence number * 8-10 | 24bit | see below | buttons * 11 | u8 | ABS_HAT2Y | left trigger * 12 | u8 | ABS_HAT2X | right trigger * 13-15 | -- | -- | always 0 * 16-17 | s16 | ABS_X/ABS_HAT0X | X value * 18-19 | s16 | ABS_Y/ABS_HAT0Y | Y value * 20-21 | s16 | ABS_RX | right-pad X value * 22-23 | s16 | ABS_RY | right-pad Y value * 24-25 | s16 | -- | * left trigger * 26-27 | s16 | -- | * right trigger * 28-29 | s16 | -- | * accelerometer X value * 30-31 | s16 | -- | * accelerometer Y value * 32-33 | s16 | -- | * accelerometer Z value * 34-35 | s16 | -- | gyro X value * 36-36 | s16 | -- | gyro Y value * 38-39 | s16 | -- | gyro Z value * 40-41 | s16 | -- | quaternion W value * 42-43 | s16 | -- | quaternion X value * 44-45 | s16 | -- | quaternion Y value * 46-47 | s16 | -- | quaternion Z value * 48-49 | -- | -- | always 0 * 50-51 | s16 | -- | * left trigger (uncalibrated) * 52-53 | s16 | -- | * right trigger (uncalibrated) * 54-55 | s16 | -- | * joystick X value (uncalibrated) * 56-57 | s16 | -- | * joystick Y value (uncalibrated) * 58-59 | s16 | -- | * left-pad X value * 60-61 | s16 | -- | * left-pad Y value * 62-63 | u16 | -- | * battery voltage * * The buttons are: * Bit | Mapped to | Description * ------+------------+-------------------------------- * 8.0 | BTN_TR2 | right trigger fully pressed * 8.1 | BTN_TL2 | left trigger fully pressed * 8.2 | BTN_TR | right shoulder * 8.3 | BTN_TL | left shoulder * 8.4 | BTN_Y | button Y * 8.5 | BTN_B | button B * 8.6 | BTN_X | button X * 8.7 | BTN_A | button A * 9.0 | BTN_DPAD_UP | left-pad up * 9.1 | BTN_DPAD_RIGHT | left-pad right * 9.2 | BTN_DPAD_LEFT | left-pad left * 9.3 | BTN_DPAD_DOWN | left-pad down * 9.4 | BTN_SELECT | menu left * 9.5 | BTN_MODE | steam logo * 9.6 | BTN_START | menu right * 9.7 | BTN_GEAR_DOWN | left back lever * 10.0 | BTN_GEAR_UP | right back lever * 10.1 | -- | left-pad clicked * 10.2 | BTN_THUMBR | right-pad clicked * 10.3 | BTN_THUMB | left-pad touched (but see explanation below) * 10.4 | BTN_THUMB2 | right-pad touched * 10.5 | -- | unknown * 10.6 | BTN_THUMBL | joystick clicked * 10.7 | -- | lpad_and_joy */ static void steam_do_input_event(struct steam_device *steam, struct input_dev *input, u8 *data) { /* 24 bits of buttons */ u8 b8, b9, b10; s16 x, y; bool lpad_touched, lpad_and_joy; b8 = data[8]; b9 = data[9]; b10 = data[10]; input_report_abs(input, ABS_HAT2Y, data[11]); input_report_abs(input, ABS_HAT2X, data[12]); /* * These two bits tells how to interpret the values X and Y. * lpad_and_joy tells that the joystick and the lpad are used at the * same time. * lpad_touched tells whether X/Y are to be read as lpad coord or * joystick values. * (lpad_touched || lpad_and_joy) tells if the lpad is really touched. */ lpad_touched = b10 & BIT(3); lpad_and_joy = b10 & BIT(7); x = steam_le16(data + 16); y = -steam_le16(data + 18); input_report_abs(input, lpad_touched ? ABS_HAT0X : ABS_X, x); input_report_abs(input, lpad_touched ? 
ABS_HAT0Y : ABS_Y, y); /* Check if joystick is centered */ if (lpad_touched && !lpad_and_joy) { input_report_abs(input, ABS_X, 0); input_report_abs(input, ABS_Y, 0); } /* Check if lpad is untouched */ if (!(lpad_touched || lpad_and_joy)) { input_report_abs(input, ABS_HAT0X, 0); input_report_abs(input, ABS_HAT0Y, 0); } input_report_abs(input, ABS_RX, steam_le16(data + 20)); input_report_abs(input, ABS_RY, -steam_le16(data + 22)); input_event(input, EV_KEY, BTN_TR2, !!(b8 & BIT(0))); input_event(input, EV_KEY, BTN_TL2, !!(b8 & BIT(1))); input_event(input, EV_KEY, BTN_TR, !!(b8 & BIT(2))); input_event(input, EV_KEY, BTN_TL, !!(b8 & BIT(3))); input_event(input, EV_KEY, BTN_Y, !!(b8 & BIT(4))); input_event(input, EV_KEY, BTN_B, !!(b8 & BIT(5))); input_event(input, EV_KEY, BTN_X, !!(b8 & BIT(6))); input_event(input, EV_KEY, BTN_A, !!(b8 & BIT(7))); input_event(input, EV_KEY, BTN_SELECT, !!(b9 & BIT(4))); input_event(input, EV_KEY, BTN_MODE, !!(b9 & BIT(5))); input_event(input, EV_KEY, BTN_START, !!(b9 & BIT(6))); input_event(input, EV_KEY, BTN_GEAR_DOWN, !!(b9 & BIT(7))); input_event(input, EV_KEY, BTN_GEAR_UP, !!(b10 & BIT(0))); input_event(input, EV_KEY, BTN_THUMBR, !!(b10 & BIT(2))); input_event(input, EV_KEY, BTN_THUMBL, !!(b10 & BIT(6))); input_event(input, EV_KEY, BTN_THUMB, lpad_touched || lpad_and_joy); input_event(input, EV_KEY, BTN_THUMB2, !!(b10 & BIT(4))); input_event(input, EV_KEY, BTN_DPAD_UP, !!(b9 & BIT(0))); input_event(input, EV_KEY, BTN_DPAD_RIGHT, !!(b9 & BIT(1))); input_event(input, EV_KEY, BTN_DPAD_LEFT, !!(b9 & BIT(2))); input_event(input, EV_KEY, BTN_DPAD_DOWN, !!(b9 & BIT(3))); input_sync(input); } /* * The size for this message payload is 56. * The known values are: * Offset| Type | Mapped to |Meaning * -------+-------+-----------+-------------------------- * 4-7 | u32 | -- | sequence number * 8-15 | u64 | see below | buttons * 16-17 | s16 | ABS_HAT0X | left-pad X value * 18-19 | s16 | ABS_HAT0Y | left-pad Y value * 20-21 | s16 | ABS_HAT1X | right-pad X value * 22-23 | s16 | ABS_HAT1Y | right-pad Y value * 24-25 | s16 | IMU ABS_X | accelerometer X value * 26-27 | s16 | IMU ABS_Z | accelerometer Y value * 28-29 | s16 | IMU ABS_Y | accelerometer Z value * 30-31 | s16 | IMU ABS_RX | gyro X value * 32-33 | s16 | IMU ABS_RZ | gyro Y value * 34-35 | s16 | IMU ABS_RY | gyro Z value * 36-37 | s16 | -- | quaternion W value * 38-39 | s16 | -- | quaternion X value * 40-41 | s16 | -- | quaternion Y value * 42-43 | s16 | -- | quaternion Z value * 44-45 | u16 | ABS_HAT2Y | left trigger (uncalibrated) * 46-47 | u16 | ABS_HAT2X | right trigger (uncalibrated) * 48-49 | s16 | ABS_X | left joystick X * 50-51 | s16 | ABS_Y | left joystick Y * 52-53 | s16 | ABS_RX | right joystick X * 54-55 | s16 | ABS_RY | right joystick Y * 56-57 | u16 | -- | left pad pressure * 58-59 | u16 | -- | right pad pressure * * The buttons are: * Bit | Mapped to | Description * ------+------------+-------------------------------- * 8.0 | BTN_TR2 | right trigger fully pressed * 8.1 | BTN_TL2 | left trigger fully pressed * 8.2 | BTN_TR | right shoulder * 8.3 | BTN_TL | left shoulder * 8.4 | BTN_Y | button Y * 8.5 | BTN_B | button B * 8.6 | BTN_X | button X * 8.7 | BTN_A | button A * 9.0 | BTN_DPAD_UP | left-pad up * 9.1 | BTN_DPAD_RIGHT | left-pad right * 9.2 | BTN_DPAD_LEFT | left-pad left * 9.3 | BTN_DPAD_DOWN | left-pad down * 9.4 | BTN_SELECT | menu left * 9.5 | BTN_MODE | steam logo * 9.6 | BTN_START | menu right * 9.7 | BTN_TRIGGER_HAPPY3 | left bottom grip button * 10.0 | BTN_TRIGGER_HAPPY4 | right bottom 
grip button * 10.1 | BTN_THUMB | left pad pressed * 10.2 | BTN_THUMB2 | right pad pressed * 10.3 | -- | left pad touched * 10.4 | -- | right pad touched * 10.5 | -- | unknown * 10.6 | BTN_THUMBL | left joystick clicked * 10.7 | -- | unknown * 11.0 | -- | unknown * 11.1 | -- | unknown * 11.2 | BTN_THUMBR | right joystick clicked * 11.3 | -- | unknown * 11.4 | -- | unknown * 11.5 | -- | unknown * 11.6 | -- | unknown * 11.7 | -- | unknown * 12.0 | -- | unknown * 12.1 | -- | unknown * 12.2 | -- | unknown * 12.3 | -- | unknown * 12.4 | -- | unknown * 12.5 | -- | unknown * 12.6 | -- | unknown * 12.7 | -- | unknown * 13.0 | -- | unknown * 13.1 | BTN_TRIGGER_HAPPY1 | left top grip button * 13.2 | BTN_TRIGGER_HAPPY2 | right top grip button * 13.3 | -- | unknown * 13.4 | -- | unknown * 13.5 | -- | unknown * 13.6 | -- | left joystick touched * 13.7 | -- | right joystick touched * 14.0 | -- | unknown * 14.1 | -- | unknown * 14.2 | BTN_BASE | quick access button * 14.3 | -- | unknown * 14.4 | -- | unknown * 14.5 | -- | unknown * 14.6 | -- | unknown * 14.7 | -- | unknown * 15.0 | -- | unknown * 15.1 | -- | unknown * 15.2 | -- | unknown * 15.3 | -- | unknown * 15.4 | -- | unknown * 15.5 | -- | unknown * 15.6 | -- | unknown * 15.7 | -- | unknown */ static void steam_do_deck_input_event(struct steam_device *steam, struct input_dev *input, u8 *data) { u8 b8, b9, b10, b11, b13, b14; bool lpad_touched, rpad_touched; b8 = data[8]; b9 = data[9]; b10 = data[10]; b11 = data[11]; b13 = data[13]; b14 = data[14]; if (!(b9 & BIT(6)) && steam->did_mode_switch) { steam->did_mode_switch = false; cancel_delayed_work(&steam->mode_switch); } else if (!steam->client_opened && (b9 & BIT(6)) && !steam->did_mode_switch) { steam->did_mode_switch = true; schedule_delayed_work(&steam->mode_switch, 45 * HZ / 100); } if (!steam->gamepad_mode && lizard_mode) return; lpad_touched = b10 & BIT(3); rpad_touched = b10 & BIT(4); if (lpad_touched) { input_report_abs(input, ABS_HAT0X, steam_le16(data + 16)); input_report_abs(input, ABS_HAT0Y, steam_le16(data + 18)); } else { input_report_abs(input, ABS_HAT0X, 0); input_report_abs(input, ABS_HAT0Y, 0); } if (rpad_touched) { input_report_abs(input, ABS_HAT1X, steam_le16(data + 20)); input_report_abs(input, ABS_HAT1Y, steam_le16(data + 22)); } else { input_report_abs(input, ABS_HAT1X, 0); input_report_abs(input, ABS_HAT1Y, 0); } input_report_abs(input, ABS_X, steam_le16(data + 48)); input_report_abs(input, ABS_Y, -steam_le16(data + 50)); input_report_abs(input, ABS_RX, steam_le16(data + 52)); input_report_abs(input, ABS_RY, -steam_le16(data + 54)); input_report_abs(input, ABS_HAT2Y, steam_le16(data + 44)); input_report_abs(input, ABS_HAT2X, steam_le16(data + 46)); input_event(input, EV_KEY, BTN_TR2, !!(b8 & BIT(0))); input_event(input, EV_KEY, BTN_TL2, !!(b8 & BIT(1))); input_event(input, EV_KEY, BTN_TR, !!(b8 & BIT(2))); input_event(input, EV_KEY, BTN_TL, !!(b8 & BIT(3))); input_event(input, EV_KEY, BTN_Y, !!(b8 & BIT(4))); input_event(input, EV_KEY, BTN_B, !!(b8 & BIT(5))); input_event(input, EV_KEY, BTN_X, !!(b8 & BIT(6))); input_event(input, EV_KEY, BTN_A, !!(b8 & BIT(7))); input_event(input, EV_KEY, BTN_SELECT, !!(b9 & BIT(4))); input_event(input, EV_KEY, BTN_MODE, !!(b9 & BIT(5))); input_event(input, EV_KEY, BTN_START, !!(b9 & BIT(6))); input_event(input, EV_KEY, BTN_TRIGGER_HAPPY3, !!(b9 & BIT(7))); input_event(input, EV_KEY, BTN_TRIGGER_HAPPY4, !!(b10 & BIT(0))); input_event(input, EV_KEY, BTN_THUMBL, !!(b10 & BIT(6))); input_event(input, EV_KEY, BTN_THUMBR, !!(b11 & BIT(2))); 
input_event(input, EV_KEY, BTN_DPAD_UP, !!(b9 & BIT(0))); input_event(input, EV_KEY, BTN_DPAD_RIGHT, !!(b9 & BIT(1))); input_event(input, EV_KEY, BTN_DPAD_LEFT, !!(b9 & BIT(2))); input_event(input, EV_KEY, BTN_DPAD_DOWN, !!(b9 & BIT(3))); input_event(input, EV_KEY, BTN_THUMB, !!(b10 & BIT(1))); input_event(input, EV_KEY, BTN_THUMB2, !!(b10 & BIT(2))); input_event(input, EV_KEY, BTN_TRIGGER_HAPPY1, !!(b13 & BIT(1))); input_event(input, EV_KEY, BTN_TRIGGER_HAPPY2, !!(b13 & BIT(2))); input_event(input, EV_KEY, BTN_BASE, !!(b14 & BIT(2))); input_sync(input); } static void steam_do_deck_sensors_event(struct steam_device *steam, struct input_dev *sensors, u8 *data) { /* * The deck input report is received every 4 ms on average, * with a jitter of +/- 4 ms even though the USB descriptor claims * that it uses 1 kHz. * Since the HID report does not include a sensor timestamp, * use a fixed increment here. */ steam->sensor_timestamp_us += 4000; if (!steam->gamepad_mode && lizard_mode) return; input_event(sensors, EV_MSC, MSC_TIMESTAMP, steam->sensor_timestamp_us); input_report_abs(sensors, ABS_X, steam_le16(data + 24)); input_report_abs(sensors, ABS_Z, -steam_le16(data + 26)); input_report_abs(sensors, ABS_Y, steam_le16(data + 28)); input_report_abs(sensors, ABS_RX, steam_le16(data + 30)); input_report_abs(sensors, ABS_RZ, -steam_le16(data + 32)); input_report_abs(sensors, ABS_RY, steam_le16(data + 34)); input_sync(sensors); } /* * The size for this message payload is 11. * The known values are: * Offset| Type | Meaning * -------+-------+--------------------------- * 4-7 | u32 | sequence number * 8-11 | -- | always 0 * 12-13 | u16 | voltage (mV) * 14 | u8 | battery percent */ static void steam_do_battery_event(struct steam_device *steam, struct power_supply *battery, u8 *data) { unsigned long flags; s16 volts = steam_le16(data + 12); u8 batt = data[14]; /* Creating the battery may have failed */ rcu_read_lock(); battery = rcu_dereference(steam->battery); if (likely(battery)) { spin_lock_irqsave(&steam->lock, flags); steam->voltage = volts; steam->battery_charge = batt; spin_unlock_irqrestore(&steam->lock, flags); power_supply_changed(battery); } rcu_read_unlock(); } static int steam_raw_event(struct hid_device *hdev, struct hid_report *report, u8 *data, int size) { struct steam_device *steam = hid_get_drvdata(hdev); struct input_dev *input; struct input_dev *sensors; struct power_supply *battery; if (!steam) return 0; if (steam->client_opened) hid_input_report(steam->client_hdev, HID_FEATURE_REPORT, data, size, 0); /* * All messages are size=64, all values little-endian. * The format is: * Offset| Meaning * -------+-------------------------------------------- * 0-1 | always 0x01, 0x00, maybe protocol version? 
* 2 | type of message * 3 | length of the real payload (not checked) * 4-n | payload data, depends on the type * * There are these known types of message: * 0x01: input data (60 bytes) * 0x03: wireless connect/disconnect (1 byte) * 0x04: battery status (11 bytes) * 0x09: Steam Deck input data (56 bytes) */ if (size != 64 || data[0] != 1 || data[1] != 0) return 0; switch (data[2]) { case ID_CONTROLLER_STATE: if (steam->client_opened) return 0; rcu_read_lock(); input = rcu_dereference(steam->input); if (likely(input)) steam_do_input_event(steam, input, data); rcu_read_unlock(); break; case ID_CONTROLLER_DECK_STATE: if (steam->client_opened) return 0; rcu_read_lock(); input = rcu_dereference(steam->input); if (likely(input)) steam_do_deck_input_event(steam, input, data); sensors = rcu_dereference(steam->sensors); if (likely(sensors)) steam_do_deck_sensors_event(steam, sensors, data); rcu_read_unlock(); break; case ID_CONTROLLER_WIRELESS: /* * The payload of this event is a single byte: * 0x01: disconnected. * 0x02: connected. */ switch (data[4]) { case 0x01: steam_do_connect_event(steam, false); break; case 0x02: steam_do_connect_event(steam, true); break; } break; case ID_CONTROLLER_STATUS: if (steam->quirks & STEAM_QUIRK_WIRELESS) { rcu_read_lock(); battery = rcu_dereference(steam->battery); if (likely(battery)) { steam_do_battery_event(steam, battery, data); } else { dbg_hid( "%s: battery data without connect event\n", __func__); steam_do_connect_event(steam, true); } rcu_read_unlock(); } break; } return 0; } static int steam_param_set_lizard_mode(const char *val, const struct kernel_param *kp) { struct steam_device *steam; int ret; ret = param_set_bool(val, kp); if (ret) return ret; mutex_lock(&steam_devices_lock); list_for_each_entry(steam, &steam_devices, list) { if (!steam->client_opened) steam_set_lizard_mode(steam, lizard_mode); } mutex_unlock(&steam_devices_lock); return 0; } static const struct kernel_param_ops steam_lizard_mode_ops = { .set = steam_param_set_lizard_mode, .get = param_get_bool, }; module_param_cb(lizard_mode, &steam_lizard_mode_ops, &lizard_mode, 0644); MODULE_PARM_DESC(lizard_mode, "Enable mouse and keyboard emulation (lizard mode) when the gamepad is not in use"); static const struct hid_device_id steam_controllers[] = { { /* Wired Steam Controller */ HID_USB_DEVICE(USB_VENDOR_ID_VALVE, USB_DEVICE_ID_STEAM_CONTROLLER) }, { /* Wireless Steam Controller */ HID_USB_DEVICE(USB_VENDOR_ID_VALVE, USB_DEVICE_ID_STEAM_CONTROLLER_WIRELESS), .driver_data = STEAM_QUIRK_WIRELESS }, { /* Steam Deck */ HID_USB_DEVICE(USB_VENDOR_ID_VALVE, USB_DEVICE_ID_STEAM_DECK), .driver_data = STEAM_QUIRK_DECK }, {} }; MODULE_DEVICE_TABLE(hid, steam_controllers); static struct hid_driver steam_controller_driver = { .name = "hid-steam", .id_table = steam_controllers, .probe = steam_probe, .remove = steam_remove, .raw_event = steam_raw_event, }; module_hid_driver(steam_controller_driver); |
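/*
 * Editorial sketch (not part of the driver): given one 64-byte input report
 * laid out as documented in the tables above, this is how the 24-bit button
 * bitmap and a signed little-endian 16-bit axis field could be extracted.
 * The helper names below are illustrative only; the driver itself reads the
 * button bytes directly and uses its steam_le16() helper for axis values.
 */
static inline u32 example_steam_buttons(const u8 *data)
{
	/* bytes 8..10 carry the button bits, least significant byte first */
	return data[8] | (data[9] << 8) | (data[10] << 16);
}

static inline s16 example_steam_axis(const u8 *data, unsigned int offset)
{
	/* all multi-byte fields in the report are little-endian */
	return (s16)(data[offset] | (data[offset + 1] << 8));
}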
// SPDX-License-Identifier: GPL-2.0
/*
 * This file contains the base functions to manage periodic tick
 * related events.
 *
 * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
 * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
 */
#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/nmi.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <trace/events/power.h>

#include <asm/irq_regs.h>

#include "tick-internal.h"

/*
 * Tick devices
 */
DEFINE_PER_CPU(struct tick_device, tick_cpu_device);

/*
 * Tick next event: keeps track of the tick time. It's updated by the
 * CPU which handles the tick and protected by jiffies_lock. There is
 * no requirement to write hold the jiffies seqcount for it.
 */
ktime_t tick_next_period;

/*
 * tick_do_timer_cpu is a timer core internal variable which holds the CPU NR
 * which is responsible for calling do_timer(), i.e. the timekeeping stuff. This
 * variable has two functions:
 *
 * 1) Prevent a thundering herd issue of a gazillion of CPUs trying to grab the
 *    timekeeping lock all at once.
Only the CPU which is assigned to do the * update is handling it. * * 2) Hand off the duty in the NOHZ idle case by setting the value to * TICK_DO_TIMER_NONE, i.e. a non existing CPU. So the next cpu which looks * at it will take over and keep the time keeping alive. The handover * procedure also covers cpu hotplug. */ int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT; #ifdef CONFIG_NO_HZ_FULL /* * tick_do_timer_boot_cpu indicates the boot CPU temporarily owns * tick_do_timer_cpu and it should be taken over by an eligible secondary * when one comes online. */ static int tick_do_timer_boot_cpu __read_mostly = -1; #endif /* * Debugging: see timer_list.c */ struct tick_device *tick_get_device(int cpu) { return &per_cpu(tick_cpu_device, cpu); } /** * tick_is_oneshot_available - check for a oneshot capable event device */ int tick_is_oneshot_available(void) { struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev); if (!dev || !(dev->features & CLOCK_EVT_FEAT_ONESHOT)) return 0; if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) return 1; return tick_broadcast_oneshot_available(); } /* * Periodic tick */ static void tick_periodic(int cpu) { if (READ_ONCE(tick_do_timer_cpu) == cpu) { raw_spin_lock(&jiffies_lock); write_seqcount_begin(&jiffies_seq); /* Keep track of the next tick event */ tick_next_period = ktime_add_ns(tick_next_period, TICK_NSEC); do_timer(1); write_seqcount_end(&jiffies_seq); raw_spin_unlock(&jiffies_lock); update_wall_time(); } update_process_times(user_mode(get_irq_regs())); profile_tick(CPU_PROFILING); } /* * Event handler for periodic ticks */ void tick_handle_periodic(struct clock_event_device *dev) { int cpu = smp_processor_id(); ktime_t next = dev->next_event; tick_periodic(cpu); /* * The cpu might have transitioned to HIGHRES or NOHZ mode via * update_process_times() -> run_local_timers() -> * hrtimer_run_queues(). */ if (IS_ENABLED(CONFIG_TICK_ONESHOT) && dev->event_handler != tick_handle_periodic) return; if (!clockevent_state_oneshot(dev)) return; for (;;) { /* * Setup the next period for devices, which do not have * periodic mode: */ next = ktime_add_ns(next, TICK_NSEC); if (!clockevents_program_event(dev, next, false)) return; /* * Have to be careful here. If we're in oneshot mode, * before we call tick_periodic() in a loop, we need * to be sure we're using a real hardware clocksource. * Otherwise we could get trapped in an infinite * loop, as the tick_periodic() increments jiffies, * which then will increment time, possibly causing * the loop to trigger again and again. */ if (timekeeping_valid_for_hres()) tick_periodic(cpu); } } /* * Setup the device for a periodic tick */ void tick_setup_periodic(struct clock_event_device *dev, int broadcast) { tick_set_periodic_handler(dev, broadcast); /* Broadcast setup ? 
*/ if (!tick_device_is_functional(dev)) return; if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) && !tick_broadcast_oneshot_active()) { clockevents_switch_state(dev, CLOCK_EVT_STATE_PERIODIC); } else { unsigned int seq; ktime_t next; do { seq = read_seqcount_begin(&jiffies_seq); next = tick_next_period; } while (read_seqcount_retry(&jiffies_seq, seq)); clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT); for (;;) { if (!clockevents_program_event(dev, next, false)) return; next = ktime_add_ns(next, TICK_NSEC); } } } /* * Setup the tick device */ static void tick_setup_device(struct tick_device *td, struct clock_event_device *newdev, int cpu, const struct cpumask *cpumask) { void (*handler)(struct clock_event_device *) = NULL; ktime_t next_event = 0; /* * First device setup ? */ if (!td->evtdev) { /* * If no cpu took the do_timer update, assign it to * this cpu: */ if (READ_ONCE(tick_do_timer_cpu) == TICK_DO_TIMER_BOOT) { WRITE_ONCE(tick_do_timer_cpu, cpu); tick_next_period = ktime_get(); #ifdef CONFIG_NO_HZ_FULL /* * The boot CPU may be nohz_full, in which case the * first housekeeping secondary will take do_timer() * from it. */ if (tick_nohz_full_cpu(cpu)) tick_do_timer_boot_cpu = cpu; } else if (tick_do_timer_boot_cpu != -1 && !tick_nohz_full_cpu(cpu)) { tick_do_timer_boot_cpu = -1; /* * The boot CPU will stay in periodic (NOHZ disabled) * mode until clocksource_done_booting() called after * smp_init() selects a high resolution clocksource and * timekeeping_notify() kicks the NOHZ stuff alive. * * So this WRITE_ONCE can only race with the READ_ONCE * check in tick_periodic() but this race is harmless. */ WRITE_ONCE(tick_do_timer_cpu, cpu); #endif } /* * Startup in periodic mode first. */ td->mode = TICKDEV_MODE_PERIODIC; } else { handler = td->evtdev->event_handler; next_event = td->evtdev->next_event; td->evtdev->event_handler = clockevents_handle_noop; } td->evtdev = newdev; /* * When the device is not per cpu, pin the interrupt to the * current cpu: */ if (!cpumask_equal(newdev->cpumask, cpumask)) irq_set_affinity(newdev->irq, cpumask); /* * When global broadcasting is active, check if the current * device is registered as a placeholder for broadcast mode. * This allows us to handle this x86 misfeature in a generic * way. This function also returns !=0 when we keep the * current active broadcast state for this CPU. 
*/ if (tick_device_uses_broadcast(newdev, cpu)) return; if (td->mode == TICKDEV_MODE_PERIODIC) tick_setup_periodic(newdev, 0); else tick_setup_oneshot(newdev, handler, next_event); } void tick_install_replacement(struct clock_event_device *newdev) { struct tick_device *td = this_cpu_ptr(&tick_cpu_device); int cpu = smp_processor_id(); clockevents_exchange_device(td->evtdev, newdev); tick_setup_device(td, newdev, cpu, cpumask_of(cpu)); if (newdev->features & CLOCK_EVT_FEAT_ONESHOT) tick_oneshot_notify(); } static bool tick_check_percpu(struct clock_event_device *curdev, struct clock_event_device *newdev, int cpu) { if (!cpumask_test_cpu(cpu, newdev->cpumask)) return false; if (cpumask_equal(newdev->cpumask, cpumask_of(cpu))) return true; /* Check if irq affinity can be set */ if (newdev->irq >= 0 && !irq_can_set_affinity(newdev->irq)) return false; /* Prefer an existing cpu local device */ if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu))) return false; return true; } static bool tick_check_preferred(struct clock_event_device *curdev, struct clock_event_device *newdev) { /* Prefer oneshot capable device */ if (!(newdev->features & CLOCK_EVT_FEAT_ONESHOT)) { if (curdev && (curdev->features & CLOCK_EVT_FEAT_ONESHOT)) return false; if (tick_oneshot_mode_active()) return false; } /* * Use the higher rated one, but prefer a CPU local device with a lower * rating than a non-CPU local device */ return !curdev || newdev->rating > curdev->rating || !cpumask_equal(curdev->cpumask, newdev->cpumask); } /* * Check whether the new device is a better fit than curdev. curdev * can be NULL ! */ bool tick_check_replacement(struct clock_event_device *curdev, struct clock_event_device *newdev) { if (!tick_check_percpu(curdev, newdev, smp_processor_id())) return false; return tick_check_preferred(curdev, newdev); } /* * Check, if the new registered device should be used. Called with * clockevents_lock held and interrupts disabled. */ void tick_check_new_device(struct clock_event_device *newdev) { struct clock_event_device *curdev; struct tick_device *td; int cpu; cpu = smp_processor_id(); td = &per_cpu(tick_cpu_device, cpu); curdev = td->evtdev; if (!tick_check_replacement(curdev, newdev)) goto out_bc; if (!try_module_get(newdev->owner)) return; /* * Replace the eventually existing device by the new * device. If the current device is the broadcast device, do * not give it back to the clockevents layer ! */ if (tick_is_broadcast_device(curdev)) { clockevents_shutdown(curdev); curdev = NULL; } clockevents_exchange_device(curdev, newdev); tick_setup_device(td, newdev, cpu, cpumask_of(cpu)); if (newdev->features & CLOCK_EVT_FEAT_ONESHOT) tick_oneshot_notify(); return; out_bc: /* * Can the new device be used as a broadcast device ? */ tick_install_broadcast_device(newdev, cpu); } /** * tick_broadcast_oneshot_control - Enter/exit broadcast oneshot mode * @state: The target state (enter/exit) * * The system enters/leaves a state, where affected devices might stop * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups. * * Called with interrupts disabled, so clockevents_lock is not * required here because the local clock event device cannot go away * under us. 
*/ int tick_broadcast_oneshot_control(enum tick_broadcast_state state) { struct tick_device *td = this_cpu_ptr(&tick_cpu_device); if (!(td->evtdev->features & CLOCK_EVT_FEAT_C3STOP)) return 0; return __tick_broadcast_oneshot_control(state); } EXPORT_SYMBOL_GPL(tick_broadcast_oneshot_control); #ifdef CONFIG_HOTPLUG_CPU void tick_assert_timekeeping_handover(void) { WARN_ON_ONCE(tick_do_timer_cpu == smp_processor_id()); } /* * Stop the tick and transfer the timekeeping job away from a dying cpu. */ int tick_cpu_dying(unsigned int dying_cpu) { /* * If the current CPU is the timekeeper, it's the only one that can * safely hand over its duty. Also all online CPUs are in stop * machine, guaranteed not to be idle, therefore there is no * concurrency and it's safe to pick any online successor. */ if (tick_do_timer_cpu == dying_cpu) tick_do_timer_cpu = cpumask_first(cpu_online_mask); /* Make sure the CPU won't try to retake the timekeeping duty */ tick_sched_timer_dying(dying_cpu); /* Remove CPU from timer broadcasting */ tick_offline_cpu(dying_cpu); return 0; } /* * Shutdown an event device on a given cpu: * * This is called on a life CPU, when a CPU is dead. So we cannot * access the hardware device itself. * We just set the mode and remove it from the lists. */ void tick_shutdown(unsigned int cpu) { struct tick_device *td = &per_cpu(tick_cpu_device, cpu); struct clock_event_device *dev = td->evtdev; td->mode = TICKDEV_MODE_PERIODIC; if (dev) { /* * Prevent that the clock events layer tries to call * the set mode function! */ clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED); clockevents_exchange_device(dev, NULL); dev->event_handler = clockevents_handle_noop; td->evtdev = NULL; } } #endif /** * tick_suspend_local - Suspend the local tick device * * Called from the local cpu for freeze with interrupts disabled. * * No locks required. Nothing can change the per cpu device. */ void tick_suspend_local(void) { struct tick_device *td = this_cpu_ptr(&tick_cpu_device); clockevents_shutdown(td->evtdev); } /** * tick_resume_local - Resume the local tick device * * Called from the local CPU for unfreeze or XEN resume magic. * * No locks required. Nothing can change the per cpu device. */ void tick_resume_local(void) { struct tick_device *td = this_cpu_ptr(&tick_cpu_device); bool broadcast = tick_resume_check_broadcast(); clockevents_tick_resume(td->evtdev); if (!broadcast) { if (td->mode == TICKDEV_MODE_PERIODIC) tick_setup_periodic(td->evtdev, 0); else tick_resume_oneshot(); } /* * Ensure that hrtimers are up to date and the clockevents device * is reprogrammed correctly when high resolution timers are * enabled. */ hrtimers_resume_local(); } /** * tick_suspend - Suspend the tick and the broadcast device * * Called from syscore_suspend() via timekeeping_suspend with only one * CPU online and interrupts disabled or from tick_unfreeze() under * tick_freeze_lock. * * No locks required. Nothing can change the per cpu device. */ void tick_suspend(void) { tick_suspend_local(); tick_suspend_broadcast(); } /** * tick_resume - Resume the tick and the broadcast device * * Called from syscore_resume() via timekeeping_resume with only one * CPU online and interrupts disabled. * * No locks required. Nothing can change the per cpu device. 
*/ void tick_resume(void) { tick_resume_broadcast(); tick_resume_local(); } #ifdef CONFIG_SUSPEND static DEFINE_RAW_SPINLOCK(tick_freeze_lock); static DEFINE_WAIT_OVERRIDE_MAP(tick_freeze_map, LD_WAIT_SLEEP); static unsigned int tick_freeze_depth; /** * tick_freeze - Suspend the local tick and (possibly) timekeeping. * * Check if this is the last online CPU executing the function and if so, * suspend timekeeping. Otherwise suspend the local tick. * * Call with interrupts disabled. Must be balanced with %tick_unfreeze(). * Interrupts must not be enabled before the subsequent %tick_unfreeze(). */ void tick_freeze(void) { raw_spin_lock(&tick_freeze_lock); tick_freeze_depth++; if (tick_freeze_depth == num_online_cpus()) { trace_suspend_resume(TPS("timekeeping_freeze"), smp_processor_id(), true); /* * All other CPUs have their interrupts disabled and are * suspended to idle. Other tasks have been frozen so there * is no scheduling happening. This means that there is no * concurrency in the system at this point. Therefore it is * okay to acquire a sleeping lock on PREEMPT_RT, such as a * spinlock, because the lock cannot be held by other CPUs * or threads and acquiring it cannot block. * * Inform lockdep about the situation. */ lock_map_acquire_try(&tick_freeze_map); system_state = SYSTEM_SUSPEND; sched_clock_suspend(); timekeeping_suspend(); lock_map_release(&tick_freeze_map); } else { tick_suspend_local(); } raw_spin_unlock(&tick_freeze_lock); } /** * tick_unfreeze - Resume the local tick and (possibly) timekeeping. * * Check if this is the first CPU executing the function and if so, resume * timekeeping. Otherwise resume the local tick. * * Call with interrupts disabled. Must be balanced with %tick_freeze(). * Interrupts must not be enabled after the preceding %tick_freeze(). */ void tick_unfreeze(void) { raw_spin_lock(&tick_freeze_lock); if (tick_freeze_depth == num_online_cpus()) { /* * Similar to tick_freeze(). On resumption the first CPU may * acquire uncontended sleeping locks while other CPUs block on * tick_freeze_lock. */ lock_map_acquire_try(&tick_freeze_map); timekeeping_resume(); sched_clock_resume(); lock_map_release(&tick_freeze_map); system_state = SYSTEM_RUNNING; trace_suspend_resume(TPS("timekeeping_freeze"), smp_processor_id(), false); } else { touch_softlockup_watchdog(); tick_resume_local(); } tick_freeze_depth--; raw_spin_unlock(&tick_freeze_lock); } #endif /* CONFIG_SUSPEND */ /** * tick_init - initialize the tick control */ void __init tick_init(void) { tick_broadcast_init(); tick_nohz_init(); } |
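/*
 * Editorial sketch (not part of the file above): the lockless read of
 * tick_next_period done in tick_setup_periodic() follows the usual seqcount
 * reader pattern, retrying the snapshot while a writer holds jiffies_seq.
 * The function name below is illustrative only.
 */
static inline ktime_t example_read_tick_next_period(void)
{
	unsigned int seq;
	ktime_t next;

	do {
		/* the snapshot is only consistent if seq did not change */
		seq = read_seqcount_begin(&jiffies_seq);
		next = tick_next_period;
	} while (read_seqcount_retry(&jiffies_seq, seq));

	return next;
}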
806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 | // SPDX-License-Identifier: GPL-2.0-only #include <linux/ethtool_netlink.h> #include <linux/bitmap.h> #include "netlink.h" #include "bitset.h" /* Some bitmaps are internally represented as an array of unsigned long, some * as an array of u32 (some even as single u32 for now). To avoid the need of * wrappers on caller side, we provide two set of functions: those with "32" * suffix in their names expect u32 based bitmaps, those without it expect * unsigned long bitmaps. */ static u32 ethnl_lower_bits(unsigned int n) { return ~(u32)0 >> (32 - n % 32); } static u32 ethnl_upper_bits(unsigned int n) { return ~(u32)0 << (n % 32); } /** * ethnl_bitmap32_clear() - Clear u32 based bitmap * @dst: bitmap to clear * @start: beginning of the interval * @end: end of the interval * @mod: set if bitmap was modified * * Clear @nbits bits of a bitmap with indices @start <= i < @end */ static void ethnl_bitmap32_clear(u32 *dst, unsigned int start, unsigned int end, bool *mod) { unsigned int start_word = start / 32; unsigned int end_word = end / 32; unsigned int i; u32 mask; if (end <= start) return; if (start % 32) { mask = ethnl_upper_bits(start); if (end_word == start_word) { mask &= ethnl_lower_bits(end); if (dst[start_word] & mask) { dst[start_word] &= ~mask; *mod = true; } return; } if (dst[start_word] & mask) { dst[start_word] &= ~mask; *mod = true; } start_word++; } for (i = start_word; i < end_word; i++) { if (dst[i]) { dst[i] = 0; *mod = true; } } if (end % 32) { mask = ethnl_lower_bits(end); if (dst[end_word] & mask) { dst[end_word] &= ~mask; *mod = true; } } } /** * ethnl_bitmap32_not_zero() - Check if any bit is set in an interval * @map: bitmap to test * @start: beginning of the interval * @end: end of the interval * * Return: true if there is non-zero bit with index @start <= i < @end, * false if the whole interval is zero */ static bool ethnl_bitmap32_not_zero(const u32 *map, unsigned int start, unsigned int end) { unsigned int start_word = start / 32; unsigned int end_word = end / 32; u32 mask; if (end <= start) return true; if (start % 32) { mask = ethnl_upper_bits(start); if (end_word == start_word) { mask &= ethnl_lower_bits(end); return map[start_word] & mask; } if (map[start_word] & mask) return true; start_word++; } if (!memchr_inv(map + start_word, '\0', (end_word - start_word) * sizeof(u32))) return true; if (end % 32 == 0) return true; return map[end_word] & ethnl_lower_bits(end); } /** * ethnl_bitmap32_update() - Modify u32 based bitmap according to value/mask * pair * @dst: bitmap to update * @nbits: bit size of the bitmap * @value: values to set * @mask: mask of bits to set * @mod: set to true if bitmap is modified, preserve if not * * Set bits in @dst bitmap which are set in @mask to values from @value, leave * the rest untouched. If destination bitmap was modified, set @mod to true, * leave as it is if not. */ static void ethnl_bitmap32_update(u32 *dst, unsigned int nbits, const u32 *value, const u32 *mask, bool *mod) { while (nbits > 0) { u32 real_mask = mask ? 
*mask : ~(u32)0; u32 new_value; if (nbits < 32) real_mask &= ethnl_lower_bits(nbits); new_value = (*dst & ~real_mask) | (*value & real_mask); if (new_value != *dst) { *dst = new_value; *mod = true; } if (nbits <= 32) break; dst++; nbits -= 32; value++; if (mask) mask++; } } static bool ethnl_bitmap32_test_bit(const u32 *map, unsigned int index) { return map[index / 32] & (1U << (index % 32)); } /** * ethnl_bitset32_size() - Calculate size of bitset nested attribute * @val: value bitmap (u32 based) * @mask: mask bitmap (u32 based, optional) * @nbits: bit length of the bitset * @names: array of bit names (optional) * @compact: assume compact format for output * * Estimate length of netlink attribute composed by a later call to * ethnl_put_bitset32() call with the same arguments. * * Return: negative error code or attribute length estimate */ int ethnl_bitset32_size(const u32 *val, const u32 *mask, unsigned int nbits, ethnl_string_array_t names, bool compact) { unsigned int len = 0; /* list flag */ if (!mask) len += nla_total_size(sizeof(u32)); /* size */ len += nla_total_size(sizeof(u32)); if (compact) { unsigned int nwords = DIV_ROUND_UP(nbits, 32); /* value, mask */ len += (mask ? 2 : 1) * nla_total_size(nwords * sizeof(u32)); } else { unsigned int bits_len = 0; unsigned int bit_len, i; for (i = 0; i < nbits; i++) { const char *name = names ? names[i] : NULL; if (!ethnl_bitmap32_test_bit(mask ?: val, i)) continue; /* index */ bit_len = nla_total_size(sizeof(u32)); /* name */ if (name) bit_len += ethnl_strz_size(name); /* value */ if (mask && ethnl_bitmap32_test_bit(val, i)) bit_len += nla_total_size(0); /* bit nest */ bits_len += nla_total_size(bit_len); } /* bits nest */ len += nla_total_size(bits_len); } /* outermost nest */ return nla_total_size(len); } /** * ethnl_put_bitset32() - Put a bitset nest into a message * @skb: skb with the message * @attrtype: attribute type for the bitset nest * @val: value bitmap (u32 based) * @mask: mask bitmap (u32 based, optional) * @nbits: bit length of the bitset * @names: array of bit names (optional) * @compact: use compact format for the output * * Compose a nested attribute representing a bitset. If @mask is null, simple * bitmap (bit list) is created, if @mask is provided, represent a value/mask * pair. Bit names are only used in verbose mode and when provided by calller. * * Return: 0 on success, negative error value on error */ int ethnl_put_bitset32(struct sk_buff *skb, int attrtype, const u32 *val, const u32 *mask, unsigned int nbits, ethnl_string_array_t names, bool compact) { struct nlattr *nest; struct nlattr *attr; nest = nla_nest_start(skb, attrtype); if (!nest) return -EMSGSIZE; if (!mask && nla_put_flag(skb, ETHTOOL_A_BITSET_NOMASK)) goto nla_put_failure; if (nla_put_u32(skb, ETHTOOL_A_BITSET_SIZE, nbits)) goto nla_put_failure; if (compact) { unsigned int nwords = DIV_ROUND_UP(nbits, 32); unsigned int nbytes = nwords * sizeof(u32); u32 *dst; attr = nla_reserve(skb, ETHTOOL_A_BITSET_VALUE, nbytes); if (!attr) goto nla_put_failure; dst = nla_data(attr); memcpy(dst, val, nbytes); if (nbits % 32) dst[nwords - 1] &= ethnl_lower_bits(nbits); if (mask) { attr = nla_reserve(skb, ETHTOOL_A_BITSET_MASK, nbytes); if (!attr) goto nla_put_failure; dst = nla_data(attr); memcpy(dst, mask, nbytes); if (nbits % 32) dst[nwords - 1] &= ethnl_lower_bits(nbits); } } else { struct nlattr *bits; unsigned int i; bits = nla_nest_start(skb, ETHTOOL_A_BITSET_BITS); if (!bits) goto nla_put_failure; for (i = 0; i < nbits; i++) { const char *name = names ? 
names[i] : NULL; if (!ethnl_bitmap32_test_bit(mask ?: val, i)) continue; attr = nla_nest_start(skb, ETHTOOL_A_BITSET_BITS_BIT); if (!attr) goto nla_put_failure; if (nla_put_u32(skb, ETHTOOL_A_BITSET_BIT_INDEX, i)) goto nla_put_failure; if (name && ethnl_put_strz(skb, ETHTOOL_A_BITSET_BIT_NAME, name)) goto nla_put_failure; if (mask && ethnl_bitmap32_test_bit(val, i) && nla_put_flag(skb, ETHTOOL_A_BITSET_BIT_VALUE)) goto nla_put_failure; nla_nest_end(skb, attr); } nla_nest_end(skb, bits); } nla_nest_end(skb, nest); return 0; nla_put_failure: nla_nest_cancel(skb, nest); return -EMSGSIZE; } static const struct nla_policy bitset_policy[] = { [ETHTOOL_A_BITSET_NOMASK] = { .type = NLA_FLAG }, [ETHTOOL_A_BITSET_SIZE] = NLA_POLICY_MAX(NLA_U32, ETHNL_MAX_BITSET_SIZE), [ETHTOOL_A_BITSET_BITS] = { .type = NLA_NESTED }, [ETHTOOL_A_BITSET_VALUE] = { .type = NLA_BINARY }, [ETHTOOL_A_BITSET_MASK] = { .type = NLA_BINARY }, }; static const struct nla_policy bit_policy[] = { [ETHTOOL_A_BITSET_BIT_INDEX] = { .type = NLA_U32 }, [ETHTOOL_A_BITSET_BIT_NAME] = { .type = NLA_NUL_STRING }, [ETHTOOL_A_BITSET_BIT_VALUE] = { .type = NLA_FLAG }, }; /** * ethnl_bitset_is_compact() - check if bitset attribute represents a compact * bitset * @bitset: nested attribute representing a bitset * @compact: pointer for return value * * Return: 0 on success, negative error code on failure */ int ethnl_bitset_is_compact(const struct nlattr *bitset, bool *compact) { struct nlattr *tb[ARRAY_SIZE(bitset_policy)]; int ret; ret = nla_parse_nested(tb, ARRAY_SIZE(bitset_policy) - 1, bitset, bitset_policy, NULL); if (ret < 0) return ret; if (tb[ETHTOOL_A_BITSET_BITS]) { if (tb[ETHTOOL_A_BITSET_VALUE] || tb[ETHTOOL_A_BITSET_MASK]) return -EINVAL; *compact = false; return 0; } if (!tb[ETHTOOL_A_BITSET_SIZE] || !tb[ETHTOOL_A_BITSET_VALUE]) return -EINVAL; *compact = true; return 0; } /** * ethnl_name_to_idx() - look up string index for a name * @names: array of ETH_GSTRING_LEN sized strings * @n_names: number of strings in the array * @name: name to look up * * Return: index of the string if found, -ENOENT if not found */ static int ethnl_name_to_idx(ethnl_string_array_t names, unsigned int n_names, const char *name) { unsigned int i; if (!names) return -ENOENT; for (i = 0; i < n_names; i++) { /* names[i] may not be null terminated */ if (!strncmp(names[i], name, ETH_GSTRING_LEN) && strlen(name) <= ETH_GSTRING_LEN) return i; } return -ENOENT; } static int ethnl_parse_bit(unsigned int *index, bool *val, unsigned int nbits, const struct nlattr *bit_attr, bool no_mask, ethnl_string_array_t names, struct netlink_ext_ack *extack) { struct nlattr *tb[ARRAY_SIZE(bit_policy)]; int ret, idx; ret = nla_parse_nested(tb, ARRAY_SIZE(bit_policy) - 1, bit_attr, bit_policy, extack); if (ret < 0) return ret; if (tb[ETHTOOL_A_BITSET_BIT_INDEX]) { const char *name; idx = nla_get_u32(tb[ETHTOOL_A_BITSET_BIT_INDEX]); if (idx >= nbits) { NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_BITSET_BIT_INDEX], "bit index too high"); return -EOPNOTSUPP; } name = names ? 
names[idx] : NULL; if (tb[ETHTOOL_A_BITSET_BIT_NAME] && name && strncmp(nla_data(tb[ETHTOOL_A_BITSET_BIT_NAME]), name, nla_len(tb[ETHTOOL_A_BITSET_BIT_NAME]))) { NL_SET_ERR_MSG_ATTR(extack, bit_attr, "bit index and name mismatch"); return -EINVAL; } } else if (tb[ETHTOOL_A_BITSET_BIT_NAME]) { idx = ethnl_name_to_idx(names, nbits, nla_data(tb[ETHTOOL_A_BITSET_BIT_NAME])); if (idx < 0) { NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_BITSET_BIT_NAME], "bit name not found"); return -EOPNOTSUPP; } } else { NL_SET_ERR_MSG_ATTR(extack, bit_attr, "neither bit index nor name specified"); return -EINVAL; } *index = idx; *val = no_mask || tb[ETHTOOL_A_BITSET_BIT_VALUE]; return 0; } /** * ethnl_bitmap32_equal() - Compare two bitmaps * @map1: first bitmap * @map2: second bitmap * @nbits: bit size to compare * * Return: true if first @nbits are equal, false if not */ static bool ethnl_bitmap32_equal(const u32 *map1, const u32 *map2, unsigned int nbits) { if (memcmp(map1, map2, nbits / 32 * sizeof(u32))) return false; if (nbits % 32 == 0) return true; return !((map1[nbits / 32] ^ map2[nbits / 32]) & ethnl_lower_bits(nbits % 32)); } static int ethnl_update_bitset32_verbose(u32 *bitmap, unsigned int nbits, const struct nlattr *attr, struct nlattr **tb, ethnl_string_array_t names, struct netlink_ext_ack *extack, bool *mod) { u32 *saved_bitmap = NULL; struct nlattr *bit_attr; bool no_mask; int rem; int ret; if (tb[ETHTOOL_A_BITSET_VALUE]) { NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_BITSET_VALUE], "value only allowed in compact bitset"); return -EINVAL; } if (tb[ETHTOOL_A_BITSET_MASK]) { NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_BITSET_MASK], "mask only allowed in compact bitset"); return -EINVAL; } no_mask = tb[ETHTOOL_A_BITSET_NOMASK]; if (no_mask) { unsigned int nwords = DIV_ROUND_UP(nbits, 32); unsigned int nbytes = nwords * sizeof(u32); bool dummy; /* The bitmap size is only the size of the map part without * its mask part. 
*/ saved_bitmap = kcalloc(nwords, sizeof(u32), GFP_KERNEL); if (!saved_bitmap) return -ENOMEM; memcpy(saved_bitmap, bitmap, nbytes); ethnl_bitmap32_clear(bitmap, 0, nbits, &dummy); } nla_for_each_nested(bit_attr, tb[ETHTOOL_A_BITSET_BITS], rem) { bool old_val, new_val; unsigned int idx; if (nla_type(bit_attr) != ETHTOOL_A_BITSET_BITS_BIT) { NL_SET_ERR_MSG_ATTR(extack, bit_attr, "only ETHTOOL_A_BITSET_BITS_BIT allowed in ETHTOOL_A_BITSET_BITS"); kfree(saved_bitmap); return -EINVAL; } ret = ethnl_parse_bit(&idx, &new_val, nbits, bit_attr, no_mask, names, extack); if (ret < 0) { kfree(saved_bitmap); return ret; } old_val = bitmap[idx / 32] & ((u32)1 << (idx % 32)); if (new_val != old_val) { if (new_val) bitmap[idx / 32] |= ((u32)1 << (idx % 32)); else bitmap[idx / 32] &= ~((u32)1 << (idx % 32)); if (!no_mask) *mod = true; } } if (no_mask && !ethnl_bitmap32_equal(saved_bitmap, bitmap, nbits)) *mod = true; kfree(saved_bitmap); return 0; } static int ethnl_compact_sanity_checks(unsigned int nbits, const struct nlattr *nest, struct nlattr **tb, struct netlink_ext_ack *extack) { bool no_mask = tb[ETHTOOL_A_BITSET_NOMASK]; unsigned int attr_nbits, attr_nwords; const struct nlattr *test_attr; if (no_mask && tb[ETHTOOL_A_BITSET_MASK]) { NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_BITSET_MASK], "mask not allowed in list bitset"); return -EINVAL; } if (!tb[ETHTOOL_A_BITSET_SIZE]) { NL_SET_ERR_MSG_ATTR(extack, nest, "missing size in compact bitset"); return -EINVAL; } if (!tb[ETHTOOL_A_BITSET_VALUE]) { NL_SET_ERR_MSG_ATTR(extack, nest, "missing value in compact bitset"); return -EINVAL; } if (!no_mask && !tb[ETHTOOL_A_BITSET_MASK]) { NL_SET_ERR_MSG_ATTR(extack, nest, "missing mask in compact nonlist bitset"); return -EINVAL; } attr_nbits = nla_get_u32(tb[ETHTOOL_A_BITSET_SIZE]); attr_nwords = DIV_ROUND_UP(attr_nbits, 32); if (nla_len(tb[ETHTOOL_A_BITSET_VALUE]) != attr_nwords * sizeof(u32)) { NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_BITSET_VALUE], "bitset value length does not match size"); return -EINVAL; } if (tb[ETHTOOL_A_BITSET_MASK] && nla_len(tb[ETHTOOL_A_BITSET_MASK]) != attr_nwords * sizeof(u32)) { NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_BITSET_MASK], "bitset mask length does not match size"); return -EINVAL; } if (attr_nbits <= nbits) return 0; test_attr = no_mask ? tb[ETHTOOL_A_BITSET_VALUE] : tb[ETHTOOL_A_BITSET_MASK]; if (ethnl_bitmap32_not_zero(nla_data(test_attr), nbits, attr_nbits)) { NL_SET_ERR_MSG_ATTR(extack, test_attr, "cannot modify bits past kernel bitset size"); return -EINVAL; } return 0; } /** * ethnl_update_bitset32() - Apply a bitset nest to a u32 based bitmap * @bitmap: bitmap to update * @nbits: size of the updated bitmap in bits * @attr: nest attribute to parse and apply * @names: array of bit names; may be null for compact format * @extack: extack for error reporting * @mod: set this to true if bitmap is modified, leave as it is if not * * Apply bitset netsted attribute to a bitmap. If the attribute represents * a bit list, @bitmap is set to its contents; otherwise, bits in mask are * set to values from value. Bitmaps in the attribute may be longer than * @nbits but the message must not request modifying any bits past @nbits. 
* * Return: negative error code on failure, 0 on success */ int ethnl_update_bitset32(u32 *bitmap, unsigned int nbits, const struct nlattr *attr, ethnl_string_array_t names, struct netlink_ext_ack *extack, bool *mod) { struct nlattr *tb[ARRAY_SIZE(bitset_policy)]; unsigned int change_bits; bool no_mask; int ret; if (!attr) return 0; ret = nla_parse_nested(tb, ARRAY_SIZE(bitset_policy) - 1, attr, bitset_policy, extack); if (ret < 0) return ret; if (tb[ETHTOOL_A_BITSET_BITS]) return ethnl_update_bitset32_verbose(bitmap, nbits, attr, tb, names, extack, mod); ret = ethnl_compact_sanity_checks(nbits, attr, tb, extack); if (ret < 0) return ret; no_mask = tb[ETHTOOL_A_BITSET_NOMASK]; change_bits = min_t(unsigned int, nla_get_u32(tb[ETHTOOL_A_BITSET_SIZE]), nbits); ethnl_bitmap32_update(bitmap, change_bits, nla_data(tb[ETHTOOL_A_BITSET_VALUE]), no_mask ? NULL : nla_data(tb[ETHTOOL_A_BITSET_MASK]), mod); if (no_mask && change_bits < nbits) ethnl_bitmap32_clear(bitmap, change_bits, nbits, mod); return 0; } /** * ethnl_parse_bitset() - Compute effective value and mask from bitset nest * @val: unsigned long based bitmap to put value into * @mask: unsigned long based bitmap to put mask into * @nbits: size of @val and @mask bitmaps * @attr: nest attribute to parse and apply * @names: array of bit names; may be null for compact format * @extack: extack for error reporting * * Provide @nbits size long bitmaps for value and mask so that * x = (val & mask) | (x & ~mask) would modify any @nbits sized bitmap x * the same way ethnl_update_bitset() with the same bitset attribute would. * * Return: negative error code on failure, 0 on success */ int ethnl_parse_bitset(unsigned long *val, unsigned long *mask, unsigned int nbits, const struct nlattr *attr, ethnl_string_array_t names, struct netlink_ext_ack *extack) { struct nlattr *tb[ARRAY_SIZE(bitset_policy)]; const struct nlattr *bit_attr; bool no_mask; int rem; int ret; if (!attr) return 0; ret = nla_parse_nested(tb, ARRAY_SIZE(bitset_policy) - 1, attr, bitset_policy, extack); if (ret < 0) return ret; no_mask = tb[ETHTOOL_A_BITSET_NOMASK]; if (!tb[ETHTOOL_A_BITSET_BITS]) { unsigned int change_bits; ret = ethnl_compact_sanity_checks(nbits, attr, tb, extack); if (ret < 0) return ret; change_bits = nla_get_u32(tb[ETHTOOL_A_BITSET_SIZE]); if (change_bits > nbits) change_bits = nbits; bitmap_from_arr32(val, nla_data(tb[ETHTOOL_A_BITSET_VALUE]), change_bits); if (change_bits < nbits) bitmap_clear(val, change_bits, nbits - change_bits); if (no_mask) { bitmap_fill(mask, nbits); } else { bitmap_from_arr32(mask, nla_data(tb[ETHTOOL_A_BITSET_MASK]), change_bits); if (change_bits < nbits) bitmap_clear(mask, change_bits, nbits - change_bits); } return 0; } if (tb[ETHTOOL_A_BITSET_VALUE]) { NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_BITSET_VALUE], "value only allowed in compact bitset"); return -EINVAL; } if (tb[ETHTOOL_A_BITSET_MASK]) { NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_BITSET_MASK], "mask only allowed in compact bitset"); return -EINVAL; } bitmap_zero(val, nbits); if (no_mask) bitmap_fill(mask, nbits); else bitmap_zero(mask, nbits); nla_for_each_nested(bit_attr, tb[ETHTOOL_A_BITSET_BITS], rem) { unsigned int idx; bool bit_val; ret = ethnl_parse_bit(&idx, &bit_val, nbits, bit_attr, no_mask, names, extack); if (ret < 0) return ret; if (bit_val) __set_bit(idx, val); if (!no_mask) __set_bit(idx, mask); } return 0; } #if BITS_PER_LONG == 64 && defined(__BIG_ENDIAN) /* 64-bit big endian architectures are the only case when u32 based bitmaps * and unsigned long based 
bitmaps have different memory layout so that we * cannot simply cast the latter to the former and need actual wrappers * converting the latter to the former. * * To reduce the number of slab allocations, the wrappers use fixed size local * variables for bitmaps up to ETHNL_SMALL_BITMAP_BITS bits which is the * majority of bitmaps used by ethtool. */ #define ETHNL_SMALL_BITMAP_BITS 128 #define ETHNL_SMALL_BITMAP_WORDS DIV_ROUND_UP(ETHNL_SMALL_BITMAP_BITS, 32) int ethnl_bitset_size(const unsigned long *val, const unsigned long *mask, unsigned int nbits, ethnl_string_array_t names, bool compact) { u32 small_mask32[ETHNL_SMALL_BITMAP_WORDS]; u32 small_val32[ETHNL_SMALL_BITMAP_WORDS]; u32 *mask32; u32 *val32; int ret; if (nbits > ETHNL_SMALL_BITMAP_BITS) { unsigned int nwords = DIV_ROUND_UP(nbits, 32); val32 = kmalloc_array(2 * nwords, sizeof(u32), GFP_KERNEL); if (!val32) return -ENOMEM; mask32 = val32 + nwords; } else { val32 = small_val32; mask32 = small_mask32; } bitmap_to_arr32(val32, val, nbits); if (mask) bitmap_to_arr32(mask32, mask, nbits); else mask32 = NULL; ret = ethnl_bitset32_size(val32, mask32, nbits, names, compact); if (nbits > ETHNL_SMALL_BITMAP_BITS) kfree(val32); return ret; } int ethnl_put_bitset(struct sk_buff *skb, int attrtype, const unsigned long *val, const unsigned long *mask, unsigned int nbits, ethnl_string_array_t names, bool compact) { u32 small_mask32[ETHNL_SMALL_BITMAP_WORDS]; u32 small_val32[ETHNL_SMALL_BITMAP_WORDS]; u32 *mask32; u32 *val32; int ret; if (nbits > ETHNL_SMALL_BITMAP_BITS) { unsigned int nwords = DIV_ROUND_UP(nbits, 32); val32 = kmalloc_array(2 * nwords, sizeof(u32), GFP_KERNEL); if (!val32) return -ENOMEM; mask32 = val32 + nwords; } else { val32 = small_val32; mask32 = small_mask32; } bitmap_to_arr32(val32, val, nbits); if (mask) bitmap_to_arr32(mask32, mask, nbits); else mask32 = NULL; ret = ethnl_put_bitset32(skb, attrtype, val32, mask32, nbits, names, compact); if (nbits > ETHNL_SMALL_BITMAP_BITS) kfree(val32); return ret; } int ethnl_update_bitset(unsigned long *bitmap, unsigned int nbits, const struct nlattr *attr, ethnl_string_array_t names, struct netlink_ext_ack *extack, bool *mod) { u32 small_bitmap32[ETHNL_SMALL_BITMAP_WORDS]; u32 *bitmap32 = small_bitmap32; bool u32_mod = false; int ret; if (nbits > ETHNL_SMALL_BITMAP_BITS) { unsigned int dst_words = DIV_ROUND_UP(nbits, 32); bitmap32 = kmalloc_array(dst_words, sizeof(u32), GFP_KERNEL); if (!bitmap32) return -ENOMEM; } bitmap_to_arr32(bitmap32, bitmap, nbits); ret = ethnl_update_bitset32(bitmap32, nbits, attr, names, extack, &u32_mod); if (u32_mod) { bitmap_from_arr32(bitmap, bitmap32, nbits); *mod = true; } if (nbits > ETHNL_SMALL_BITMAP_BITS) kfree(bitmap32); return ret; } #else /* On little endian 64-bit and all 32-bit architectures, an unsigned long * based bitmap can be interpreted as u32 based one using a simple cast. 
*/ int ethnl_bitset_size(const unsigned long *val, const unsigned long *mask, unsigned int nbits, ethnl_string_array_t names, bool compact) { return ethnl_bitset32_size((const u32 *)val, (const u32 *)mask, nbits, names, compact); } int ethnl_put_bitset(struct sk_buff *skb, int attrtype, const unsigned long *val, const unsigned long *mask, unsigned int nbits, ethnl_string_array_t names, bool compact) { return ethnl_put_bitset32(skb, attrtype, (const u32 *)val, (const u32 *)mask, nbits, names, compact); } int ethnl_update_bitset(unsigned long *bitmap, unsigned int nbits, const struct nlattr *attr, ethnl_string_array_t names, struct netlink_ext_ack *extack, bool *mod) { return ethnl_update_bitset32((u32 *)bitmap, nbits, attr, names, extack, mod); } #endif /* BITS_PER_LONG == 64 && defined(__BIG_ENDIAN) */ |
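/*
 * Editorial sketch (not part of the file above): the value/mask update rule
 * applied word by word in ethnl_bitmap32_update(), i.e. bits set in @mask
 * are taken from @value and all other bits keep their previous contents,
 * matching x = (val & mask) | (x & ~mask) from the ethnl_parse_bitset()
 * documentation. The function name is illustrative only.
 */
static inline u32 example_apply_value_mask(u32 old, u32 value, u32 mask)
{
	return (old & ~mask) | (value & mask);
}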
890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2013 Patrick McHardy <kaber@trash.net> */ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/unaligned.h> #include <net/tcp.h> #include <net/netns/generic.h> #include <linux/proc_fs.h> #include <linux/netfilter_ipv6.h> #include <linux/netfilter/nf_synproxy.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_extend.h> #include <net/netfilter/nf_conntrack_seqadj.h> #include <net/netfilter/nf_conntrack_synproxy.h> #include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/nf_synproxy.h> unsigned int synproxy_net_id; EXPORT_SYMBOL_GPL(synproxy_net_id); bool synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, const struct tcphdr *th, struct synproxy_options *opts) { int length = (th->doff * 4) - sizeof(*th); u8 buf[40], *ptr; if (unlikely(length < 0)) return false; ptr = skb_header_pointer(skb, doff + sizeof(*th), length, buf); if (ptr == NULL) return false; opts->options = 0; while (length > 0) { int opcode = *ptr++; int opsize; switch (opcode) { case TCPOPT_EOL: return true; case TCPOPT_NOP: length--; continue; default: if (length < 2) return true; opsize = *ptr++; if (opsize < 2) return true; if (opsize > length) return true; switch (opcode) { case TCPOPT_MSS: if (opsize == TCPOLEN_MSS) { opts->mss_option = get_unaligned_be16(ptr); opts->options |= NF_SYNPROXY_OPT_MSS; } break; case TCPOPT_WINDOW: if (opsize == TCPOLEN_WINDOW) { opts->wscale = *ptr; if (opts->wscale > TCP_MAX_WSCALE) opts->wscale = TCP_MAX_WSCALE; opts->options |= NF_SYNPROXY_OPT_WSCALE; } break; case TCPOPT_TIMESTAMP: if (opsize == TCPOLEN_TIMESTAMP) { opts->tsval = get_unaligned_be32(ptr); opts->tsecr = get_unaligned_be32(ptr + 4); opts->options |= NF_SYNPROXY_OPT_TIMESTAMP; } break; case TCPOPT_SACK_PERM: if (opsize == TCPOLEN_SACK_PERM) opts->options |= NF_SYNPROXY_OPT_SACK_PERM; break; } ptr += opsize - 2; length -= opsize; } 
} return true; } EXPORT_SYMBOL_GPL(synproxy_parse_options); static unsigned int synproxy_options_size(const struct synproxy_options *opts) { unsigned int size = 0; if (opts->options & NF_SYNPROXY_OPT_MSS) size += TCPOLEN_MSS_ALIGNED; if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP) size += TCPOLEN_TSTAMP_ALIGNED; else if (opts->options & NF_SYNPROXY_OPT_SACK_PERM) size += TCPOLEN_SACKPERM_ALIGNED; if (opts->options & NF_SYNPROXY_OPT_WSCALE) size += TCPOLEN_WSCALE_ALIGNED; return size; } static void synproxy_build_options(struct tcphdr *th, const struct synproxy_options *opts) { __be32 *ptr = (__be32 *)(th + 1); u8 options = opts->options; if (options & NF_SYNPROXY_OPT_MSS) *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | opts->mss_option); if (options & NF_SYNPROXY_OPT_TIMESTAMP) { if (options & NF_SYNPROXY_OPT_SACK_PERM) *ptr++ = htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); else *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); *ptr++ = htonl(opts->tsval); *ptr++ = htonl(opts->tsecr); } else if (options & NF_SYNPROXY_OPT_SACK_PERM) *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM); if (options & NF_SYNPROXY_OPT_WSCALE) *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | opts->wscale); } void synproxy_init_timestamp_cookie(const struct nf_synproxy_info *info, struct synproxy_options *opts) { opts->tsecr = opts->tsval; opts->tsval = tcp_clock_ms() & ~0x3f; if (opts->options & NF_SYNPROXY_OPT_WSCALE) { opts->tsval |= opts->wscale; opts->wscale = info->wscale; } else opts->tsval |= 0xf; if (opts->options & NF_SYNPROXY_OPT_SACK_PERM) opts->tsval |= 1 << 4; if (opts->options & NF_SYNPROXY_OPT_ECN) opts->tsval |= 1 << 5; } EXPORT_SYMBOL_GPL(synproxy_init_timestamp_cookie); static void synproxy_check_timestamp_cookie(struct synproxy_options *opts) { opts->wscale = opts->tsecr & 0xf; if (opts->wscale != 0xf) opts->options |= NF_SYNPROXY_OPT_WSCALE; opts->options |= opts->tsecr & (1 << 4) ? NF_SYNPROXY_OPT_SACK_PERM : 0; opts->options |= opts->tsecr & (1 << 5) ? 
NF_SYNPROXY_OPT_ECN : 0; } static unsigned int synproxy_tstamp_adjust(struct sk_buff *skb, unsigned int protoff, struct tcphdr *th, struct nf_conn *ct, enum ip_conntrack_info ctinfo, const struct nf_conn_synproxy *synproxy) { unsigned int optoff, optend; __be32 *ptr, old; if (synproxy->tsoff == 0) return 1; optoff = protoff + sizeof(struct tcphdr); optend = protoff + th->doff * 4; if (skb_ensure_writable(skb, optend)) return 0; while (optoff < optend) { unsigned char *op = skb->data + optoff; switch (op[0]) { case TCPOPT_EOL: return 1; case TCPOPT_NOP: optoff++; continue; default: if (optoff + 1 == optend || optoff + op[1] > optend || op[1] < 2) return 0; if (op[0] == TCPOPT_TIMESTAMP && op[1] == TCPOLEN_TIMESTAMP) { if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) { ptr = (__be32 *)&op[2]; old = *ptr; *ptr = htonl(ntohl(*ptr) - synproxy->tsoff); } else { ptr = (__be32 *)&op[6]; old = *ptr; *ptr = htonl(ntohl(*ptr) + synproxy->tsoff); } inet_proto_csum_replace4(&th->check, skb, old, *ptr, false); return 1; } optoff += op[1]; } } return 1; } #ifdef CONFIG_PROC_FS static void *synproxy_cpu_seq_start(struct seq_file *seq, loff_t *pos) { struct synproxy_net *snet = synproxy_pernet(seq_file_net(seq)); int cpu; if (*pos == 0) return SEQ_START_TOKEN; for (cpu = *pos - 1; cpu < nr_cpu_ids; cpu++) { if (!cpu_possible(cpu)) continue; *pos = cpu + 1; return per_cpu_ptr(snet->stats, cpu); } return NULL; } static void *synproxy_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct synproxy_net *snet = synproxy_pernet(seq_file_net(seq)); int cpu; for (cpu = *pos; cpu < nr_cpu_ids; cpu++) { if (!cpu_possible(cpu)) continue; *pos = cpu + 1; return per_cpu_ptr(snet->stats, cpu); } (*pos)++; return NULL; } static void synproxy_cpu_seq_stop(struct seq_file *seq, void *v) { return; } static int synproxy_cpu_seq_show(struct seq_file *seq, void *v) { struct synproxy_stats *stats = v; if (v == SEQ_START_TOKEN) { seq_puts(seq, "entries\t\tsyn_received\t" "cookie_invalid\tcookie_valid\t" "cookie_retrans\tconn_reopened\n"); return 0; } seq_printf(seq, "%08x\t%08x\t%08x\t%08x\t%08x\t%08x\n", 0, stats->syn_received, stats->cookie_invalid, stats->cookie_valid, stats->cookie_retrans, stats->conn_reopened); return 0; } static const struct seq_operations synproxy_cpu_seq_ops = { .start = synproxy_cpu_seq_start, .next = synproxy_cpu_seq_next, .stop = synproxy_cpu_seq_stop, .show = synproxy_cpu_seq_show, }; static int __net_init synproxy_proc_init(struct net *net) { if (!proc_create_net("synproxy", 0444, net->proc_net_stat, &synproxy_cpu_seq_ops, sizeof(struct seq_net_private))) return -ENOMEM; return 0; } static void __net_exit synproxy_proc_exit(struct net *net) { remove_proc_entry("synproxy", net->proc_net_stat); } #else static int __net_init synproxy_proc_init(struct net *net) { return 0; } static void __net_exit synproxy_proc_exit(struct net *net) { return; } #endif /* CONFIG_PROC_FS */ static int __net_init synproxy_net_init(struct net *net) { struct synproxy_net *snet = synproxy_pernet(net); struct nf_conn *ct; int err = -ENOMEM; ct = nf_ct_tmpl_alloc(net, &nf_ct_zone_dflt, GFP_KERNEL); if (!ct) goto err1; if (!nfct_seqadj_ext_add(ct)) goto err2; if (!nfct_synproxy_ext_add(ct)) goto err2; __set_bit(IPS_CONFIRMED_BIT, &ct->status); snet->tmpl = ct; snet->stats = alloc_percpu(struct synproxy_stats); if (snet->stats == NULL) goto err2; err = synproxy_proc_init(net); if (err < 0) goto err3; return 0; err3: free_percpu(snet->stats); err2: nf_ct_tmpl_free(ct); err1: return err; } static void __net_exit 
synproxy_net_exit(struct net *net) { struct synproxy_net *snet = synproxy_pernet(net); nf_ct_put(snet->tmpl); synproxy_proc_exit(net); free_percpu(snet->stats); } static struct pernet_operations synproxy_net_ops = { .init = synproxy_net_init, .exit = synproxy_net_exit, .id = &synproxy_net_id, .size = sizeof(struct synproxy_net), }; static int __init synproxy_core_init(void) { return register_pernet_subsys(&synproxy_net_ops); } static void __exit synproxy_core_exit(void) { unregister_pernet_subsys(&synproxy_net_ops); } module_init(synproxy_core_init); module_exit(synproxy_core_exit); static struct iphdr * synproxy_build_ip(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr) { struct iphdr *iph; skb_reset_network_header(skb); iph = skb_put(skb, sizeof(*iph)); iph->version = 4; iph->ihl = sizeof(*iph) / 4; iph->tos = 0; iph->id = 0; iph->frag_off = htons(IP_DF); iph->ttl = READ_ONCE(net->ipv4.sysctl_ip_default_ttl); iph->protocol = IPPROTO_TCP; iph->check = 0; iph->saddr = saddr; iph->daddr = daddr; return iph; } static void synproxy_send_tcp(struct net *net, const struct sk_buff *skb, struct sk_buff *nskb, struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo, struct iphdr *niph, struct tcphdr *nth, unsigned int tcp_hdr_size) { nth->check = ~tcp_v4_check(tcp_hdr_size, niph->saddr, niph->daddr, 0); nskb->ip_summed = CHECKSUM_PARTIAL; nskb->csum_start = (unsigned char *)nth - nskb->head; nskb->csum_offset = offsetof(struct tcphdr, check); skb_dst_set_noref(nskb, skb_dst(skb)); nskb->protocol = htons(ETH_P_IP); if (ip_route_me_harder(net, nskb->sk, nskb, RTN_UNSPEC)) goto free_nskb; if (nfct) { nf_ct_set(nskb, (struct nf_conn *)nfct, ctinfo); nf_conntrack_get(nfct); } ip_local_out(net, nskb->sk, nskb); return; free_nskb: kfree_skb(nskb); } void synproxy_send_client_synack(struct net *net, const struct sk_buff *skb, const struct tcphdr *th, const struct synproxy_options *opts) { struct sk_buff *nskb; struct iphdr *iph, *niph; struct tcphdr *nth; unsigned int tcp_hdr_size; u16 mss = opts->mss_encode; iph = ip_hdr(skb); tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts); nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER, GFP_ATOMIC); if (!nskb) return; skb_reserve(nskb, MAX_TCP_HEADER); niph = synproxy_build_ip(net, nskb, iph->daddr, iph->saddr); skb_reset_transport_header(nskb); nth = skb_put(nskb, tcp_hdr_size); nth->source = th->dest; nth->dest = th->source; nth->seq = htonl(__cookie_v4_init_sequence(iph, th, &mss)); nth->ack_seq = htonl(ntohl(th->seq) + 1); tcp_flag_word(nth) = TCP_FLAG_SYN | TCP_FLAG_ACK; if (opts->options & NF_SYNPROXY_OPT_ECN) tcp_flag_word(nth) |= TCP_FLAG_ECE; nth->doff = tcp_hdr_size / 4; nth->window = 0; nth->check = 0; nth->urg_ptr = 0; synproxy_build_options(nth, opts); synproxy_send_tcp(net, skb, nskb, skb_nfct(skb), IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size); } EXPORT_SYMBOL_GPL(synproxy_send_client_synack); static void synproxy_send_server_syn(struct net *net, const struct sk_buff *skb, const struct tcphdr *th, const struct synproxy_options *opts, u32 recv_seq) { struct synproxy_net *snet = synproxy_pernet(net); struct sk_buff *nskb; struct iphdr *iph, *niph; struct tcphdr *nth; unsigned int tcp_hdr_size; iph = ip_hdr(skb); tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts); nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER, GFP_ATOMIC); if (!nskb) return; skb_reserve(nskb, MAX_TCP_HEADER); niph = synproxy_build_ip(net, nskb, iph->saddr, iph->daddr); skb_reset_transport_header(nskb); nth = 
skb_put(nskb, tcp_hdr_size); nth->source = th->source; nth->dest = th->dest; nth->seq = htonl(recv_seq - 1); /* ack_seq is used to relay our ISN to the synproxy hook to initialize * sequence number translation once a connection tracking entry exists. */ nth->ack_seq = htonl(ntohl(th->ack_seq) - 1); tcp_flag_word(nth) = TCP_FLAG_SYN; if (opts->options & NF_SYNPROXY_OPT_ECN) tcp_flag_word(nth) |= TCP_FLAG_ECE | TCP_FLAG_CWR; nth->doff = tcp_hdr_size / 4; nth->window = th->window; nth->check = 0; nth->urg_ptr = 0; synproxy_build_options(nth, opts); synproxy_send_tcp(net, skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW, niph, nth, tcp_hdr_size); } static void synproxy_send_server_ack(struct net *net, const struct ip_ct_tcp *state, const struct sk_buff *skb, const struct tcphdr *th, const struct synproxy_options *opts) { struct sk_buff *nskb; struct iphdr *iph, *niph; struct tcphdr *nth; unsigned int tcp_hdr_size; iph = ip_hdr(skb); tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts); nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER, GFP_ATOMIC); if (!nskb) return; skb_reserve(nskb, MAX_TCP_HEADER); niph = synproxy_build_ip(net, nskb, iph->daddr, iph->saddr); skb_reset_transport_header(nskb); nth = skb_put(nskb, tcp_hdr_size); nth->source = th->dest; nth->dest = th->source; nth->seq = htonl(ntohl(th->ack_seq)); nth->ack_seq = htonl(ntohl(th->seq) + 1); tcp_flag_word(nth) = TCP_FLAG_ACK; nth->doff = tcp_hdr_size / 4; nth->window = htons(state->seen[IP_CT_DIR_ORIGINAL].td_maxwin); nth->check = 0; nth->urg_ptr = 0; synproxy_build_options(nth, opts); synproxy_send_tcp(net, skb, nskb, NULL, 0, niph, nth, tcp_hdr_size); } static void synproxy_send_client_ack(struct net *net, const struct sk_buff *skb, const struct tcphdr *th, const struct synproxy_options *opts) { struct sk_buff *nskb; struct iphdr *iph, *niph; struct tcphdr *nth; unsigned int tcp_hdr_size; iph = ip_hdr(skb); tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts); nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER, GFP_ATOMIC); if (!nskb) return; skb_reserve(nskb, MAX_TCP_HEADER); niph = synproxy_build_ip(net, nskb, iph->saddr, iph->daddr); skb_reset_transport_header(nskb); nth = skb_put(nskb, tcp_hdr_size); nth->source = th->source; nth->dest = th->dest; nth->seq = htonl(ntohl(th->seq) + 1); nth->ack_seq = th->ack_seq; tcp_flag_word(nth) = TCP_FLAG_ACK; nth->doff = tcp_hdr_size / 4; nth->window = htons(ntohs(th->window) >> opts->wscale); nth->check = 0; nth->urg_ptr = 0; synproxy_build_options(nth, opts); synproxy_send_tcp(net, skb, nskb, skb_nfct(skb), IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size); } bool synproxy_recv_client_ack(struct net *net, const struct sk_buff *skb, const struct tcphdr *th, struct synproxy_options *opts, u32 recv_seq) { struct synproxy_net *snet = synproxy_pernet(net); int mss; mss = __cookie_v4_check(ip_hdr(skb), th); if (mss == 0) { this_cpu_inc(snet->stats->cookie_invalid); return false; } this_cpu_inc(snet->stats->cookie_valid); opts->mss_option = mss; opts->options |= NF_SYNPROXY_OPT_MSS; if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP) synproxy_check_timestamp_cookie(opts); synproxy_send_server_syn(net, skb, th, opts, recv_seq); return true; } EXPORT_SYMBOL_GPL(synproxy_recv_client_ack); unsigned int ipv4_synproxy_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *nhs) { struct net *net = nhs->net; struct synproxy_net *snet = synproxy_pernet(net); enum ip_conntrack_info ctinfo; struct nf_conn *ct; struct nf_conn_synproxy *synproxy; struct 
synproxy_options opts = {}; const struct ip_ct_tcp *state; struct tcphdr *th, _th; unsigned int thoff; ct = nf_ct_get(skb, &ctinfo); if (!ct) return NF_ACCEPT; synproxy = nfct_synproxy(ct); if (!synproxy) return NF_ACCEPT; if (nf_is_loopback_packet(skb) || ip_hdr(skb)->protocol != IPPROTO_TCP) return NF_ACCEPT; thoff = ip_hdrlen(skb); th = skb_header_pointer(skb, thoff, sizeof(_th), &_th); if (!th) return NF_DROP; state = &ct->proto.tcp; switch (state->state) { case TCP_CONNTRACK_CLOSE: if (th->rst && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) { nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq) + 1); break; } if (!th->syn || th->ack || CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) break; /* Reopened connection - reset the sequence number and timestamp * adjustments, they will get initialized once the connection is * reestablished. */ nf_ct_seqadj_init(ct, ctinfo, 0); synproxy->tsoff = 0; this_cpu_inc(snet->stats->conn_reopened); fallthrough; case TCP_CONNTRACK_SYN_SENT: if (!synproxy_parse_options(skb, thoff, th, &opts)) return NF_DROP; if (!th->syn && th->ack && CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { /* Keep-Alives are sent with SEG.SEQ = SND.NXT-1, * therefore we need to add 1 to make the SYN sequence * number match the one of first SYN. */ if (synproxy_recv_client_ack(net, skb, th, &opts, ntohl(th->seq) + 1)) { this_cpu_inc(snet->stats->cookie_retrans); consume_skb(skb); return NF_STOLEN; } else { return NF_DROP; } } synproxy->isn = ntohl(th->ack_seq); if (opts.options & NF_SYNPROXY_OPT_TIMESTAMP) synproxy->its = opts.tsecr; nf_conntrack_event_cache(IPCT_SYNPROXY, ct); break; case TCP_CONNTRACK_SYN_RECV: if (!th->syn || !th->ack) break; if (!synproxy_parse_options(skb, thoff, th, &opts)) return NF_DROP; if (opts.options & NF_SYNPROXY_OPT_TIMESTAMP) { synproxy->tsoff = opts.tsval - synproxy->its; nf_conntrack_event_cache(IPCT_SYNPROXY, ct); } opts.options &= ~(NF_SYNPROXY_OPT_MSS | NF_SYNPROXY_OPT_WSCALE | NF_SYNPROXY_OPT_SACK_PERM); swap(opts.tsval, opts.tsecr); synproxy_send_server_ack(net, state, skb, th, &opts); nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq)); nf_conntrack_event_cache(IPCT_SEQADJ, ct); swap(opts.tsval, opts.tsecr); synproxy_send_client_ack(net, skb, th, &opts); consume_skb(skb); return NF_STOLEN; default: break; } synproxy_tstamp_adjust(skb, thoff, th, ct, ctinfo, synproxy); return NF_ACCEPT; } EXPORT_SYMBOL_GPL(ipv4_synproxy_hook); static const struct nf_hook_ops ipv4_synproxy_ops[] = { { .hook = ipv4_synproxy_hook, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, }, { .hook = ipv4_synproxy_hook, .pf = NFPROTO_IPV4, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, }, }; int nf_synproxy_ipv4_init(struct synproxy_net *snet, struct net *net) { int err; if (snet->hook_ref4 == 0) { err = nf_register_net_hooks(net, ipv4_synproxy_ops, ARRAY_SIZE(ipv4_synproxy_ops)); if (err) return err; } snet->hook_ref4++; return 0; } EXPORT_SYMBOL_GPL(nf_synproxy_ipv4_init); void nf_synproxy_ipv4_fini(struct synproxy_net *snet, struct net *net) { snet->hook_ref4--; if (snet->hook_ref4 == 0) nf_unregister_net_hooks(net, ipv4_synproxy_ops, ARRAY_SIZE(ipv4_synproxy_ops)); } EXPORT_SYMBOL_GPL(nf_synproxy_ipv4_fini); #if IS_ENABLED(CONFIG_IPV6) static struct ipv6hdr * synproxy_build_ip_ipv6(struct net *net, struct sk_buff *skb, const struct in6_addr *saddr, const struct in6_addr *daddr) { struct ipv6hdr *iph; skb_reset_network_header(skb); iph = skb_put(skb, sizeof(*iph)); ip6_flow_hdr(iph, 0, 
0); iph->hop_limit = READ_ONCE(net->ipv6.devconf_all->hop_limit); iph->nexthdr = IPPROTO_TCP; iph->saddr = *saddr; iph->daddr = *daddr; return iph; } static void synproxy_send_tcp_ipv6(struct net *net, const struct sk_buff *skb, struct sk_buff *nskb, struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo, struct ipv6hdr *niph, struct tcphdr *nth, unsigned int tcp_hdr_size) { struct dst_entry *dst; struct flowi6 fl6; int err; nth->check = ~tcp_v6_check(tcp_hdr_size, &niph->saddr, &niph->daddr, 0); nskb->ip_summed = CHECKSUM_PARTIAL; nskb->csum_start = (unsigned char *)nth - nskb->head; nskb->csum_offset = offsetof(struct tcphdr, check); memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_TCP; fl6.saddr = niph->saddr; fl6.daddr = niph->daddr; fl6.fl6_sport = nth->source; fl6.fl6_dport = nth->dest; security_skb_classify_flow((struct sk_buff *)skb, flowi6_to_flowi_common(&fl6)); err = nf_ip6_route(net, &dst, flowi6_to_flowi(&fl6), false); if (err) { goto free_nskb; } dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); if (IS_ERR(dst)) goto free_nskb; skb_dst_set(nskb, dst); if (nfct) { nf_ct_set(nskb, (struct nf_conn *)nfct, ctinfo); nf_conntrack_get(nfct); } ip6_local_out(net, nskb->sk, nskb); return; free_nskb: kfree_skb(nskb); } void synproxy_send_client_synack_ipv6(struct net *net, const struct sk_buff *skb, const struct tcphdr *th, const struct synproxy_options *opts) { struct sk_buff *nskb; struct ipv6hdr *iph, *niph; struct tcphdr *nth; unsigned int tcp_hdr_size; u16 mss = opts->mss_encode; iph = ipv6_hdr(skb); tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts); nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER, GFP_ATOMIC); if (!nskb) return; skb_reserve(nskb, MAX_TCP_HEADER); niph = synproxy_build_ip_ipv6(net, nskb, &iph->daddr, &iph->saddr); skb_reset_transport_header(nskb); nth = skb_put(nskb, tcp_hdr_size); nth->source = th->dest; nth->dest = th->source; nth->seq = htonl(nf_ipv6_cookie_init_sequence(iph, th, &mss)); nth->ack_seq = htonl(ntohl(th->seq) + 1); tcp_flag_word(nth) = TCP_FLAG_SYN | TCP_FLAG_ACK; if (opts->options & NF_SYNPROXY_OPT_ECN) tcp_flag_word(nth) |= TCP_FLAG_ECE; nth->doff = tcp_hdr_size / 4; nth->window = 0; nth->check = 0; nth->urg_ptr = 0; synproxy_build_options(nth, opts); synproxy_send_tcp_ipv6(net, skb, nskb, skb_nfct(skb), IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size); } EXPORT_SYMBOL_GPL(synproxy_send_client_synack_ipv6); static void synproxy_send_server_syn_ipv6(struct net *net, const struct sk_buff *skb, const struct tcphdr *th, const struct synproxy_options *opts, u32 recv_seq) { struct synproxy_net *snet = synproxy_pernet(net); struct sk_buff *nskb; struct ipv6hdr *iph, *niph; struct tcphdr *nth; unsigned int tcp_hdr_size; iph = ipv6_hdr(skb); tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts); nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER, GFP_ATOMIC); if (!nskb) return; skb_reserve(nskb, MAX_TCP_HEADER); niph = synproxy_build_ip_ipv6(net, nskb, &iph->saddr, &iph->daddr); skb_reset_transport_header(nskb); nth = skb_put(nskb, tcp_hdr_size); nth->source = th->source; nth->dest = th->dest; nth->seq = htonl(recv_seq - 1); /* ack_seq is used to relay our ISN to the synproxy hook to initialize * sequence number translation once a connection tracking entry exists. 
*/ nth->ack_seq = htonl(ntohl(th->ack_seq) - 1); tcp_flag_word(nth) = TCP_FLAG_SYN; if (opts->options & NF_SYNPROXY_OPT_ECN) tcp_flag_word(nth) |= TCP_FLAG_ECE | TCP_FLAG_CWR; nth->doff = tcp_hdr_size / 4; nth->window = th->window; nth->check = 0; nth->urg_ptr = 0; synproxy_build_options(nth, opts); synproxy_send_tcp_ipv6(net, skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW, niph, nth, tcp_hdr_size); } static void synproxy_send_server_ack_ipv6(struct net *net, const struct ip_ct_tcp *state, const struct sk_buff *skb, const struct tcphdr *th, const struct synproxy_options *opts) { struct sk_buff *nskb; struct ipv6hdr *iph, *niph; struct tcphdr *nth; unsigned int tcp_hdr_size; iph = ipv6_hdr(skb); tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts); nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER, GFP_ATOMIC); if (!nskb) return; skb_reserve(nskb, MAX_TCP_HEADER); niph = synproxy_build_ip_ipv6(net, nskb, &iph->daddr, &iph->saddr); skb_reset_transport_header(nskb); nth = skb_put(nskb, tcp_hdr_size); nth->source = th->dest; nth->dest = th->source; nth->seq = htonl(ntohl(th->ack_seq)); nth->ack_seq = htonl(ntohl(th->seq) + 1); tcp_flag_word(nth) = TCP_FLAG_ACK; nth->doff = tcp_hdr_size / 4; nth->window = htons(state->seen[IP_CT_DIR_ORIGINAL].td_maxwin); nth->check = 0; nth->urg_ptr = 0; synproxy_build_options(nth, opts); synproxy_send_tcp_ipv6(net, skb, nskb, NULL, 0, niph, nth, tcp_hdr_size); } static void synproxy_send_client_ack_ipv6(struct net *net, const struct sk_buff *skb, const struct tcphdr *th, const struct synproxy_options *opts) { struct sk_buff *nskb; struct ipv6hdr *iph, *niph; struct tcphdr *nth; unsigned int tcp_hdr_size; iph = ipv6_hdr(skb); tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts); nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER, GFP_ATOMIC); if (!nskb) return; skb_reserve(nskb, MAX_TCP_HEADER); niph = synproxy_build_ip_ipv6(net, nskb, &iph->saddr, &iph->daddr); skb_reset_transport_header(nskb); nth = skb_put(nskb, tcp_hdr_size); nth->source = th->source; nth->dest = th->dest; nth->seq = htonl(ntohl(th->seq) + 1); nth->ack_seq = th->ack_seq; tcp_flag_word(nth) = TCP_FLAG_ACK; nth->doff = tcp_hdr_size / 4; nth->window = htons(ntohs(th->window) >> opts->wscale); nth->check = 0; nth->urg_ptr = 0; synproxy_build_options(nth, opts); synproxy_send_tcp_ipv6(net, skb, nskb, skb_nfct(skb), IP_CT_ESTABLISHED_REPLY, niph, nth, tcp_hdr_size); } bool synproxy_recv_client_ack_ipv6(struct net *net, const struct sk_buff *skb, const struct tcphdr *th, struct synproxy_options *opts, u32 recv_seq) { struct synproxy_net *snet = synproxy_pernet(net); int mss; mss = nf_cookie_v6_check(ipv6_hdr(skb), th); if (mss == 0) { this_cpu_inc(snet->stats->cookie_invalid); return false; } this_cpu_inc(snet->stats->cookie_valid); opts->mss_option = mss; opts->options |= NF_SYNPROXY_OPT_MSS; if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP) synproxy_check_timestamp_cookie(opts); synproxy_send_server_syn_ipv6(net, skb, th, opts, recv_seq); return true; } EXPORT_SYMBOL_GPL(synproxy_recv_client_ack_ipv6); unsigned int ipv6_synproxy_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *nhs) { struct net *net = nhs->net; struct synproxy_net *snet = synproxy_pernet(net); enum ip_conntrack_info ctinfo; struct nf_conn *ct; struct nf_conn_synproxy *synproxy; struct synproxy_options opts = {}; const struct ip_ct_tcp *state; struct tcphdr *th, _th; __be16 frag_off; u8 nexthdr; int thoff; ct = nf_ct_get(skb, &ctinfo); if (!ct) return NF_ACCEPT; synproxy = 
nfct_synproxy(ct); if (!synproxy) return NF_ACCEPT; if (nf_is_loopback_packet(skb)) return NF_ACCEPT; nexthdr = ipv6_hdr(skb)->nexthdr; thoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, &frag_off); if (thoff < 0 || nexthdr != IPPROTO_TCP) return NF_ACCEPT; th = skb_header_pointer(skb, thoff, sizeof(_th), &_th); if (!th) return NF_DROP; state = &ct->proto.tcp; switch (state->state) { case TCP_CONNTRACK_CLOSE: if (th->rst && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) { nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq) + 1); break; } if (!th->syn || th->ack || CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) break; /* Reopened connection - reset the sequence number and timestamp * adjustments, they will get initialized once the connection is * reestablished. */ nf_ct_seqadj_init(ct, ctinfo, 0); synproxy->tsoff = 0; this_cpu_inc(snet->stats->conn_reopened); fallthrough; case TCP_CONNTRACK_SYN_SENT: if (!synproxy_parse_options(skb, thoff, th, &opts)) return NF_DROP; if (!th->syn && th->ack && CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { /* Keep-Alives are sent with SEG.SEQ = SND.NXT-1, * therefore we need to add 1 to make the SYN sequence * number match the one of first SYN. */ if (synproxy_recv_client_ack_ipv6(net, skb, th, &opts, ntohl(th->seq) + 1)) { this_cpu_inc(snet->stats->cookie_retrans); consume_skb(skb); return NF_STOLEN; } else { return NF_DROP; } } synproxy->isn = ntohl(th->ack_seq); if (opts.options & NF_SYNPROXY_OPT_TIMESTAMP) synproxy->its = opts.tsecr; nf_conntrack_event_cache(IPCT_SYNPROXY, ct); break; case TCP_CONNTRACK_SYN_RECV: if (!th->syn || !th->ack) break; if (!synproxy_parse_options(skb, thoff, th, &opts)) return NF_DROP; if (opts.options & NF_SYNPROXY_OPT_TIMESTAMP) { synproxy->tsoff = opts.tsval - synproxy->its; nf_conntrack_event_cache(IPCT_SYNPROXY, ct); } opts.options &= ~(NF_SYNPROXY_OPT_MSS | NF_SYNPROXY_OPT_WSCALE | NF_SYNPROXY_OPT_SACK_PERM); swap(opts.tsval, opts.tsecr); synproxy_send_server_ack_ipv6(net, state, skb, th, &opts); nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq)); nf_conntrack_event_cache(IPCT_SEQADJ, ct); swap(opts.tsval, opts.tsecr); synproxy_send_client_ack_ipv6(net, skb, th, &opts); consume_skb(skb); return NF_STOLEN; default: break; } synproxy_tstamp_adjust(skb, thoff, th, ct, ctinfo, synproxy); return NF_ACCEPT; } EXPORT_SYMBOL_GPL(ipv6_synproxy_hook); static const struct nf_hook_ops ipv6_synproxy_ops[] = { { .hook = ipv6_synproxy_hook, .pf = NFPROTO_IPV6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, }, { .hook = ipv6_synproxy_hook, .pf = NFPROTO_IPV6, .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, }, }; int nf_synproxy_ipv6_init(struct synproxy_net *snet, struct net *net) { int err; if (snet->hook_ref6 == 0) { err = nf_register_net_hooks(net, ipv6_synproxy_ops, ARRAY_SIZE(ipv6_synproxy_ops)); if (err) return err; } snet->hook_ref6++; return 0; } EXPORT_SYMBOL_GPL(nf_synproxy_ipv6_init); void nf_synproxy_ipv6_fini(struct synproxy_net *snet, struct net *net) { snet->hook_ref6--; if (snet->hook_ref6 == 0) nf_unregister_net_hooks(net, ipv6_synproxy_ops, ARRAY_SIZE(ipv6_synproxy_ops)); } EXPORT_SYMBOL_GPL(nf_synproxy_ipv6_fini); #endif /* CONFIG_IPV6 */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_DESCRIPTION("nftables SYNPROXY expression support"); |
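/*
 * The exported nf_synproxy_ipv4_init()/nf_synproxy_ipv6_init() helpers above
 * reference-count the per-netns hook registration, so a caller is expected to
 * pair each init with the matching fini on teardown. The sketch below shows
 * that pairing only; example_synproxy_enable()/example_synproxy_disable() are
 * hypothetical names for illustration and are not part of this file.
 */
static int example_synproxy_enable(struct net *net)
{
	struct synproxy_net *snet = synproxy_pernet(net);
	int err;

	err = nf_synproxy_ipv4_init(snet, net);
	if (err)
		return err;

#if IS_ENABLED(CONFIG_IPV6)
	err = nf_synproxy_ipv6_init(snet, net);
	if (err) {
		nf_synproxy_ipv4_fini(snet, net);
		return err;
	}
#endif
	return 0;
}

static void example_synproxy_disable(struct net *net)
{
	struct synproxy_net *snet = synproxy_pernet(net);

#if IS_ENABLED(CONFIG_IPV6)
	nf_synproxy_ipv6_fini(snet, net);
#endif
	nf_synproxy_ipv4_fini(snet, net);
}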
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  HID driver for Kye/Genius devices not fully compliant with HID standard
 *
 *  Copyright (c) 2009 Jiri Kosina
 *  Copyright (c) 2009 Tomas Hanak
 *  Copyright (c) 2012 Nikolai Kondrashov
 *  Copyright (c) 2023 David Yang
 */

#include <linux/unaligned.h>
#include <linux/device.h>
#include <linux/hid.h>
#include <linux/module.h>

#include "hid-ids.h"

/* Data gathered from Database/VID0458_PID????/Vista/TBoard/default.xml in
 * ioTablet driver
 *
 * TODO:
 *  - Add battery and sleep support for EasyPen M406W and MousePen M508WX
 *  - Investigate ScrollZ.MiceFMT buttons of EasyPen M406
 */

static const __u8 easypen_m406_control_rdesc[] = {
	0x05, 0x0C,		/*  Usage Page (Consumer),		*/
	0x09, 0x01,		/*  Usage (Consumer Control),		*/
	0xA1, 0x01,		/*  Collection (Application),		*/
	0x85, 0x12,		/*  Report ID (18),			*/
	0x0A, 0x45, 0x02,	/*  Usage (AC Rotate),			*/
	0x09,
0x40, /* Usage (Menu), */ 0x0A, 0x2F, 0x02, /* Usage (AC Zoom), */ 0x0A, 0x46, 0x02, /* Usage (AC Resize), */ 0x0A, 0x1A, 0x02, /* Usage (AC Undo), */ 0x0A, 0x6A, 0x02, /* Usage (AC Delete), */ 0x0A, 0x24, 0x02, /* Usage (AC Back), */ 0x0A, 0x25, 0x02, /* Usage (AC Forward), */ 0x14, /* Logical Minimum (0), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x75, 0x01, /* Report Size (1), */ 0x95, 0x08, /* Report Count (8), */ 0x81, 0x02, /* Input (Variable), */ 0x95, 0x30, /* Report Count (48), */ 0x81, 0x01, /* Input (Constant), */ 0xC0 /* End Collection */ }; static const __u8 easypen_m506_control_rdesc[] = { 0x05, 0x0C, /* Usage Page (Consumer), */ 0x09, 0x01, /* Usage (Consumer Control), */ 0xA1, 0x01, /* Collection (Application), */ 0x85, 0x12, /* Report ID (18), */ 0x0A, 0x6A, 0x02, /* Usage (AC Delete), */ 0x0A, 0x1A, 0x02, /* Usage (AC Undo), */ 0x0A, 0x2D, 0x02, /* Usage (AC Zoom In), */ 0x0A, 0x2E, 0x02, /* Usage (AC Zoom Out), */ 0x14, /* Logical Minimum (0), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x75, 0x01, /* Report Size (1), */ 0x95, 0x04, /* Report Count (4), */ 0x81, 0x02, /* Input (Variable), */ 0x95, 0x34, /* Report Count (52), */ 0x81, 0x01, /* Input (Constant), */ 0xC0 /* End Collection */ }; static const __u8 easypen_m406w_control_rdesc[] = { 0x05, 0x0C, /* Usage Page (Consumer), */ 0x09, 0x01, /* Usage (Consumer Control), */ 0xA1, 0x01, /* Collection (Application), */ 0x85, 0x12, /* Report ID (18), */ 0x0A, 0x6A, 0x02, /* Usage (AC Delete), */ 0x0A, 0x1A, 0x02, /* Usage (AC Undo), */ 0x0A, 0x01, 0x02, /* Usage (AC New), */ 0x09, 0x40, /* Usage (Menu), */ 0x14, /* Logical Minimum (0), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x75, 0x01, /* Report Size (1), */ 0x95, 0x04, /* Report Count (4), */ 0x81, 0x02, /* Input (Variable), */ 0x95, 0x34, /* Report Count (52), */ 0x81, 0x01, /* Input (Constant), */ 0xC0 /* End Collection */ }; static const __u8 easypen_m610x_control_rdesc[] = { 0x05, 0x0C, /* Usage Page (Consumer), */ 0x09, 0x01, /* Usage (Consumer Control), */ 0xA1, 0x01, /* Collection (Application), */ 0x85, 0x12, /* Report ID (18), */ 0x0A, 0x1A, 0x02, /* Usage (AC Undo), */ 0x0A, 0x79, 0x02, /* Usage (AC Redo Or Repeat), */ 0x0A, 0x2D, 0x02, /* Usage (AC Zoom In), */ 0x0A, 0x2E, 0x02, /* Usage (AC Zoom Out), */ 0x14, /* Logical Minimum (0), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x75, 0x01, /* Report Size (1), */ 0x95, 0x04, /* Report Count (4), */ 0x81, 0x02, /* Input (Variable), */ 0x95, 0x34, /* Report Count (52), */ 0x81, 0x01, /* Input (Constant), */ 0xC0 /* End Collection */ }; static const __u8 pensketch_m912_control_rdesc[] = { 0x05, 0x0C, /* Usage Page (Consumer), */ 0x09, 0x01, /* Usage (Consumer Control), */ 0xA1, 0x01, /* Collection (Application), */ 0x85, 0x12, /* Report ID (18), */ 0x14, /* Logical Minimum (0), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x75, 0x01, /* Report Size (1), */ 0x95, 0x08, /* Report Count (8), */ 0x05, 0x0C, /* Usage Page (Consumer), */ 0x0A, 0x6A, 0x02, /* Usage (AC Delete), */ 0x0A, 0x1A, 0x02, /* Usage (AC Undo), */ 0x0A, 0x01, 0x02, /* Usage (AC New), */ 0x0A, 0x2F, 0x02, /* Usage (AC Zoom), */ 0x0A, 0x25, 0x02, /* Usage (AC Forward), */ 0x0A, 0x24, 0x02, /* Usage (AC Back), */ 0x0A, 0x2D, 0x02, /* Usage (AC Zoom In), */ 0x0A, 0x2E, 0x02, /* Usage (AC Zoom Out), */ 0x81, 0x02, /* Input (Variable), */ 0x95, 0x30, /* Report Count (48), */ 0x81, 0x03, /* Input (Constant, Variable), */ 0xC0 /* End Collection */ }; static const __u8 mousepen_m508wx_control_rdesc[] = { 0x05, 0x0C, /* Usage Page (Consumer), */ 0x09, 0x01, /* 
Usage (Consumer Control), */ 0xA1, 0x01, /* Collection (Application), */ 0x85, 0x12, /* Report ID (18), */ 0x0A, 0x1A, 0x02, /* Usage (AC Undo), */ 0x0A, 0x6A, 0x02, /* Usage (AC Delete), */ 0x0A, 0x2D, 0x02, /* Usage (AC Zoom In), */ 0x0A, 0x2E, 0x02, /* Usage (AC Zoom Out), */ 0x14, /* Logical Minimum (0), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x75, 0x01, /* Report Size (1), */ 0x95, 0x04, /* Report Count (4), */ 0x81, 0x02, /* Input (Variable), */ 0x95, 0x34, /* Report Count (52), */ 0x81, 0x01, /* Input (Constant), */ 0xC0 /* End Collection */ }; static const __u8 mousepen_m508x_control_rdesc[] = { 0x05, 0x0C, /* Usage Page (Consumer), */ 0x09, 0x01, /* Usage (Consumer Control), */ 0xA1, 0x01, /* Collection (Application), */ 0x85, 0x12, /* Report ID (18), */ 0x0A, 0x01, 0x02, /* Usage (AC New), */ 0x09, 0x40, /* Usage (Menu), */ 0x0A, 0x6A, 0x02, /* Usage (AC Delete), */ 0x0A, 0x1A, 0x02, /* Usage (AC Undo), */ 0x14, /* Logical Minimum (0), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x75, 0x01, /* Report Size (1), */ 0x95, 0x04, /* Report Count (4), */ 0x81, 0x02, /* Input (Variable), */ 0x81, 0x01, /* Input (Constant), */ 0x15, 0xFF, /* Logical Minimum (-1), */ 0x95, 0x10, /* Report Count (16), */ 0x81, 0x01, /* Input (Constant), */ 0x0A, 0x35, 0x02, /* Usage (AC Scroll), */ 0x0A, 0x2F, 0x02, /* Usage (AC Zoom), */ 0x0A, 0x38, 0x02, /* Usage (AC Pan), */ 0x75, 0x08, /* Report Size (8), */ 0x95, 0x03, /* Report Count (3), */ 0x81, 0x06, /* Input (Variable, Relative), */ 0x95, 0x01, /* Report Count (1), */ 0x81, 0x01, /* Input (Constant), */ 0xC0 /* End Collection */ }; static const __u8 easypen_m406xe_control_rdesc[] = { 0x05, 0x0C, /* Usage Page (Consumer), */ 0x09, 0x01, /* Usage (Consumer Control), */ 0xA1, 0x01, /* Collection (Application), */ 0x85, 0x12, /* Report ID (18), */ 0x14, /* Logical Minimum (0), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x75, 0x01, /* Report Size (1), */ 0x95, 0x04, /* Report Count (4), */ 0x0A, 0x79, 0x02, /* Usage (AC Redo Or Repeat), */ 0x0A, 0x1A, 0x02, /* Usage (AC Undo), */ 0x0A, 0x2D, 0x02, /* Usage (AC Zoom In), */ 0x0A, 0x2E, 0x02, /* Usage (AC Zoom Out), */ 0x81, 0x02, /* Input (Variable), */ 0x95, 0x34, /* Report Count (52), */ 0x81, 0x03, /* Input (Constant, Variable), */ 0xC0 /* End Collection */ }; static const __u8 pensketch_t609a_control_rdesc[] = { 0x05, 0x0C, /* Usage Page (Consumer), */ 0x09, 0x01, /* Usage (Consumer Control), */ 0xA1, 0x01, /* Collection (Application), */ 0x85, 0x12, /* Report ID (18), */ 0x0A, 0x6A, 0x02, /* Usage (AC Delete), */ 0x14, /* Logical Minimum (0), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x75, 0x01, /* Report Size (1), */ 0x95, 0x08, /* Report Count (8), */ 0x81, 0x02, /* Input (Variable), */ 0x95, 0x37, /* Report Count (55), */ 0x81, 0x01, /* Input (Constant), */ 0xC0 /* End Collection */ }; /* Fix indexes in kye_tablet_fixup() if you change this */ static const __u8 kye_tablet_rdesc[] = { 0x06, 0x00, 0xFF, /* Usage Page (FF00h), */ 0x09, 0x01, /* Usage (01h), */ 0xA1, 0x01, /* Collection (Application), */ 0x85, 0x05, /* Report ID (5), */ 0x09, 0x01, /* Usage (01h), */ 0x15, 0x81, /* Logical Minimum (-127), */ 0x25, 0x7F, /* Logical Maximum (127), */ 0x75, 0x08, /* Report Size (8), */ 0x95, 0x07, /* Report Count (7), */ 0xB1, 0x02, /* Feature (Variable), */ 0xC0, /* End Collection, */ 0x05, 0x0D, /* Usage Page (Digitizer), */ 0x09, 0x01, /* Usage (Digitizer), */ 0xA1, 0x01, /* Collection (Application), */ 0x85, 0x10, /* Report ID (16), */ 0x09, 0x20, /* Usage (Stylus), */ 0xA0, /* Collection 
(Physical), */ 0x09, 0x42, /* Usage (Tip Switch), */ 0x09, 0x44, /* Usage (Barrel Switch), */ 0x09, 0x46, /* Usage (Tablet Pick), */ 0x14, /* Logical Minimum (0), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x75, 0x01, /* Report Size (1), */ 0x95, 0x03, /* Report Count (3), */ 0x81, 0x02, /* Input (Variable), */ 0x95, 0x04, /* Report Count (4), */ 0x81, 0x01, /* Input (Constant), */ 0x09, 0x32, /* Usage (In Range), */ 0x95, 0x01, /* Report Count (1), */ 0x81, 0x02, /* Input (Variable), */ 0x75, 0x10, /* Report Size (16), */ 0xA4, /* Push, */ 0x05, 0x01, /* Usage Page (Desktop), */ 0x09, 0x30, /* Usage (X), */ 0x27, 0xFF, 0x7F, 0x00, 0x00, /* Logical Maximum (32767), */ 0x34, /* Physical Minimum (0), */ 0x47, 0x00, 0x00, 0x00, 0x00, /* Physical Maximum (0), */ 0x65, 0x11, /* Unit (Centimeter), */ 0x55, 0x00, /* Unit Exponent (0), */ 0x75, 0x10, /* Report Size (16), */ 0x81, 0x02, /* Input (Variable), */ 0x09, 0x31, /* Usage (Y), */ 0x27, 0xFF, 0x7F, 0x00, 0x00, /* Logical Maximum (32767), */ 0x47, 0x00, 0x00, 0x00, 0x00, /* Physical Maximum (0), */ 0x81, 0x02, /* Input (Variable), */ 0xB4, /* Pop, */ 0x05, 0x0D, /* Usage Page (Digitizer), */ 0x09, 0x30, /* Usage (Tip Pressure), */ 0x27, 0xFF, 0x07, 0x00, 0x00, /* Logical Maximum (2047), */ 0x81, 0x02, /* Input (Variable), */ 0xC0, /* End Collection, */ 0xC0 /* End Collection, */ }; /* Fix indexes in kye_tablet_fixup() if you change this */ static const __u8 kye_tablet_mouse_rdesc[] = { 0x05, 0x01, /* Usage Page (Desktop), */ 0x09, 0x02, /* Usage (Mouse), */ 0xA1, 0x01, /* Collection (Application), */ 0x85, 0x11, /* Report ID (17), */ 0x09, 0x01, /* Usage (Pointer), */ 0xA0, /* Collection (Physical), */ 0x05, 0x09, /* Usage Page (Button), */ 0x19, 0x01, /* Usage Minimum (01h), */ 0x29, 0x03, /* Usage Maximum (03h), */ 0x14, /* Logical Minimum (0), */ 0x25, 0x01, /* Logical Maximum (1), */ 0x75, 0x01, /* Report Size (1), */ 0x95, 0x03, /* Report Count (3), */ 0x81, 0x02, /* Input (Variable), */ 0x95, 0x04, /* Report Count (4), */ 0x81, 0x01, /* Input (Constant), */ 0x05, 0x0D, /* Usage Page (Digitizer), */ 0x09, 0x37, /* Usage (Data Valid), */ 0x95, 0x01, /* Report Count (1), */ 0x81, 0x02, /* Input (Variable), */ 0x05, 0x01, /* Usage Page (Desktop), */ 0xA4, /* Push, */ 0x09, 0x30, /* Usage (X), */ 0x27, 0xFF, 0x7F, 0x00, 0x00, /* Logical Maximum (32767), */ 0x34, /* Physical Minimum (0), */ 0x47, 0x00, 0x00, 0x00, 0x00, /* Physical Maximum (0), */ 0x65, 0x11, /* Unit (Centimeter), */ 0x55, 0x00, /* Unit Exponent (0), */ 0x75, 0x10, /* Report Size (16), */ 0x81, 0x02, /* Input (Variable), */ 0x09, 0x31, /* Usage (Y), */ 0x27, 0xFF, 0x7F, 0x00, 0x00, /* Logical Maximum (32767), */ 0x47, 0x00, 0x00, 0x00, 0x00, /* Physical Maximum (0), */ 0x81, 0x02, /* Input (Variable), */ 0xB4, /* Pop, */ 0x09, 0x38, /* Usage (Wheel), */ 0x15, 0xFF, /* Logical Minimum (-1), */ 0x75, 0x08, /* Report Size (8), */ 0x95, 0x01, /* Report Count (1), */ 0x81, 0x06, /* Input (Variable, Relative), */ 0x81, 0x01, /* Input (Constant), */ 0xC0, /* End Collection, */ 0xC0 /* End Collection */ }; static const struct kye_tablet_info { __u32 product; __s32 x_logical_maximum; __s32 y_logical_maximum; __s32 pressure_logical_maximum; __s32 x_physical_maximum; __s32 y_physical_maximum; __s8 unit_exponent; __s8 unit; bool has_mouse; unsigned int control_rsize; const __u8 *control_rdesc; } kye_tablets_info[] = { {USB_DEVICE_ID_KYE_EASYPEN_M406, /* 0x5005 */ 15360, 10240, 1023, 6, 4, 0, 0x13, false, sizeof(easypen_m406_control_rdesc), easypen_m406_control_rdesc}, 
{USB_DEVICE_ID_KYE_EASYPEN_M506, /* 0x500F */ 24576, 20480, 1023, 6, 5, 0, 0x13, false, sizeof(easypen_m506_control_rdesc), easypen_m506_control_rdesc}, {USB_DEVICE_ID_KYE_EASYPEN_I405X, /* 0x5010 */ 14080, 10240, 1023, 55, 40, -1, 0x13, false}, {USB_DEVICE_ID_KYE_MOUSEPEN_I608X, /* 0x5011 */ 20480, 15360, 2047, 8, 6, 0, 0x13, true}, {USB_DEVICE_ID_KYE_EASYPEN_M406W, /* 0x5012 */ 15360, 10240, 1023, 6, 4, 0, 0x13, false, sizeof(easypen_m406w_control_rdesc), easypen_m406w_control_rdesc}, {USB_DEVICE_ID_KYE_EASYPEN_M610X, /* 0x5013 */ 40960, 25600, 1023, 1000, 625, -2, 0x13, false, sizeof(easypen_m610x_control_rdesc), easypen_m610x_control_rdesc}, {USB_DEVICE_ID_KYE_EASYPEN_340, /* 0x5014 */ 10240, 7680, 1023, 4, 3, 0, 0x13, false}, {USB_DEVICE_ID_KYE_PENSKETCH_M912, /* 0x5015 */ 61440, 46080, 2047, 12, 9, 0, 0x13, true, sizeof(pensketch_m912_control_rdesc), pensketch_m912_control_rdesc}, {USB_DEVICE_ID_KYE_MOUSEPEN_M508WX, /* 0x5016 */ 40960, 25600, 2047, 8, 5, 0, 0x13, true, sizeof(mousepen_m508wx_control_rdesc), mousepen_m508wx_control_rdesc}, {USB_DEVICE_ID_KYE_MOUSEPEN_M508X, /* 0x5017 */ 40960, 25600, 2047, 8, 5, 0, 0x13, true, sizeof(mousepen_m508x_control_rdesc), mousepen_m508x_control_rdesc}, {USB_DEVICE_ID_KYE_EASYPEN_M406XE, /* 0x5019 */ 15360, 10240, 1023, 6, 4, 0, 0x13, false, sizeof(easypen_m406xe_control_rdesc), easypen_m406xe_control_rdesc}, {USB_DEVICE_ID_KYE_MOUSEPEN_I608X_V2, /* 0x501A */ 40960, 30720, 2047, 8, 6, 0, 0x13, true}, {USB_DEVICE_ID_KYE_PENSKETCH_T609A, /* 0x501B */ 43520, 28160, 1023, 85, 55, -1, 0x13, false, sizeof(pensketch_t609a_control_rdesc), pensketch_t609a_control_rdesc}, {} }; static __u8 *kye_consumer_control_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize, int offset, const char *device_name) { /* * the fixup that need to be done: * - change Usage Maximum in the Consumer Control * (report ID 3) to a reasonable value */ if (*rsize >= offset + 31 && /* Usage Page (Consumer Devices) */ rdesc[offset] == 0x05 && rdesc[offset + 1] == 0x0c && /* Usage (Consumer Control) */ rdesc[offset + 2] == 0x09 && rdesc[offset + 3] == 0x01 && /* Usage Maximum > 12287 */ rdesc[offset + 10] == 0x2a && rdesc[offset + 12] > 0x2f) { hid_info(hdev, "fixing up %s report descriptor\n", device_name); rdesc[offset + 12] = 0x2f; } return rdesc; } /* * Fix tablet descriptor of so-called "DataFormat 2". * * Though we may achieve a usable descriptor from original vendor-defined one, * some problems exist: * - Their Logical Maximum never exceed 32767 (7F FF), though device do report * values greater than that; * - Physical Maximums are arbitrarily filled (always equal to Logical * Maximum); * - Detail for control buttons are not provided (a vendor-defined Usage Page * with fixed content). * * Thus we use a pre-defined parameter table rather than digging it from * original descriptor. * * We may as well write a fallback routine for unrecognized kye tablet, but it's * clear kye are unlikely to produce new models in the foreseeable future, so we * simply enumerate all possible models. 
*/ static __u8 *kye_tablet_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { const struct kye_tablet_info *info; __u8 *newdesc = rdesc; if (*rsize < sizeof(kye_tablet_rdesc)) { hid_warn(hdev, "tablet report size too small, or kye_tablet_rdesc unexpectedly large\n"); return rdesc; } for (info = kye_tablets_info; info->product; info++) { if (hdev->product == info->product) break; } if (!info->product) { hid_err(hdev, "tablet unknown, someone forget to add kye_tablet_info entry?\n"); return rdesc; } memcpy(newdesc, kye_tablet_rdesc, sizeof(kye_tablet_rdesc)); put_unaligned_le32(info->x_logical_maximum, newdesc + 66); put_unaligned_le32(info->x_physical_maximum, newdesc + 72); newdesc[77] = info->unit; newdesc[79] = info->unit_exponent; put_unaligned_le32(info->y_logical_maximum, newdesc + 87); put_unaligned_le32(info->y_physical_maximum, newdesc + 92); put_unaligned_le32(info->pressure_logical_maximum, newdesc + 104); newdesc += sizeof(kye_tablet_rdesc); if (info->has_mouse) { if (newdesc + sizeof(kye_tablet_mouse_rdesc) > rdesc + *rsize) hid_err(hdev, "control desc unexpectedly large\n"); else { memcpy(newdesc, kye_tablet_mouse_rdesc, sizeof(kye_tablet_mouse_rdesc)); put_unaligned_le32(info->x_logical_maximum, newdesc + 44); put_unaligned_le32(info->x_physical_maximum, newdesc + 50); newdesc[55] = info->unit; newdesc[57] = info->unit_exponent; put_unaligned_le32(info->y_logical_maximum, newdesc + 65); put_unaligned_le32(info->y_physical_maximum, newdesc + 70); newdesc += sizeof(kye_tablet_mouse_rdesc); } } if (info->control_rsize) { if (newdesc + info->control_rsize > rdesc + *rsize) hid_err(hdev, "control desc unexpectedly large\n"); else { memcpy(newdesc, info->control_rdesc, info->control_rsize); newdesc += info->control_rsize; } } *rsize = newdesc - rdesc; return rdesc; } static const __u8 *kye_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { switch (hdev->product) { case USB_DEVICE_ID_KYE_ERGO_525V: /* the fixups that need to be done: * - change led usage page to button for extra buttons * - report size 8 count 1 must be size 1 count 8 for button * bitfield * - change the button usage range to 4-7 for the extra * buttons */ if (*rsize >= 75 && rdesc[61] == 0x05 && rdesc[62] == 0x08 && rdesc[63] == 0x19 && rdesc[64] == 0x08 && rdesc[65] == 0x29 && rdesc[66] == 0x0f && rdesc[71] == 0x75 && rdesc[72] == 0x08 && rdesc[73] == 0x95 && rdesc[74] == 0x01) { hid_info(hdev, "fixing up Kye/Genius Ergo Mouse " "report descriptor\n"); rdesc[62] = 0x09; rdesc[64] = 0x04; rdesc[66] = 0x07; rdesc[72] = 0x01; rdesc[74] = 0x08; } break; case USB_DEVICE_ID_GENIUS_GILA_GAMING_MOUSE: rdesc = kye_consumer_control_fixup(hdev, rdesc, rsize, 104, "Genius Gila Gaming Mouse"); break; case USB_DEVICE_ID_GENIUS_MANTICORE: rdesc = kye_consumer_control_fixup(hdev, rdesc, rsize, 104, "Genius Manticore Keyboard"); break; case USB_DEVICE_ID_GENIUS_GX_IMPERATOR: rdesc = kye_consumer_control_fixup(hdev, rdesc, rsize, 83, "Genius Gx Imperator Keyboard"); break; case USB_DEVICE_ID_KYE_EASYPEN_M406: case USB_DEVICE_ID_KYE_EASYPEN_M506: case USB_DEVICE_ID_KYE_EASYPEN_I405X: case USB_DEVICE_ID_KYE_MOUSEPEN_I608X: case USB_DEVICE_ID_KYE_EASYPEN_M406W: case USB_DEVICE_ID_KYE_EASYPEN_M610X: case USB_DEVICE_ID_KYE_EASYPEN_340: case USB_DEVICE_ID_KYE_PENSKETCH_M912: case USB_DEVICE_ID_KYE_MOUSEPEN_M508WX: case USB_DEVICE_ID_KYE_MOUSEPEN_M508X: case USB_DEVICE_ID_KYE_EASYPEN_M406XE: case USB_DEVICE_ID_KYE_MOUSEPEN_I608X_V2: case USB_DEVICE_ID_KYE_PENSKETCH_T609A: rdesc = 
kye_tablet_fixup(hdev, rdesc, rsize); break; } return rdesc; } static int kye_tablet_enable(struct hid_device *hdev) { struct list_head *list; struct list_head *head; struct hid_report *report; __s32 *value; list = &hdev->report_enum[HID_FEATURE_REPORT].report_list; list_for_each(head, list) { report = list_entry(head, struct hid_report, list); if (report->id == 5) break; } if (head == list) { hid_err(hdev, "tablet-enabling feature report not found\n"); return -ENODEV; } if (report->maxfield < 1 || report->field[0]->report_count < 7) { hid_err(hdev, "invalid tablet-enabling feature report\n"); return -ENODEV; } value = report->field[0]->value; /* * The code is for DataFormat 2 of config xml. They have no obvious * meaning (at least not configurable in Windows driver) except enabling * fully-functional tablet mode (absolute positioning). Otherwise, the * tablet acts like a relative mouse. * * Though there're magic codes for DataFormat 3 and 4, no devices use * these DataFormats. */ value[0] = 0x12; value[1] = 0x10; value[2] = 0x11; value[3] = 0x12; value[4] = 0x00; value[5] = 0x00; value[6] = 0x00; hid_hw_request(hdev, report, HID_REQ_SET_REPORT); return 0; } static int kye_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret; ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); goto err; } ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT); if (ret) { hid_err(hdev, "hw start failed\n"); goto err; } switch (id->product) { case USB_DEVICE_ID_GENIUS_MANTICORE: /* * The manticore keyboard needs to have all the interfaces * opened at least once to be fully functional. */ if (hid_hw_open(hdev)) hid_hw_close(hdev); break; case USB_DEVICE_ID_KYE_EASYPEN_M406: case USB_DEVICE_ID_KYE_EASYPEN_M506: case USB_DEVICE_ID_KYE_EASYPEN_I405X: case USB_DEVICE_ID_KYE_MOUSEPEN_I608X: case USB_DEVICE_ID_KYE_EASYPEN_M406W: case USB_DEVICE_ID_KYE_EASYPEN_M610X: case USB_DEVICE_ID_KYE_EASYPEN_340: case USB_DEVICE_ID_KYE_PENSKETCH_M912: case USB_DEVICE_ID_KYE_MOUSEPEN_M508WX: case USB_DEVICE_ID_KYE_MOUSEPEN_M508X: case USB_DEVICE_ID_KYE_EASYPEN_M406XE: case USB_DEVICE_ID_KYE_MOUSEPEN_I608X_V2: case USB_DEVICE_ID_KYE_PENSKETCH_T609A: ret = kye_tablet_enable(hdev); if (ret) { hid_err(hdev, "tablet enabling failed\n"); goto enabling_err; } break; } return 0; enabling_err: hid_hw_stop(hdev); err: return ret; } static const struct hid_device_id kye_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_ERGO_525V) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_GENIUS_GILA_GAMING_MOUSE) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_GENIUS_MANTICORE) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_GENIUS_GX_IMPERATOR) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M406) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M506) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_I405X) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_I608X) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M406W) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M610X) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_340) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_PENSKETCH_M912) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_M508WX) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_MOUSEPEN_M508X) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M406XE) }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, 
	  USB_DEVICE_ID_KYE_MOUSEPEN_I608X_V2) },
	{ HID_USB_DEVICE(USB_VENDOR_ID_KYE,
	  USB_DEVICE_ID_KYE_PENSKETCH_T609A) },
	{ }
};
MODULE_DEVICE_TABLE(hid, kye_devices);

static struct hid_driver kye_driver = {
	.name = "kye",
	.id_table = kye_devices,
	.probe = kye_probe,
	.report_fixup = kye_report_fixup,
};
module_hid_driver(kye_driver);

MODULE_DESCRIPTION("HID driver for Kye/Genius devices not fully compliant with HID standard");
MODULE_LICENSE("GPL");
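/*
 * For reference, the DataFormat-2 enable sequence that kye_tablet_enable()
 * sends (feature report ID 5, seven bytes 0x12 0x10 0x11 0x12 0x00 0x00 0x00)
 * can also be reproduced from userspace through hidraw. This is a minimal
 * standalone sketch, not part of the driver; the /dev/hidraw0 path is an
 * assumption and must be adjusted to the tablet's actual hidraw node.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/hidraw.h>

int main(void)
{
	/* buf[0] is the report ID, followed by the seven magic bytes above */
	unsigned char buf[8] = { 0x05, 0x12, 0x10, 0x11, 0x12, 0x00, 0x00, 0x00 };
	int fd = open("/dev/hidraw0", O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (ioctl(fd, HIDIOCSFEATURE(sizeof(buf)), buf) < 0)
		perror("HIDIOCSFEATURE");
	close(fd);
	return 0;
}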
2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 
2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 
3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 
4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 
5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Definitions for the Interfaces handler. * * Version: @(#)dev.h 1.0.10 08/12/93 * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Corey Minyard <wf-rch!minyard@relay.EU.net> * Donald J. Becker, <becker@cesdis.gsfc.nasa.gov> * Alan Cox, <alan@lxorguk.ukuu.org.uk> * Bjorn Ekwall. 
<bj0rn@blox.se> * Pekka Riikonen <priikone@poseidon.pspt.fi> * * Moved to /usr/include/linux for NET3 */ #ifndef _LINUX_NETDEVICE_H #define _LINUX_NETDEVICE_H #include <linux/timer.h> #include <linux/bug.h> #include <linux/delay.h> #include <linux/atomic.h> #include <linux/prefetch.h> #include <asm/cache.h> #include <asm/byteorder.h> #include <asm/local.h> #include <linux/percpu.h> #include <linux/rculist.h> #include <linux/workqueue.h> #include <linux/dynamic_queue_limits.h> #include <net/net_namespace.h> #ifdef CONFIG_DCB #include <net/dcbnl.h> #endif #include <net/netprio_cgroup.h> #include <linux/netdev_features.h> #include <linux/neighbour.h> #include <linux/netdevice_xmit.h> #include <uapi/linux/netdevice.h> #include <uapi/linux/if_bonding.h> #include <uapi/linux/pkt_cls.h> #include <uapi/linux/netdev.h> #include <linux/hashtable.h> #include <linux/rbtree.h> #include <net/net_trackers.h> #include <net/net_debug.h> #include <net/dropreason-core.h> #include <net/neighbour_tables.h> struct netpoll_info; struct device; struct ethtool_ops; struct kernel_hwtstamp_config; struct phy_device; struct dsa_port; struct ip_tunnel_parm_kern; struct macsec_context; struct macsec_ops; struct netdev_config; struct netdev_name_node; struct sd_flow_limit; struct sfp_bus; /* 802.11 specific */ struct wireless_dev; /* 802.15.4 specific */ struct wpan_dev; struct mpls_dev; /* UDP Tunnel offloads */ struct udp_tunnel_info; struct udp_tunnel_nic_info; struct udp_tunnel_nic; struct bpf_prog; struct xdp_buff; struct xdp_frame; struct xdp_metadata_ops; struct xdp_md; struct ethtool_netdev_state; struct phy_link_topology; struct hwtstamp_provider; typedef u32 xdp_features_t; void synchronize_net(void); void netdev_set_default_ethtool_ops(struct net_device *dev, const struct ethtool_ops *ops); void netdev_sw_irq_coalesce_default_on(struct net_device *dev); /* Backlog congestion levels */ #define NET_RX_SUCCESS 0 /* keep 'em coming, baby */ #define NET_RX_DROP 1 /* packet dropped */ #define MAX_NEST_DEV 8 /* * Transmit return codes: transmit return codes originate from three different * namespaces: * * - qdisc return codes * - driver transmit return codes * - errno values * * Drivers are allowed to return any one of those in their hard_start_xmit() * function. Real network devices commonly used with qdiscs should only return * the driver transmit return codes though - when qdiscs are used, the actual * transmission happens asynchronously, so the value is not propagated to * higher layers. Virtual network devices transmit synchronously; in this case * the driver transmit return codes are consumed by dev_queue_xmit(), and all * others are propagated to higher layers. */ /* qdisc ->enqueue() return codes. */ #define NET_XMIT_SUCCESS 0x00 #define NET_XMIT_DROP 0x01 /* skb dropped */ #define NET_XMIT_CN 0x02 /* congestion notification */ #define NET_XMIT_MASK 0x0f /* qdisc flags in net/sch_generic.h */ /* NET_XMIT_CN is special. It does not guarantee that this packet is lost. It * indicates that the device will soon be dropping packets, or already drops * some packets of the same priority; prompting us to send less aggressively. */ #define net_xmit_eval(e) ((e) == NET_XMIT_CN ? 0 : (e)) #define net_xmit_errno(e) ((e) != NET_XMIT_CN ? 
-ENOBUFS : 0) /* Driver transmit return codes */ #define NETDEV_TX_MASK 0xf0 enum netdev_tx { __NETDEV_TX_MIN = INT_MIN, /* make sure enum is signed */ NETDEV_TX_OK = 0x00, /* driver took care of packet */ NETDEV_TX_BUSY = 0x10, /* driver tx path was busy*/ }; typedef enum netdev_tx netdev_tx_t; /* * Current order: NETDEV_TX_MASK > NET_XMIT_MASK >= 0 is significant; * hard_start_xmit() return < NET_XMIT_MASK means skb was consumed. */ static inline bool dev_xmit_complete(int rc) { /* * Positive cases with an skb consumed by a driver: * - successful transmission (rc == NETDEV_TX_OK) * - error while transmitting (rc < 0) * - error while queueing to a different device (rc & NET_XMIT_MASK) */ if (likely(rc < NET_XMIT_MASK)) return true; return false; } /* * Compute the worst-case header length according to the protocols * used. */ #if defined(CONFIG_HYPERV_NET) # define LL_MAX_HEADER 128 #elif defined(CONFIG_WLAN) || IS_ENABLED(CONFIG_AX25) # if defined(CONFIG_MAC80211_MESH) # define LL_MAX_HEADER 128 # else # define LL_MAX_HEADER 96 # endif #else # define LL_MAX_HEADER 32 #endif #if !IS_ENABLED(CONFIG_NET_IPIP) && !IS_ENABLED(CONFIG_NET_IPGRE) && \ !IS_ENABLED(CONFIG_IPV6_SIT) && !IS_ENABLED(CONFIG_IPV6_TUNNEL) #define MAX_HEADER LL_MAX_HEADER #else #define MAX_HEADER (LL_MAX_HEADER + 48) #endif /* * Old network device statistics. Fields are native words * (unsigned long) so they can be read and written atomically. */ #define NET_DEV_STAT(FIELD) \ union { \ unsigned long FIELD; \ atomic_long_t __##FIELD; \ } struct net_device_stats { NET_DEV_STAT(rx_packets); NET_DEV_STAT(tx_packets); NET_DEV_STAT(rx_bytes); NET_DEV_STAT(tx_bytes); NET_DEV_STAT(rx_errors); NET_DEV_STAT(tx_errors); NET_DEV_STAT(rx_dropped); NET_DEV_STAT(tx_dropped); NET_DEV_STAT(multicast); NET_DEV_STAT(collisions); NET_DEV_STAT(rx_length_errors); NET_DEV_STAT(rx_over_errors); NET_DEV_STAT(rx_crc_errors); NET_DEV_STAT(rx_frame_errors); NET_DEV_STAT(rx_fifo_errors); NET_DEV_STAT(rx_missed_errors); NET_DEV_STAT(tx_aborted_errors); NET_DEV_STAT(tx_carrier_errors); NET_DEV_STAT(tx_fifo_errors); NET_DEV_STAT(tx_heartbeat_errors); NET_DEV_STAT(tx_window_errors); NET_DEV_STAT(rx_compressed); NET_DEV_STAT(tx_compressed); }; #undef NET_DEV_STAT /* per-cpu stats, allocated on demand. * Try to fit them in a single cache line, for dev_get_stats() sake. 
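 *
 * A minimal, illustrative sketch (not taken from any in-tree driver) of how
 * a driver's error path is expected to bump one of these on-demand per-cpu
 * counters; it assumes the dev_core_stats_rx_dropped_inc() helper that the
 * kernel generates for each field, and my_rx_error() is a hypothetical
 * driver function:
 *
 *	static void my_rx_error(struct net_device *dev, struct sk_buff *skb)
 *	{
 *		dev_core_stats_rx_dropped_inc(dev);
 *		kfree_skb(skb);
 *	}
 *
 * Keeping the counters per-cpu means hot drop paths never bounce a shared
 * cache line; dev_get_stats() sums the per-cpu copies when they are read.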
*/ struct net_device_core_stats { unsigned long rx_dropped; unsigned long tx_dropped; unsigned long rx_nohandler; unsigned long rx_otherhost_dropped; } __aligned(4 * sizeof(unsigned long)); #include <linux/cache.h> #include <linux/skbuff.h> struct neighbour; struct neigh_parms; struct sk_buff; struct netdev_hw_addr { struct list_head list; struct rb_node node; unsigned char addr[MAX_ADDR_LEN]; unsigned char type; #define NETDEV_HW_ADDR_T_LAN 1 #define NETDEV_HW_ADDR_T_SAN 2 #define NETDEV_HW_ADDR_T_UNICAST 3 #define NETDEV_HW_ADDR_T_MULTICAST 4 bool global_use; int sync_cnt; int refcount; int synced; struct rcu_head rcu_head; }; struct netdev_hw_addr_list { struct list_head list; int count; /* Auxiliary tree for faster lookup on addition and deletion */ struct rb_root tree; }; #define netdev_hw_addr_list_count(l) ((l)->count) #define netdev_hw_addr_list_empty(l) (netdev_hw_addr_list_count(l) == 0) #define netdev_hw_addr_list_for_each(ha, l) \ list_for_each_entry(ha, &(l)->list, list) #define netdev_uc_count(dev) netdev_hw_addr_list_count(&(dev)->uc) #define netdev_uc_empty(dev) netdev_hw_addr_list_empty(&(dev)->uc) #define netdev_for_each_uc_addr(ha, dev) \ netdev_hw_addr_list_for_each(ha, &(dev)->uc) #define netdev_for_each_synced_uc_addr(_ha, _dev) \ netdev_for_each_uc_addr((_ha), (_dev)) \ if ((_ha)->sync_cnt) #define netdev_mc_count(dev) netdev_hw_addr_list_count(&(dev)->mc) #define netdev_mc_empty(dev) netdev_hw_addr_list_empty(&(dev)->mc) #define netdev_for_each_mc_addr(ha, dev) \ netdev_hw_addr_list_for_each(ha, &(dev)->mc) #define netdev_for_each_synced_mc_addr(_ha, _dev) \ netdev_for_each_mc_addr((_ha), (_dev)) \ if ((_ha)->sync_cnt) struct hh_cache { unsigned int hh_len; seqlock_t hh_lock; /* cached hardware header; allow for machine alignment needs. */ #define HH_DATA_MOD 16 #define HH_DATA_OFF(__len) \ (HH_DATA_MOD - (((__len - 1) & (HH_DATA_MOD - 1)) + 1)) #define HH_DATA_ALIGN(__len) \ (((__len)+(HH_DATA_MOD-1))&~(HH_DATA_MOD - 1)) unsigned long hh_data[HH_DATA_ALIGN(LL_MAX_HEADER) / sizeof(long)]; }; /* Reserve HH_DATA_MOD byte-aligned hard_header_len, but at least that much. * Alternative is: * dev->hard_header_len ? (dev->hard_header_len + * (HH_DATA_MOD - 1)) & ~(HH_DATA_MOD - 1) : 0 * * We could use other alignment values, but we must maintain the * relationship HH alignment <= LL alignment. */ #define LL_RESERVED_SPACE(dev) \ ((((dev)->hard_header_len + READ_ONCE((dev)->needed_headroom)) \ & ~(HH_DATA_MOD - 1)) + HH_DATA_MOD) #define LL_RESERVED_SPACE_EXTRA(dev,extra) \ ((((dev)->hard_header_len + READ_ONCE((dev)->needed_headroom) + (extra)) \ & ~(HH_DATA_MOD - 1)) + HH_DATA_MOD) struct header_ops { int (*create) (struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned int len); int (*parse)(const struct sk_buff *skb, unsigned char *haddr); int (*cache)(const struct neighbour *neigh, struct hh_cache *hh, __be16 type); void (*cache_update)(struct hh_cache *hh, const struct net_device *dev, const unsigned char *haddr); bool (*validate)(const char *ll_header, unsigned int len); __be16 (*parse_protocol)(const struct sk_buff *skb); }; /* These flag bits are private to the generic network queueing * layer; they may not be explicitly referenced by any other * code. 
*/ enum netdev_state_t { __LINK_STATE_START, __LINK_STATE_PRESENT, __LINK_STATE_NOCARRIER, __LINK_STATE_LINKWATCH_PENDING, __LINK_STATE_DORMANT, __LINK_STATE_TESTING, }; struct gro_list { struct list_head list; int count; }; /* * size of gro hash buckets, must be <= the number of bits in * gro_node::bitmask */ #define GRO_HASH_BUCKETS 8 /** * struct gro_node - structure to support Generic Receive Offload * @bitmask: bitmask to indicate used buckets in @hash * @hash: hashtable of pending aggregated skbs, separated by flows * @rx_list: list of pending ``GRO_NORMAL`` skbs * @rx_count: cached current length of @rx_list * @cached_napi_id: napi_struct::napi_id cached for hotpath, 0 for standalone */ struct gro_node { unsigned long bitmask; struct gro_list hash[GRO_HASH_BUCKETS]; struct list_head rx_list; u32 rx_count; u32 cached_napi_id; }; /* * Structure for per-NAPI config */ struct napi_config { u64 gro_flush_timeout; u64 irq_suspend_timeout; u32 defer_hard_irqs; cpumask_t affinity_mask; unsigned int napi_id; }; /* * Structure for NAPI scheduling similar to tasklet but with weighting */ struct napi_struct { /* The poll_list must only be managed by the entity which * changes the state of the NAPI_STATE_SCHED bit. This means * whoever atomically sets that bit can add this napi_struct * to the per-CPU poll_list, and whoever clears that bit * can remove from the list right before clearing the bit. */ struct list_head poll_list; unsigned long state; int weight; u32 defer_hard_irqs_count; int (*poll)(struct napi_struct *, int); #ifdef CONFIG_NETPOLL /* CPU actively polling if netpoll is configured */ int poll_owner; #endif /* CPU on which NAPI has been scheduled for processing */ int list_owner; struct net_device *dev; struct sk_buff *skb; struct gro_node gro; struct hrtimer timer; /* all fields past this point are write-protected by netdev_lock */ struct task_struct *thread; unsigned long gro_flush_timeout; unsigned long irq_suspend_timeout; u32 defer_hard_irqs; /* control-path-only fields follow */ u32 napi_id; struct list_head dev_list; struct hlist_node napi_hash_node; int irq; struct irq_affinity_notify notify; int napi_rmap_idx; int index; struct napi_config *config; }; enum { NAPI_STATE_SCHED, /* Poll is scheduled */ NAPI_STATE_MISSED, /* reschedule a napi */ NAPI_STATE_DISABLE, /* Disable pending */ NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */ NAPI_STATE_LISTED, /* NAPI added to system lists */ NAPI_STATE_NO_BUSY_POLL, /* Do not add in napi_hash, no busy polling */ NAPI_STATE_IN_BUSY_POLL, /* sk_busy_loop() owns this NAPI */ NAPI_STATE_PREFER_BUSY_POLL, /* prefer busy-polling over softirq processing*/ NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/ NAPI_STATE_SCHED_THREADED, /* Napi is currently scheduled in threaded mode */ NAPI_STATE_HAS_NOTIFIER, /* Napi has an IRQ notifier */ }; enum { NAPIF_STATE_SCHED = BIT(NAPI_STATE_SCHED), NAPIF_STATE_MISSED = BIT(NAPI_STATE_MISSED), NAPIF_STATE_DISABLE = BIT(NAPI_STATE_DISABLE), NAPIF_STATE_NPSVC = BIT(NAPI_STATE_NPSVC), NAPIF_STATE_LISTED = BIT(NAPI_STATE_LISTED), NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL), NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL), NAPIF_STATE_PREFER_BUSY_POLL = BIT(NAPI_STATE_PREFER_BUSY_POLL), NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED), NAPIF_STATE_SCHED_THREADED = BIT(NAPI_STATE_SCHED_THREADED), NAPIF_STATE_HAS_NOTIFIER = BIT(NAPI_STATE_HAS_NOTIFIER), }; enum gro_result { GRO_MERGED, GRO_MERGED_FREE, GRO_HELD, GRO_NORMAL, GRO_CONSUMED, }; typedef enum 
gro_result gro_result_t; /* * enum rx_handler_result - Possible return values for rx_handlers. * @RX_HANDLER_CONSUMED: skb was consumed by rx_handler, do not process it * further. * @RX_HANDLER_ANOTHER: Do another round in receive path. This is indicated in * case skb->dev was changed by rx_handler. * @RX_HANDLER_EXACT: Force exact delivery, no wildcard. * @RX_HANDLER_PASS: Do nothing, pass the skb as if no rx_handler was called. * * rx_handlers are functions called from inside __netif_receive_skb(), to do * special processing of the skb, prior to delivery to protocol handlers. * * Currently, a net_device can only have a single rx_handler registered. Trying * to register a second rx_handler will return -EBUSY. * * To register a rx_handler on a net_device, use netdev_rx_handler_register(). * To unregister a rx_handler on a net_device, use * netdev_rx_handler_unregister(). * * Upon return, rx_handler is expected to tell __netif_receive_skb() what to * do with the skb. * * If the rx_handler consumed the skb in some way, it should return * RX_HANDLER_CONSUMED. This is appropriate when the rx_handler arranged for * the skb to be delivered in some other way. * * If the rx_handler changed skb->dev, to divert the skb to another * net_device, it should return RX_HANDLER_ANOTHER. The rx_handler for the * new device will be called if it exists. * * If the rx_handler decides the skb should be ignored, it should return * RX_HANDLER_EXACT. The skb will only be delivered to protocol handlers that * are registered on exact device (ptype->dev == skb->dev). * * If the rx_handler didn't change skb->dev, but wants the skb to be normally * delivered, it should return RX_HANDLER_PASS. * * A device without a registered rx_handler will behave as if rx_handler * returned RX_HANDLER_PASS. */ enum rx_handler_result { RX_HANDLER_CONSUMED, RX_HANDLER_ANOTHER, RX_HANDLER_EXACT, RX_HANDLER_PASS, }; typedef enum rx_handler_result rx_handler_result_t; typedef rx_handler_result_t rx_handler_func_t(struct sk_buff **pskb); void __napi_schedule(struct napi_struct *n); void __napi_schedule_irqoff(struct napi_struct *n); static inline bool napi_disable_pending(struct napi_struct *n) { return test_bit(NAPI_STATE_DISABLE, &n->state); } static inline bool napi_prefer_busy_poll(struct napi_struct *n) { return test_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state); } /** * napi_is_scheduled - test if NAPI is scheduled * @n: NAPI context * * This check is "best-effort". With no locking implemented, * a NAPI can be scheduled or terminate right after this check * and produce not precise results. * * NAPI_STATE_SCHED is an internal state, napi_is_scheduled * should not be used normally and napi_schedule should be * used instead. * * Use only if the driver really needs to check if a NAPI * is scheduled for example in the context of delayed timer * that can be skipped if a NAPI is already scheduled. * * Return: True if NAPI is scheduled, False otherwise. */ static inline bool napi_is_scheduled(struct napi_struct *n) { return test_bit(NAPI_STATE_SCHED, &n->state); } bool napi_schedule_prep(struct napi_struct *n); /** * napi_schedule - schedule NAPI poll * @n: NAPI context * * Schedule NAPI poll routine to be called if it is not already * running. * Return: true if we schedule a NAPI or false if not. * Refer to napi_schedule_prep() for additional reason on why * a NAPI might not be scheduled. 
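 *
 * A minimal, illustrative sketch (hypothetical driver names) of the usual
 * calling pattern: the device interrupt handler masks its own RX interrupt
 * and defers the rest of the work to the NAPI poll routine. It assumes
 * struct my_priv embeds the napi_struct and my_disable_rx_irq() pokes
 * device registers:
 *
 *	static irqreturn_t my_irq_handler(int irq, void *data)
 *	{
 *		struct my_priv *priv = data;
 *
 *		my_disable_rx_irq(priv);
 *		napi_schedule(&priv->napi);
 *		return IRQ_HANDLED;
 *	}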
*/ static inline bool napi_schedule(struct napi_struct *n) { if (napi_schedule_prep(n)) { __napi_schedule(n); return true; } return false; } /** * napi_schedule_irqoff - schedule NAPI poll * @n: NAPI context * * Variant of napi_schedule(), assuming hard irqs are masked. */ static inline void napi_schedule_irqoff(struct napi_struct *n) { if (napi_schedule_prep(n)) __napi_schedule_irqoff(n); } /** * napi_complete_done - NAPI processing complete * @n: NAPI context * @work_done: number of packets processed * * Mark NAPI processing as complete. Should only be called if poll budget * has not been completely consumed. * Prefer over napi_complete(). * Return: false if device should avoid rearming interrupts. */ bool napi_complete_done(struct napi_struct *n, int work_done); static inline bool napi_complete(struct napi_struct *n) { return napi_complete_done(n, 0); } int dev_set_threaded(struct net_device *dev, bool threaded); void napi_disable(struct napi_struct *n); void napi_disable_locked(struct napi_struct *n); void napi_enable(struct napi_struct *n); void napi_enable_locked(struct napi_struct *n); /** * napi_synchronize - wait until NAPI is not running * @n: NAPI context * * Wait until NAPI is done being scheduled on this context. * Waits till any outstanding processing completes but * does not disable future activations. */ static inline void napi_synchronize(const struct napi_struct *n) { if (IS_ENABLED(CONFIG_SMP)) while (test_bit(NAPI_STATE_SCHED, &n->state)) msleep(1); else barrier(); } /** * napi_if_scheduled_mark_missed - if napi is running, set the * NAPIF_STATE_MISSED * @n: NAPI context * * If napi is running, set the NAPIF_STATE_MISSED, and return true if * NAPI is scheduled. **/ static inline bool napi_if_scheduled_mark_missed(struct napi_struct *n) { unsigned long val, new; val = READ_ONCE(n->state); do { if (val & NAPIF_STATE_DISABLE) return true; if (!(val & NAPIF_STATE_SCHED)) return false; new = val | NAPIF_STATE_MISSED; } while (!try_cmpxchg(&n->state, &val, new)); return true; } enum netdev_queue_state_t { __QUEUE_STATE_DRV_XOFF, __QUEUE_STATE_STACK_XOFF, __QUEUE_STATE_FROZEN, }; #define QUEUE_STATE_DRV_XOFF (1 << __QUEUE_STATE_DRV_XOFF) #define QUEUE_STATE_STACK_XOFF (1 << __QUEUE_STATE_STACK_XOFF) #define QUEUE_STATE_FROZEN (1 << __QUEUE_STATE_FROZEN) #define QUEUE_STATE_ANY_XOFF (QUEUE_STATE_DRV_XOFF | QUEUE_STATE_STACK_XOFF) #define QUEUE_STATE_ANY_XOFF_OR_FROZEN (QUEUE_STATE_ANY_XOFF | \ QUEUE_STATE_FROZEN) #define QUEUE_STATE_DRV_XOFF_OR_FROZEN (QUEUE_STATE_DRV_XOFF | \ QUEUE_STATE_FROZEN) /* * __QUEUE_STATE_DRV_XOFF is used by drivers to stop the transmit queue. The * netif_tx_* functions below are used to manipulate this flag. The * __QUEUE_STATE_STACK_XOFF flag is used by the stack to stop the transmit * queue independently. The netif_xmit_*stopped functions below are called * to check if the queue has been stopped by the driver or stack (either * of the XOFF bits are set in the state). Drivers should not need to call * netif_xmit*stopped functions, they should only be using netif_tx_*. 
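 *
 * A minimal, illustrative sketch (hypothetical helpers, single TX queue) of
 * the intended usage: stop the queue from ndo_start_xmit() when the ring
 * fills up, and wake it again from the completion path once descriptors
 * have been reclaimed:
 *
 *	static netdev_tx_t my_xmit(struct sk_buff *skb, struct net_device *dev)
 *	{
 *		struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
 *
 *		my_queue_to_hw(dev, skb);
 *		if (my_tx_ring_full(dev))
 *			netif_tx_stop_queue(txq);
 *		return NETDEV_TX_OK;
 *	}
 *
 *	static void my_tx_complete(struct net_device *dev)
 *	{
 *		struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
 *
 *		my_reclaim_tx_descriptors(dev);
 *		if (netif_tx_queue_stopped(txq) && !my_tx_ring_full(dev))
 *			netif_tx_wake_queue(txq);
 *	}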
*/ struct netdev_queue { /* * read-mostly part */ struct net_device *dev; netdevice_tracker dev_tracker; struct Qdisc __rcu *qdisc; struct Qdisc __rcu *qdisc_sleeping; #ifdef CONFIG_SYSFS struct kobject kobj; const struct attribute_group **groups; #endif unsigned long tx_maxrate; /* * Number of TX timeouts for this queue * (/sys/class/net/DEV/Q/trans_timeout) */ atomic_long_t trans_timeout; /* Subordinate device that the queue has been assigned to */ struct net_device *sb_dev; #ifdef CONFIG_XDP_SOCKETS /* "ops protected", see comment about net_device::lock */ struct xsk_buff_pool *pool; #endif /* * write-mostly part */ #ifdef CONFIG_BQL struct dql dql; #endif spinlock_t _xmit_lock ____cacheline_aligned_in_smp; int xmit_lock_owner; /* * Time (in jiffies) of last Tx */ unsigned long trans_start; unsigned long state; /* * slow- / control-path part */ /* NAPI instance for the queue * "ops protected", see comment about net_device::lock */ struct napi_struct *napi; #if defined(CONFIG_XPS) && defined(CONFIG_NUMA) int numa_node; #endif } ____cacheline_aligned_in_smp; extern int sysctl_fb_tunnels_only_for_init_net; extern int sysctl_devconf_inherit_init_net; /* * sysctl_fb_tunnels_only_for_init_net == 0 : For all netns * == 1 : For initns only * == 2 : For none. */ static inline bool net_has_fallback_tunnels(const struct net *net) { #if IS_ENABLED(CONFIG_SYSCTL) int fb_tunnels_only_for_init_net = READ_ONCE(sysctl_fb_tunnels_only_for_init_net); return !fb_tunnels_only_for_init_net || (net_eq(net, &init_net) && fb_tunnels_only_for_init_net == 1); #else return true; #endif } static inline int net_inherit_devconf(void) { #if IS_ENABLED(CONFIG_SYSCTL) return READ_ONCE(sysctl_devconf_inherit_init_net); #else return 0; #endif } static inline int netdev_queue_numa_node_read(const struct netdev_queue *q) { #if defined(CONFIG_XPS) && defined(CONFIG_NUMA) return q->numa_node; #else return NUMA_NO_NODE; #endif } static inline void netdev_queue_numa_node_write(struct netdev_queue *q, int node) { #if defined(CONFIG_XPS) && defined(CONFIG_NUMA) q->numa_node = node; #endif } #ifdef CONFIG_RFS_ACCEL bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id, u16 filter_id); #endif /* XPS map type and offset of the xps map within net_device->xps_maps[]. */ enum xps_map_type { XPS_CPUS = 0, XPS_RXQS, XPS_MAPS_MAX, }; #ifdef CONFIG_XPS /* * This structure holds an XPS map which can be of variable length. The * map is an array of queues. */ struct xps_map { unsigned int len; unsigned int alloc_len; struct rcu_head rcu; u16 queues[]; }; #define XPS_MAP_SIZE(_num) (sizeof(struct xps_map) + ((_num) * sizeof(u16))) #define XPS_MIN_MAP_ALLOC ((L1_CACHE_ALIGN(offsetof(struct xps_map, queues[1])) \ - sizeof(struct xps_map)) / sizeof(u16)) /* * This structure holds all XPS maps for device. Maps are indexed by CPU. * * We keep track of the number of cpus/rxqs used when the struct is allocated, * in nr_ids. This will help not accessing out-of-bound memory. * * We keep track of the number of traffic classes used when the struct is * allocated, in num_tc. This will be used to navigate the maps, to ensure we're * not crossing its upper bound, as the original dev->num_tc can be updated in * the meantime. 
*/ struct xps_dev_maps { struct rcu_head rcu; unsigned int nr_ids; s16 num_tc; struct xps_map __rcu *attr_map[]; /* Either CPUs map or RXQs map */ }; #define XPS_CPU_DEV_MAPS_SIZE(_tcs) (sizeof(struct xps_dev_maps) + \ (nr_cpu_ids * (_tcs) * sizeof(struct xps_map *))) #define XPS_RXQ_DEV_MAPS_SIZE(_tcs, _rxqs) (sizeof(struct xps_dev_maps) +\ (_rxqs * (_tcs) * sizeof(struct xps_map *))) #endif /* CONFIG_XPS */ #define TC_MAX_QUEUE 16 #define TC_BITMASK 15 /* HW offloaded queuing disciplines txq count and offset maps */ struct netdev_tc_txq { u16 count; u16 offset; }; #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) /* * This structure is to hold information about the device * configured to run FCoE protocol stack. */ struct netdev_fcoe_hbainfo { char manufacturer[64]; char serial_number[64]; char hardware_version[64]; char driver_version[64]; char optionrom_version[64]; char firmware_version[64]; char model[256]; char model_description[256]; }; #endif #define MAX_PHYS_ITEM_ID_LEN 32 /* This structure holds a unique identifier to identify some * physical item (port for example) used by a netdevice. */ struct netdev_phys_item_id { unsigned char id[MAX_PHYS_ITEM_ID_LEN]; unsigned char id_len; }; static inline bool netdev_phys_item_id_same(struct netdev_phys_item_id *a, struct netdev_phys_item_id *b) { return a->id_len == b->id_len && memcmp(a->id, b->id, a->id_len) == 0; } typedef u16 (*select_queue_fallback_t)(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); enum net_device_path_type { DEV_PATH_ETHERNET = 0, DEV_PATH_VLAN, DEV_PATH_BRIDGE, DEV_PATH_PPPOE, DEV_PATH_DSA, DEV_PATH_MTK_WDMA, }; struct net_device_path { enum net_device_path_type type; const struct net_device *dev; union { struct { u16 id; __be16 proto; u8 h_dest[ETH_ALEN]; } encap; struct { enum { DEV_PATH_BR_VLAN_KEEP, DEV_PATH_BR_VLAN_TAG, DEV_PATH_BR_VLAN_UNTAG, DEV_PATH_BR_VLAN_UNTAG_HW, } vlan_mode; u16 vlan_id; __be16 vlan_proto; } bridge; struct { int port; u16 proto; } dsa; struct { u8 wdma_idx; u8 queue; u16 wcid; u8 bss; u8 amsdu; } mtk_wdma; }; }; #define NET_DEVICE_PATH_STACK_MAX 5 #define NET_DEVICE_PATH_VLAN_MAX 2 struct net_device_path_stack { int num_paths; struct net_device_path path[NET_DEVICE_PATH_STACK_MAX]; }; struct net_device_path_ctx { const struct net_device *dev; u8 daddr[ETH_ALEN]; int num_vlans; struct { u16 id; __be16 proto; } vlan[NET_DEVICE_PATH_VLAN_MAX]; }; enum tc_setup_type { TC_QUERY_CAPS, TC_SETUP_QDISC_MQPRIO, TC_SETUP_CLSU32, TC_SETUP_CLSFLOWER, TC_SETUP_CLSMATCHALL, TC_SETUP_CLSBPF, TC_SETUP_BLOCK, TC_SETUP_QDISC_CBS, TC_SETUP_QDISC_RED, TC_SETUP_QDISC_PRIO, TC_SETUP_QDISC_MQ, TC_SETUP_QDISC_ETF, TC_SETUP_ROOT_QDISC, TC_SETUP_QDISC_GRED, TC_SETUP_QDISC_TAPRIO, TC_SETUP_FT, TC_SETUP_QDISC_ETS, TC_SETUP_QDISC_TBF, TC_SETUP_QDISC_FIFO, TC_SETUP_QDISC_HTB, TC_SETUP_ACT, }; /* These structures hold the attributes of bpf state that are being passed * to the netdevice through the bpf op. */ enum bpf_netdev_command { /* Set or clear a bpf program used in the earliest stages of packet * rx. The prog will have been loaded as BPF_PROG_TYPE_XDP. The callee * is responsible for calling bpf_prog_put on any old progs that are * stored. In case of error, the callee need not release the new prog * reference, but on success it takes ownership and must bpf_prog_put * when it is no longer used. */ XDP_SETUP_PROG, XDP_SETUP_PROG_HW, /* BPF program for offload callbacks, invoked at program load time. 
*/ BPF_OFFLOAD_MAP_ALLOC, BPF_OFFLOAD_MAP_FREE, XDP_SETUP_XSK_POOL, }; struct bpf_prog_offload_ops; struct netlink_ext_ack; struct xdp_umem; struct xdp_dev_bulk_queue; struct bpf_xdp_link; enum bpf_xdp_mode { XDP_MODE_SKB = 0, XDP_MODE_DRV = 1, XDP_MODE_HW = 2, __MAX_XDP_MODE }; struct bpf_xdp_entity { struct bpf_prog *prog; struct bpf_xdp_link *link; }; struct netdev_bpf { enum bpf_netdev_command command; union { /* XDP_SETUP_PROG */ struct { u32 flags; struct bpf_prog *prog; struct netlink_ext_ack *extack; }; /* BPF_OFFLOAD_MAP_ALLOC, BPF_OFFLOAD_MAP_FREE */ struct { struct bpf_offloaded_map *offmap; }; /* XDP_SETUP_XSK_POOL */ struct { struct xsk_buff_pool *pool; u16 queue_id; } xsk; }; }; /* Flags for ndo_xsk_wakeup. */ #define XDP_WAKEUP_RX (1 << 0) #define XDP_WAKEUP_TX (1 << 1) #ifdef CONFIG_XFRM_OFFLOAD struct xfrmdev_ops { int (*xdo_dev_state_add)(struct net_device *dev, struct xfrm_state *x, struct netlink_ext_ack *extack); void (*xdo_dev_state_delete)(struct net_device *dev, struct xfrm_state *x); void (*xdo_dev_state_free)(struct net_device *dev, struct xfrm_state *x); bool (*xdo_dev_offload_ok) (struct sk_buff *skb, struct xfrm_state *x); void (*xdo_dev_state_advance_esn) (struct xfrm_state *x); void (*xdo_dev_state_update_stats) (struct xfrm_state *x); int (*xdo_dev_policy_add) (struct xfrm_policy *x, struct netlink_ext_ack *extack); void (*xdo_dev_policy_delete) (struct xfrm_policy *x); void (*xdo_dev_policy_free) (struct xfrm_policy *x); }; #endif struct dev_ifalias { struct rcu_head rcuhead; char ifalias[]; }; struct devlink; struct tlsdev_ops; struct netdev_net_notifier { struct list_head list; struct notifier_block *nb; }; /* * This structure defines the management hooks for network devices. * The following hooks can be defined; unless noted otherwise, they are * optional and can be filled with a null pointer. * * int (*ndo_init)(struct net_device *dev); * This function is called once when a network device is registered. * The network device can use this for any late stage initialization * or semantic validation. It can fail with an error code which will * be propagated back to register_netdev. * * void (*ndo_uninit)(struct net_device *dev); * This function is called when device is unregistered or when registration * fails. It is not called if init fails. * * int (*ndo_open)(struct net_device *dev); * This function is called when a network device transitions to the up * state. * * int (*ndo_stop)(struct net_device *dev); * This function is called when a network device transitions to the down * state. * * netdev_tx_t (*ndo_start_xmit)(struct sk_buff *skb, * struct net_device *dev); * Called when a packet needs to be transmitted. * Returns NETDEV_TX_OK. Can return NETDEV_TX_BUSY, but you should stop * the queue before that can happen; it's for obsolete devices and weird * corner cases, but the stack really does a non-trivial amount * of useless work if you return NETDEV_TX_BUSY. * Required; cannot be NULL. * * netdev_features_t (*ndo_features_check)(struct sk_buff *skb, * struct net_device *dev * netdev_features_t features); * Called by core transmit path to determine if device is capable of * performing offload operations on a given packet. This is to give * the device an opportunity to implement any restrictions that cannot * be otherwise expressed by feature flags. The check is called with * the set of features that the stack has calculated and it returns * those the driver believes to be appropriate. 
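 *
 * A minimal, illustrative sketch (hypothetical names) of how these hooks are
 * wired together: only ndo_start_xmit is mandatory, the other hooks may be
 * left NULL. my_queue_to_hw() is assumed to hand the skb to hardware and to
 * free it on its own error paths:
 *
 *	static int my_ndo_open(struct net_device *dev)
 *	{
 *		netif_start_queue(dev);
 *		return 0;
 *	}
 *
 *	static netdev_tx_t my_start_xmit(struct sk_buff *skb,
 *					 struct net_device *dev)
 *	{
 *		my_queue_to_hw(dev, skb);
 *		return NETDEV_TX_OK;
 *	}
 *
 *	static const struct net_device_ops my_netdev_ops = {
 *		.ndo_open	= my_ndo_open,
 *		.ndo_start_xmit	= my_start_xmit,
 *	};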
* * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb, * struct net_device *sb_dev); * Called to decide which queue to use when device supports multiple * transmit queues. * * void (*ndo_change_rx_flags)(struct net_device *dev, int flags); * This function is called to allow device receiver to make * changes to configuration when multicast or promiscuous is enabled. * * void (*ndo_set_rx_mode)(struct net_device *dev); * This function is called device changes address list filtering. * If driver handles unicast address filtering, it should set * IFF_UNICAST_FLT in its priv_flags. * * int (*ndo_set_mac_address)(struct net_device *dev, void *addr); * This function is called when the Media Access Control address * needs to be changed. If this interface is not defined, the * MAC address can not be changed. * * int (*ndo_validate_addr)(struct net_device *dev); * Test if Media Access Control address is valid for the device. * * int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); * Old-style ioctl entry point. This is used internally by the * ieee802154 subsystem but is no longer called by the device * ioctl handler. * * int (*ndo_siocbond)(struct net_device *dev, struct ifreq *ifr, int cmd); * Used by the bonding driver for its device specific ioctls: * SIOCBONDENSLAVE, SIOCBONDRELEASE, SIOCBONDSETHWADDR, SIOCBONDCHANGEACTIVE, * SIOCBONDSLAVEINFOQUERY, and SIOCBONDINFOQUERY * * * int (*ndo_eth_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); * Called for ethernet specific ioctls: SIOCGMIIPHY, SIOCGMIIREG, * SIOCSMIIREG, SIOCSHWTSTAMP and SIOCGHWTSTAMP. * * int (*ndo_set_config)(struct net_device *dev, struct ifmap *map); * Used to set network devices bus interface parameters. This interface * is retained for legacy reasons; new devices should use the bus * interface (PCI) for low level management. * * int (*ndo_change_mtu)(struct net_device *dev, int new_mtu); * Called when a user wants to change the Maximum Transfer Unit * of a device. * * void (*ndo_tx_timeout)(struct net_device *dev, unsigned int txqueue); * Callback used when the transmitter has not made any progress * for dev->watchdog ticks. * * void (*ndo_get_stats64)(struct net_device *dev, * struct rtnl_link_stats64 *storage); * struct net_device_stats* (*ndo_get_stats)(struct net_device *dev); * Called when a user wants to get the network device usage * statistics. Drivers must do one of the following: * 1. Define @ndo_get_stats64 to fill in a zero-initialised * rtnl_link_stats64 structure passed by the caller. * 2. Define @ndo_get_stats to update a net_device_stats structure * (which should normally be dev->stats) and return a pointer to * it. The structure may be changed asynchronously only if each * field is written atomically. * 3. Update dev->stats asynchronously and atomically, and define * neither operation. * * bool (*ndo_has_offload_stats)(const struct net_device *dev, int attr_id) * Return true if this device supports offload stats of this attr_id. * * int (*ndo_get_offload_stats)(int attr_id, const struct net_device *dev, * void *attr_data) * Get statistics for offload operations by attr_id. Write it into the * attr_data pointer. * * int (*ndo_vlan_rx_add_vid)(struct net_device *dev, __be16 proto, u16 vid); * If device supports VLAN filtering this function is called when a * VLAN id is registered. * * int (*ndo_vlan_rx_kill_vid)(struct net_device *dev, __be16 proto, u16 vid); * If device supports VLAN filtering this function is called when a * VLAN id is unregistered. 
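 *
 * A minimal, illustrative sketch (hypothetical priv fields) of option 1 in
 * the @ndo_get_stats64 description above: fill in the zero-initialised
 * structure from counters kept in the driver's private area:
 *
 *	static void my_get_stats64(struct net_device *dev,
 *				   struct rtnl_link_stats64 *storage)
 *	{
 *		struct my_priv *priv = netdev_priv(dev);
 *
 *		storage->rx_packets = priv->rx_packets;
 *		storage->tx_packets = priv->tx_packets;
 *		storage->rx_bytes = priv->rx_bytes;
 *		storage->tx_bytes = priv->tx_bytes;
 *	}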
* * void (*ndo_poll_controller)(struct net_device *dev); * * SR-IOV management functions. * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac); * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, * u8 qos, __be16 proto); * int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate, * int max_tx_rate); * int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting); * int (*ndo_set_vf_trust)(struct net_device *dev, int vf, bool setting); * int (*ndo_get_vf_config)(struct net_device *dev, * int vf, struct ifla_vf_info *ivf); * int (*ndo_set_vf_link_state)(struct net_device *dev, int vf, int link_state); * int (*ndo_set_vf_port)(struct net_device *dev, int vf, * struct nlattr *port[]); * * Enable or disable the VF ability to query its RSS Redirection Table and * Hash Key. This is needed since on some devices VF share this information * with PF and querying it may introduce a theoretical security risk. * int (*ndo_set_vf_rss_query_en)(struct net_device *dev, int vf, bool setting); * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); * int (*ndo_setup_tc)(struct net_device *dev, enum tc_setup_type type, * void *type_data); * Called to setup any 'tc' scheduler, classifier or action on @dev. * This is always called from the stack with the rtnl lock held and netif * tx queues stopped. This allows the netdevice to perform queue * management safely. * * Fiber Channel over Ethernet (FCoE) offload functions. * int (*ndo_fcoe_enable)(struct net_device *dev); * Called when the FCoE protocol stack wants to start using LLD for FCoE * so the underlying device can perform whatever needed configuration or * initialization to support acceleration of FCoE traffic. * * int (*ndo_fcoe_disable)(struct net_device *dev); * Called when the FCoE protocol stack wants to stop using LLD for FCoE * so the underlying device can perform whatever needed clean-ups to * stop supporting acceleration of FCoE traffic. * * int (*ndo_fcoe_ddp_setup)(struct net_device *dev, u16 xid, * struct scatterlist *sgl, unsigned int sgc); * Called when the FCoE Initiator wants to initialize an I/O that * is a possible candidate for Direct Data Placement (DDP). The LLD can * perform necessary setup and returns 1 to indicate the device is set up * successfully to perform DDP on this I/O, otherwise this returns 0. * * int (*ndo_fcoe_ddp_done)(struct net_device *dev, u16 xid); * Called when the FCoE Initiator/Target is done with the DDPed I/O as * indicated by the FC exchange id 'xid', so the underlying device can * clean up and reuse resources for later DDP requests. * * int (*ndo_fcoe_ddp_target)(struct net_device *dev, u16 xid, * struct scatterlist *sgl, unsigned int sgc); * Called when the FCoE Target wants to initialize an I/O that * is a possible candidate for Direct Data Placement (DDP). The LLD can * perform necessary setup and returns 1 to indicate the device is set up * successfully to perform DDP on this I/O, otherwise this returns 0. * * int (*ndo_fcoe_get_hbainfo)(struct net_device *dev, * struct netdev_fcoe_hbainfo *hbainfo); * Called when the FCoE Protocol stack wants information on the underlying * device. This information is utilized by the FCoE protocol stack to * register attributes with Fiber Channel management service as per the * FC-GS Fabric Device Management Information(FDMI) specification. 
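 *
 * A minimal, illustrative sketch (my_setup_mqprio() is hypothetical) of the
 * usual @ndo_setup_tc shape: accept the offload types the hardware can
 * handle and return -EOPNOTSUPP for everything else:
 *
 *	static int my_setup_tc(struct net_device *dev, enum tc_setup_type type,
 *			       void *type_data)
 *	{
 *		switch (type) {
 *		case TC_SETUP_QDISC_MQPRIO:
 *			return my_setup_mqprio(dev, type_data);
 *		default:
 *			return -EOPNOTSUPP;
 *		}
 *	}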
* * int (*ndo_fcoe_get_wwn)(struct net_device *dev, u64 *wwn, int type); * Called when the underlying device wants to override default World Wide * Name (WWN) generation mechanism in FCoE protocol stack to pass its own * World Wide Port Name (WWPN) or World Wide Node Name (WWNN) to the FCoE * protocol stack to use. * * RFS acceleration. * int (*ndo_rx_flow_steer)(struct net_device *dev, const struct sk_buff *skb, * u16 rxq_index, u32 flow_id); * Set hardware filter for RFS. rxq_index is the target queue index; * flow_id is a flow ID to be passed to rps_may_expire_flow() later. * Return the filter ID on success, or a negative error code. * * Slave management functions (for bridge, bonding, etc). * int (*ndo_add_slave)(struct net_device *dev, struct net_device *slave_dev); * Called to make another netdev an underling. * * int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev); * Called to release a previously enslaved netdev. * * struct net_device *(*ndo_get_xmit_slave)(struct net_device *dev, * struct sk_buff *skb, * bool all_slaves); * Get the xmit slave of the master device. If all_slaves is true, the * function assumes all the slaves can transmit. * * Feature/offload setting functions. * netdev_features_t (*ndo_fix_features)(struct net_device *dev, * netdev_features_t features); * Adjusts the requested feature flags according to device-specific * constraints, and returns the resulting flags. Must not modify * the device state. * * int (*ndo_set_features)(struct net_device *dev, netdev_features_t features); * Called to update device configuration to new features. The passed * feature set might be less than what was returned by ndo_fix_features(). * Must return >0 or -errno if it changed dev->features itself. * * int (*ndo_fdb_add)(struct ndmsg *ndm, struct nlattr *tb[], * struct net_device *dev, * const unsigned char *addr, u16 vid, u16 flags, * bool *notified, struct netlink_ext_ack *extack); * Adds an FDB entry to dev for addr. * Callee shall set *notified to true if it sent any appropriate * notification(s). Otherwise the core will send a generic one. * int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[], * struct net_device *dev, * const unsigned char *addr, u16 vid, * bool *notified, struct netlink_ext_ack *extack); * Deletes the FDB entry from dev corresponding to addr. * Callee shall set *notified to true if it sent any appropriate * notification(s). Otherwise the core will send a generic one. * int (*ndo_fdb_del_bulk)(struct nlmsghdr *nlh, struct net_device *dev, * struct netlink_ext_ack *extack); * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, * struct net_device *dev, struct net_device *filter_dev, * int *idx) * Used to add FDB entries to dump requests. Implementers should add * entries to skb and update idx with the number of entries. * * int (*ndo_mdb_add)(struct net_device *dev, struct nlattr *tb[], * u16 nlmsg_flags, struct netlink_ext_ack *extack); * Adds an MDB entry to dev. * int (*ndo_mdb_del)(struct net_device *dev, struct nlattr *tb[], * struct netlink_ext_ack *extack); * Deletes the MDB entry from dev. * int (*ndo_mdb_del_bulk)(struct net_device *dev, struct nlattr *tb[], * struct netlink_ext_ack *extack); * Bulk deletes MDB entries from dev. * int (*ndo_mdb_dump)(struct net_device *dev, struct sk_buff *skb, * struct netlink_callback *cb); * Dumps MDB entries from dev. The first argument (marker) in the netlink * callback is used by core rtnetlink code. 
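 *
 * A hedged illustration of the ndo_fix_features contract above (a sketch
 * built on an assumed hardware constraint, not from any in-tree driver):
 *
 *	static netdev_features_t foo_fix_features(struct net_device *dev,
 *						  netdev_features_t features)
 *	{
 *		/* assumed constraint: this hardware can only do TSO when
 *		 * checksum offload is also enabled, so drop TSO otherwise */
 *		if (!(features & NETIF_F_HW_CSUM))
 *			features &= ~NETIF_F_ALL_TSO;
 *		/* only the requested flags are adjusted; no device state
 *		 * is modified here */
 *		return features;
 *	}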
* * int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh, * u16 flags, struct netlink_ext_ack *extack) * int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq, * struct net_device *dev, u32 filter_mask, * int nlflags) * int (*ndo_bridge_dellink)(struct net_device *dev, struct nlmsghdr *nlh, * u16 flags); * * int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier); * Called to change device carrier. Soft-devices (like dummy, team, etc) * which do not represent real hardware may define this to allow their * userspace components to manage their virtual carrier state. Devices * that determine carrier state from physical hardware properties (eg * network cables) or protocol-dependent mechanisms (eg * USB_CDC_NOTIFY_NETWORK_CONNECTION) should NOT implement this function. * * int (*ndo_get_phys_port_id)(struct net_device *dev, * struct netdev_phys_item_id *ppid); * Called to get the ID of the physical port of this device. If the driver * does not implement this, it is assumed that the hw is not able to have * multiple net devices on a single physical port. * * int (*ndo_get_port_parent_id)(struct net_device *dev, * struct netdev_phys_item_id *ppid) * Called to get the parent ID of the physical port of this device. * * void* (*ndo_dfwd_add_station)(struct net_device *pdev, * struct net_device *dev) * Called by upper layer devices to accelerate switching or other * station functionality into hardware. 'pdev' is the lowerdev * to use for the offload and 'dev' is the net device that will * back the offload. Returns a pointer to the private structure * the upper layer will maintain. * void (*ndo_dfwd_del_station)(struct net_device *pdev, void *priv) * Called by the upper layer device to delete the station created * by 'ndo_dfwd_add_station'. 'pdev' is the net device backing * the station and priv is the structure returned by the add * operation. * int (*ndo_set_tx_maxrate)(struct net_device *dev, * int queue_index, u32 maxrate); * Called when a user wants to set a max-rate limitation on a specific * TX queue. * int (*ndo_get_iflink)(const struct net_device *dev); * Called to get the iflink value of this device. * int (*ndo_fill_metadata_dst)(struct net_device *dev, struct sk_buff *skb); * This function is used to get egress tunnel information for the given skb. * This is useful for retrieving outer tunnel header parameters while * sampling the packet. * void (*ndo_set_rx_headroom)(struct net_device *dev, int needed_headroom); * This function is used to specify the headroom that the skb must * consider when allocating the skb during packet reception. Setting an * appropriate rx headroom value allows avoiding an skb head copy on * forward. Setting a negative value resets the rx headroom to the * default value. * int (*ndo_bpf)(struct net_device *dev, struct netdev_bpf *bpf); * This function is used to set or query state related to XDP on the * netdevice and manage BPF offload. See the definition of * enum bpf_netdev_command for details. * int (*ndo_xdp_xmit)(struct net_device *dev, int n, struct xdp_frame **xdp, * u32 flags); * This function is used to submit @n XDP packets for transmit on a * netdevice. Returns the number of frames successfully transmitted; frames * that got dropped are freed/returned via xdp_return_frame(). * A negative return value means a general error invoking the ndo: * no frames were transmitted and the core caller will free all frames. * struct net_device *(*ndo_xdp_get_xmit_slave)(struct net_device *dev, * struct xdp_buff *xdp); * Get the xmit slave of the master device based on the xdp_buff. 
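 *
 * A hedged sketch of the ndo_xdp_xmit return convention described above
 * (the "foo" driver and its helper are hypothetical, not from this header):
 *
 *	static int foo_xdp_xmit(struct net_device *dev, int n,
 *				struct xdp_frame **frames, u32 flags)
 *	{
 *		int i, sent = 0;
 *
 *		if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
 *			return -EINVAL;
 *
 *		for (i = 0; i < n; i++) {
 *			/* hypothetical helper queueing one frame for TX */
 *			if (foo_xmit_xdp_frame(dev, frames[i]))
 *				break;
 *			sent++;
 *		}
 *		/* frames[sent..n-1] were not transmitted; the caller frees them */
 *		return sent;
 *	}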
* int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags); * This function is used to wake up the softirq, ksoftirqd or kthread * responsible for sending and/or receiving packets on a specific * queue id bound to an AF_XDP socket. The flags field specifies if * only RX, only Tx, or both should be woken up using the flags * XDP_WAKEUP_RX and XDP_WAKEUP_TX. * int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm_kern *p, * int cmd); * Add, change, delete or get information on an IPv4 tunnel. * struct net_device *(*ndo_get_peer_dev)(struct net_device *dev); * If a device is paired with a peer device, return the peer instance. * The caller must be under RCU read context. * int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, struct net_device_path *path); * Get the forwarding path to reach the real device from the HW destination address * ktime_t (*ndo_get_tstamp)(struct net_device *dev, * const struct skb_shared_hwtstamps *hwtstamps, * bool cycles); * Get hardware timestamp based on normal/adjustable time or free running * cycle counter. This function is required if physical clock supports a * free running cycle counter. * * int (*ndo_hwtstamp_get)(struct net_device *dev, * struct kernel_hwtstamp_config *kernel_config); * Get the currently configured hardware timestamping parameters for the * NIC device. * * int (*ndo_hwtstamp_set)(struct net_device *dev, * struct kernel_hwtstamp_config *kernel_config, * struct netlink_ext_ack *extack); * Change the hardware timestamping parameters for NIC device. */ struct net_device_ops { int (*ndo_init)(struct net_device *dev); void (*ndo_uninit)(struct net_device *dev); int (*ndo_open)(struct net_device *dev); int (*ndo_stop)(struct net_device *dev); netdev_tx_t (*ndo_start_xmit)(struct sk_buff *skb, struct net_device *dev); netdev_features_t (*ndo_features_check)(struct sk_buff *skb, struct net_device *dev, netdev_features_t features); u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); void (*ndo_change_rx_flags)(struct net_device *dev, int flags); void (*ndo_set_rx_mode)(struct net_device *dev); int (*ndo_set_mac_address)(struct net_device *dev, void *addr); int (*ndo_validate_addr)(struct net_device *dev); int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); int (*ndo_eth_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); int (*ndo_siocbond)(struct net_device *dev, struct ifreq *ifr, int cmd); int (*ndo_siocwandev)(struct net_device *dev, struct if_settings *ifs); int (*ndo_siocdevprivate)(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd); int (*ndo_set_config)(struct net_device *dev, struct ifmap *map); int (*ndo_change_mtu)(struct net_device *dev, int new_mtu); int (*ndo_neigh_setup)(struct net_device *dev, struct neigh_parms *); void (*ndo_tx_timeout) (struct net_device *dev, unsigned int txqueue); void (*ndo_get_stats64)(struct net_device *dev, struct rtnl_link_stats64 *storage); bool (*ndo_has_offload_stats)(const struct net_device *dev, int attr_id); int (*ndo_get_offload_stats)(int attr_id, const struct net_device *dev, void *attr_data); struct net_device_stats* (*ndo_get_stats)(struct net_device *dev); int (*ndo_vlan_rx_add_vid)(struct net_device *dev, __be16 proto, u16 vid); int (*ndo_vlan_rx_kill_vid)(struct net_device *dev, __be16 proto, u16 vid); #ifdef CONFIG_NET_POLL_CONTROLLER void (*ndo_poll_controller)(struct net_device *dev); int (*ndo_netpoll_setup)(struct net_device *dev); void 
(*ndo_netpoll_cleanup)(struct net_device *dev); #endif int (*ndo_set_vf_mac)(struct net_device *dev, int queue, u8 *mac); int (*ndo_set_vf_vlan)(struct net_device *dev, int queue, u16 vlan, u8 qos, __be16 proto); int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate, int max_tx_rate); int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting); int (*ndo_set_vf_trust)(struct net_device *dev, int vf, bool setting); int (*ndo_get_vf_config)(struct net_device *dev, int vf, struct ifla_vf_info *ivf); int (*ndo_set_vf_link_state)(struct net_device *dev, int vf, int link_state); int (*ndo_get_vf_stats)(struct net_device *dev, int vf, struct ifla_vf_stats *vf_stats); int (*ndo_set_vf_port)(struct net_device *dev, int vf, struct nlattr *port[]); int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); int (*ndo_get_vf_guid)(struct net_device *dev, int vf, struct ifla_vf_guid *node_guid, struct ifla_vf_guid *port_guid); int (*ndo_set_vf_guid)(struct net_device *dev, int vf, u64 guid, int guid_type); int (*ndo_set_vf_rss_query_en)( struct net_device *dev, int vf, bool setting); int (*ndo_setup_tc)(struct net_device *dev, enum tc_setup_type type, void *type_data); #if IS_ENABLED(CONFIG_FCOE) int (*ndo_fcoe_enable)(struct net_device *dev); int (*ndo_fcoe_disable)(struct net_device *dev); int (*ndo_fcoe_ddp_setup)(struct net_device *dev, u16 xid, struct scatterlist *sgl, unsigned int sgc); int (*ndo_fcoe_ddp_done)(struct net_device *dev, u16 xid); int (*ndo_fcoe_ddp_target)(struct net_device *dev, u16 xid, struct scatterlist *sgl, unsigned int sgc); int (*ndo_fcoe_get_hbainfo)(struct net_device *dev, struct netdev_fcoe_hbainfo *hbainfo); #endif #if IS_ENABLED(CONFIG_LIBFCOE) #define NETDEV_FCOE_WWNN 0 #define NETDEV_FCOE_WWPN 1 int (*ndo_fcoe_get_wwn)(struct net_device *dev, u64 *wwn, int type); #endif #ifdef CONFIG_RFS_ACCEL int (*ndo_rx_flow_steer)(struct net_device *dev, const struct sk_buff *skb, u16 rxq_index, u32 flow_id); #endif int (*ndo_add_slave)(struct net_device *dev, struct net_device *slave_dev, struct netlink_ext_ack *extack); int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev); struct net_device* (*ndo_get_xmit_slave)(struct net_device *dev, struct sk_buff *skb, bool all_slaves); struct net_device* (*ndo_sk_get_lower_dev)(struct net_device *dev, struct sock *sk); netdev_features_t (*ndo_fix_features)(struct net_device *dev, netdev_features_t features); int (*ndo_set_features)(struct net_device *dev, netdev_features_t features); int (*ndo_neigh_construct)(struct net_device *dev, struct neighbour *n); void (*ndo_neigh_destroy)(struct net_device *dev, struct neighbour *n); int (*ndo_fdb_add)(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u16 flags, bool *notified, struct netlink_ext_ack *extack); int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, bool *notified, struct netlink_ext_ack *extack); int (*ndo_fdb_del_bulk)(struct nlmsghdr *nlh, struct net_device *dev, struct netlink_ext_ack *extack); int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, int *idx); int (*ndo_fdb_get)(struct sk_buff *skb, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u32 portid, u32 seq, struct netlink_ext_ack *extack); int (*ndo_mdb_add)(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags, struct 
netlink_ext_ack *extack); int (*ndo_mdb_del)(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack); int (*ndo_mdb_del_bulk)(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack); int (*ndo_mdb_dump)(struct net_device *dev, struct sk_buff *skb, struct netlink_callback *cb); int (*ndo_mdb_get)(struct net_device *dev, struct nlattr *tb[], u32 portid, u32 seq, struct netlink_ext_ack *extack); int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh, u16 flags, struct netlink_ext_ack *extack); int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u32 filter_mask, int nlflags); int (*ndo_bridge_dellink)(struct net_device *dev, struct nlmsghdr *nlh, u16 flags); int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier); int (*ndo_get_phys_port_id)(struct net_device *dev, struct netdev_phys_item_id *ppid); int (*ndo_get_port_parent_id)(struct net_device *dev, struct netdev_phys_item_id *ppid); int (*ndo_get_phys_port_name)(struct net_device *dev, char *name, size_t len); void* (*ndo_dfwd_add_station)(struct net_device *pdev, struct net_device *dev); void (*ndo_dfwd_del_station)(struct net_device *pdev, void *priv); int (*ndo_set_tx_maxrate)(struct net_device *dev, int queue_index, u32 maxrate); int (*ndo_get_iflink)(const struct net_device *dev); int (*ndo_fill_metadata_dst)(struct net_device *dev, struct sk_buff *skb); void (*ndo_set_rx_headroom)(struct net_device *dev, int needed_headroom); int (*ndo_bpf)(struct net_device *dev, struct netdev_bpf *bpf); int (*ndo_xdp_xmit)(struct net_device *dev, int n, struct xdp_frame **xdp, u32 flags); struct net_device * (*ndo_xdp_get_xmit_slave)(struct net_device *dev, struct xdp_buff *xdp); int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags); int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm_kern *p, int cmd); struct net_device * (*ndo_get_peer_dev)(struct net_device *dev); int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, struct net_device_path *path); ktime_t (*ndo_get_tstamp)(struct net_device *dev, const struct skb_shared_hwtstamps *hwtstamps, bool cycles); int (*ndo_hwtstamp_get)(struct net_device *dev, struct kernel_hwtstamp_config *kernel_config); int (*ndo_hwtstamp_set)(struct net_device *dev, struct kernel_hwtstamp_config *kernel_config, struct netlink_ext_ack *extack); #if IS_ENABLED(CONFIG_NET_SHAPER) /** * @net_shaper_ops: Device shaping offload operations * see include/net/net_shapers.h */ const struct net_shaper_ops *net_shaper_ops; #endif }; /** * enum netdev_priv_flags - &struct net_device priv_flags * * These are the &struct net_device, they are only set internally * by drivers and used in the kernel. These flags are invisible to * userspace; this means that the order of these flags can change * during any kernel release. * * You should add bitfield booleans after either net_device::priv_flags * (hotpath) or ::threaded (slowpath) instead of extending these flags. 
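 *
 * As a hedged usage sketch (hypothetical driver code, not part of this
 * header), drivers typically set one of these flags in their setup routine
 * before register_netdev(), for example:
 *
 *	dev->priv_flags |= IFF_UNICAST_FLT;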
* * @IFF_802_1Q_VLAN: 802.1Q VLAN device * @IFF_EBRIDGE: Ethernet bridging device * @IFF_BONDING: bonding master or slave * @IFF_ISATAP: ISATAP interface (RFC4214) * @IFF_WAN_HDLC: WAN HDLC device * @IFF_XMIT_DST_RELEASE: dev_hard_start_xmit() is allowed to * release skb->dst * @IFF_DONT_BRIDGE: disallow bridging this ether dev * @IFF_DISABLE_NETPOLL: disable netpoll at run-time * @IFF_MACVLAN_PORT: device used as macvlan port * @IFF_BRIDGE_PORT: device used as bridge port * @IFF_OVS_DATAPATH: device used as Open vSwitch datapath port * @IFF_TX_SKB_SHARING: The interface supports sharing skbs on transmit * @IFF_UNICAST_FLT: Supports unicast filtering * @IFF_TEAM_PORT: device used as team port * @IFF_SUPP_NOFCS: device supports sending custom FCS * @IFF_LIVE_ADDR_CHANGE: device supports hardware address * change when it's running * @IFF_MACVLAN: Macvlan device * @IFF_XMIT_DST_RELEASE_PERM: IFF_XMIT_DST_RELEASE not taking into account * underlying stacked devices * @IFF_L3MDEV_MASTER: device is an L3 master device * @IFF_NO_QUEUE: device can run without qdisc attached * @IFF_OPENVSWITCH: device is a Open vSwitch master * @IFF_L3MDEV_SLAVE: device is enslaved to an L3 master device * @IFF_TEAM: device is a team device * @IFF_RXFH_CONFIGURED: device has had Rx Flow indirection table configured * @IFF_PHONY_HEADROOM: the headroom value is controlled by an external * entity (i.e. the master device for bridged veth) * @IFF_MACSEC: device is a MACsec device * @IFF_NO_RX_HANDLER: device doesn't support the rx_handler hook * @IFF_FAILOVER: device is a failover master device * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device * @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device * @IFF_NO_ADDRCONF: prevent ipv6 addrconf * @IFF_TX_SKB_NO_LINEAR: device/driver is capable of xmitting frames with * skb_headlen(skb) == 0 (data starts from frag0) */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, IFF_EBRIDGE = 1<<1, IFF_BONDING = 1<<2, IFF_ISATAP = 1<<3, IFF_WAN_HDLC = 1<<4, IFF_XMIT_DST_RELEASE = 1<<5, IFF_DONT_BRIDGE = 1<<6, IFF_DISABLE_NETPOLL = 1<<7, IFF_MACVLAN_PORT = 1<<8, IFF_BRIDGE_PORT = 1<<9, IFF_OVS_DATAPATH = 1<<10, IFF_TX_SKB_SHARING = 1<<11, IFF_UNICAST_FLT = 1<<12, IFF_TEAM_PORT = 1<<13, IFF_SUPP_NOFCS = 1<<14, IFF_LIVE_ADDR_CHANGE = 1<<15, IFF_MACVLAN = 1<<16, IFF_XMIT_DST_RELEASE_PERM = 1<<17, IFF_L3MDEV_MASTER = 1<<18, IFF_NO_QUEUE = 1<<19, IFF_OPENVSWITCH = 1<<20, IFF_L3MDEV_SLAVE = 1<<21, IFF_TEAM = 1<<22, IFF_RXFH_CONFIGURED = 1<<23, IFF_PHONY_HEADROOM = 1<<24, IFF_MACSEC = 1<<25, IFF_NO_RX_HANDLER = 1<<26, IFF_FAILOVER = 1<<27, IFF_FAILOVER_SLAVE = 1<<28, IFF_L3MDEV_RX_HANDLER = 1<<29, IFF_NO_ADDRCONF = BIT_ULL(30), IFF_TX_SKB_NO_LINEAR = BIT_ULL(31), }; /* Specifies the type of the struct net_device::ml_priv pointer */ enum netdev_ml_priv_type { ML_PRIV_NONE, ML_PRIV_CAN, }; enum netdev_stat_type { NETDEV_PCPU_STAT_NONE, NETDEV_PCPU_STAT_LSTATS, /* struct pcpu_lstats */ NETDEV_PCPU_STAT_TSTATS, /* struct pcpu_sw_netstats */ NETDEV_PCPU_STAT_DSTATS, /* struct pcpu_dstats */ }; enum netdev_reg_state { NETREG_UNINITIALIZED = 0, NETREG_REGISTERED, /* completed register_netdevice */ NETREG_UNREGISTERING, /* called unregister_netdevice */ NETREG_UNREGISTERED, /* completed unregister todo */ NETREG_RELEASED, /* called free_netdev */ NETREG_DUMMY, /* dummy device for NAPI poll */ }; /** * struct net_device - The DEVICE structure. * * Actually, this whole structure is a big mistake. 
It mixes I/O * data with strictly "high-level" data, and it has to know about * almost every data structure used in the INET module. * * @priv_flags: flags invisible to userspace defined as bits, see * enum netdev_priv_flags for the definitions * @lltx: device supports lockless Tx. Deprecated for real HW * drivers. Mainly used by logical interfaces, such as * bonding and tunnels * @netmem_tx: device support netmem_tx. * * @name: This is the first field of the "visible" part of this structure * (i.e. as seen by users in the "Space.c" file). It is the name * of the interface. * * @name_node: Name hashlist node * @ifalias: SNMP alias * @mem_end: Shared memory end * @mem_start: Shared memory start * @base_addr: Device I/O address * @irq: Device IRQ number * * @state: Generic network queuing layer state, see netdev_state_t * @dev_list: The global list of network devices * @napi_list: List entry used for polling NAPI devices * @unreg_list: List entry when we are unregistering the * device; see the function unregister_netdev * @close_list: List entry used when we are closing the device * @ptype_all: Device-specific packet handlers for all protocols * @ptype_specific: Device-specific, protocol-specific packet handlers * * @adj_list: Directly linked devices, like slaves for bonding * @features: Currently active device features * @hw_features: User-changeable features * * @wanted_features: User-requested features * @vlan_features: Mask of features inheritable by VLAN devices * * @hw_enc_features: Mask of features inherited by encapsulating devices * This field indicates what encapsulation * offloads the hardware is capable of doing, * and drivers will need to set them appropriately. * * @mpls_features: Mask of features inheritable by MPLS * @gso_partial_features: value(s) from NETIF_F_GSO\* * * @ifindex: interface index * @group: The group the device belongs to * * @stats: Statistics struct, which was left as a legacy, use * rtnl_link_stats64 instead * * @core_stats: core networking counters, * do not use this in drivers * @carrier_up_count: Number of times the carrier has been up * @carrier_down_count: Number of times the carrier has been down * * @wireless_handlers: List of functions to handle Wireless Extensions, * instead of ioctl, * see <net/iw_handler.h> for details. * * @netdev_ops: Includes several pointers to callbacks, * if one wants to override the ndo_*() functions * @xdp_metadata_ops: Includes pointers to XDP metadata callbacks. * @xsk_tx_metadata_ops: Includes pointers to AF_XDP TX metadata callbacks. * @ethtool_ops: Management operations * @l3mdev_ops: Layer 3 master device operations * @ndisc_ops: Includes callbacks for different IPv6 neighbour * discovery handling. Necessary for e.g. 6LoWPAN. * @xfrmdev_ops: Transformation offload operations * @tlsdev_ops: Transport Layer Security offload operations * @header_ops: Includes callbacks for creating,parsing,caching,etc * of Layer 2 headers. * * @flags: Interface flags (a la BSD) * @xdp_features: XDP capability supported by the device * @gflags: Global flags ( kept as legacy ) * @priv_len: Size of the ->priv flexible array * @priv: Flexible array containing private data * @operstate: RFC2863 operstate * @link_mode: Mapping policy to operstate * @if_port: Selectable AUI, TP, ... * @dma: DMA channel * @mtu: Interface MTU value * @min_mtu: Interface Minimum MTU value * @max_mtu: Interface Maximum MTU value * @type: Interface hardware type * @hard_header_len: Maximum hardware header length. 
* @min_header_len: Minimum hardware header length * * @needed_headroom: Extra headroom the hardware may need, but not in all * cases can this be guaranteed * @needed_tailroom: Extra tailroom the hardware may need, but not in all * cases can this be guaranteed. Some cases also use * LL_MAX_HEADER instead to allocate the skb * * interface address info: * * @perm_addr: Permanent hw address * @addr_assign_type: Hw address assignment type * @addr_len: Hardware address length * @upper_level: Maximum depth level of upper devices. * @lower_level: Maximum depth level of lower devices. * @neigh_priv_len: Used in neigh_alloc() * @dev_id: Used to differentiate devices that share * the same link layer address * @dev_port: Used to differentiate devices that share * the same function * @addr_list_lock: XXX: need comments on this one * @name_assign_type: network interface name assignment type * @uc_promisc: Counter that indicates promiscuous mode * has been enabled due to the need to listen to * additional unicast addresses in a device that * does not implement ndo_set_rx_mode() * @uc: unicast mac addresses * @mc: multicast mac addresses * @dev_addrs: list of device hw addresses * @queues_kset: Group of all Kobjects in the Tx and RX queues * @promiscuity: Number of times the NIC is told to work in * promiscuous mode; if it becomes 0 the NIC will * exit promiscuous mode * @allmulti: Counter, enables or disables allmulticast mode * * @vlan_info: VLAN info * @dsa_ptr: dsa specific data * @tipc_ptr: TIPC specific data * @atalk_ptr: AppleTalk link * @ip_ptr: IPv4 specific data * @ip6_ptr: IPv6 specific data * @ax25_ptr: AX.25 specific data * @ieee80211_ptr: IEEE 802.11 specific data, assign before registering * @ieee802154_ptr: IEEE 802.15.4 low-rate Wireless Personal Area Network * device struct * @mpls_ptr: mpls_dev struct pointer * @mctp_ptr: MCTP specific data * * @dev_addr: Hw address (before bcast, * because most packets are unicast) * * @_rx: Array of RX queues * @num_rx_queues: Number of RX queues * allocated at register_netdev() time * @real_num_rx_queues: Number of RX queues currently active in device * @xdp_prog: XDP sockets filter program pointer * * @rx_handler: handler for received packets * @rx_handler_data: XXX: need comments on this one * @tcx_ingress: BPF & clsact qdisc specific data for ingress processing * @ingress_queue: XXX: need comments on this one * @nf_hooks_ingress: netfilter hooks executed for ingress packets * @broadcast: hw bcast address * * @rx_cpu_rmap: CPU reverse-mapping for RX completion interrupts, * indexed by RX queue number. Assigned by driver. 
* This must only be set if the ndo_rx_flow_steer * operation is defined * @index_hlist: Device index hash chain * * @_tx: Array of TX queues * @num_tx_queues: Number of TX queues allocated at alloc_netdev_mq() time * @real_num_tx_queues: Number of TX queues currently active in device * @qdisc: Root qdisc from userspace point of view * @tx_queue_len: Max frames per queue allowed * @tx_global_lock: XXX: need comments on this one * @xdp_bulkq: XDP device bulk queue * @xps_maps: all CPUs/RXQs maps for XPS device * * @xps_maps: XXX: need comments on this one * @tcx_egress: BPF & clsact qdisc specific data for egress processing * @nf_hooks_egress: netfilter hooks executed for egress packets * @qdisc_hash: qdisc hash table * @watchdog_timeo: Represents the timeout that is used by * the watchdog (see dev_watchdog()) * @watchdog_timer: List of timers * * @proto_down_reason: reason a netdev interface is held down * @pcpu_refcnt: Number of references to this device * @dev_refcnt: Number of references to this device * @refcnt_tracker: Tracker directory for tracked references to this device * @todo_list: Delayed register/unregister * @link_watch_list: XXX: need comments on this one * * @reg_state: Register/unregister state machine * @dismantle: Device is going to be freed * @needs_free_netdev: Should unregister perform free_netdev? * @priv_destructor: Called from unregister * @npinfo: XXX: need comments on this one * @nd_net: Network namespace this network device is inside * protected by @lock * * @ml_priv: Mid-layer private * @ml_priv_type: Mid-layer private type * * @pcpu_stat_type: Type of device statistics which the core should * allocate/free: none, lstats, tstats, dstats. none * means the driver is handling statistics allocation/ * freeing internally. * @lstats: Loopback statistics: packets, bytes * @tstats: Tunnel statistics: RX/TX packets, RX/TX bytes * @dstats: Dummy statistics: RX/TX/drop packets, RX/TX bytes * * @garp_port: GARP * @mrp_port: MRP * * @dm_private: Drop monitor private * * @dev: Class/net/name entry * @sysfs_groups: Space for optional device, statistics and wireless * sysfs groups * * @sysfs_rx_queue_group: Space for optional per-rx queue attributes * @rtnl_link_ops: Rtnl_link_ops * @stat_ops: Optional ops for queue-aware statistics * @queue_mgmt_ops: Optional ops for queue management * * @gso_max_size: Maximum size of generic segmentation offload * @tso_max_size: Device (as in HW) limit on the max TSO request size * @gso_max_segs: Maximum number of segments that can be passed to the * NIC for GSO * @tso_max_segs: Device (as in HW) limit on the max TSO segment count * @gso_ipv4_max_size: Maximum size of generic segmentation offload, * for IPv4. * * @dcbnl_ops: Data Center Bridging netlink ops * @num_tc: Number of traffic classes in the net device * @tc_to_txq: XXX: need comments on this one * @prio_tc_map: XXX: need comments on this one * * @fcoe_ddp_xid: Max exchange id for FCoE LRO by ddp * * @priomap: XXX: need comments on this one * @link_topo: Physical link topology tracking attached PHYs * @phydev: Physical device may attach itself * for hardware timestamping * @sfp_bus: attached &struct sfp_bus structure. * * @qdisc_tx_busylock: lockdep class annotating Qdisc->busylock spinlock * * @proto_down: protocol port state information can be sent to the * switch driver and used to set the phys state of the * switch port. * * @threaded: napi threaded mode is enabled * * @irq_affinity_auto: driver wants the core to store and re-assign the IRQ * affinity. 
Set by netif_enable_irq_affinity(), then * the driver must create a persistent napi by * netif_napi_add_config() and finally bind the napi to * IRQ (via netif_napi_set_irq()). * * @rx_cpu_rmap_auto: driver wants the core to manage the ARFS rmap. * Set by calling netif_enable_cpu_rmap(). * * @see_all_hwtstamp_requests: device wants to see calls to * ndo_hwtstamp_set() for all timestamp requests * regardless of source, even if those aren't * HWTSTAMP_SOURCE_NETDEV * @change_proto_down: device supports setting carrier via IFLA_PROTO_DOWN * @netns_immutable: interface can't change network namespaces * @fcoe_mtu: device supports maximum FCoE MTU, 2158 bytes * * @net_notifier_list: List of per-net netdev notifier block * that follow this device when it is moved * to another network namespace. * * @macsec_ops: MACsec offloading ops * * @udp_tunnel_nic_info: static structure describing the UDP tunnel * offload capabilities of the device * @udp_tunnel_nic: UDP tunnel offload state * @ethtool: ethtool related state * @xdp_state: stores info on attached XDP BPF programs * * @nested_level: Used as a parameter of spin_lock_nested() of * dev->addr_list_lock. * @unlink_list: As netif_addr_lock() can be called recursively, * keep a list of interfaces to be deleted. * @gro_max_size: Maximum size of aggregated packet in generic * receive offload (GRO) * @gro_ipv4_max_size: Maximum size of aggregated packet in generic * receive offload (GRO), for IPv4. * @xdp_zc_max_segs: Maximum number of segments supported by AF_XDP * zero copy driver * * @dev_addr_shadow: Copy of @dev_addr to catch direct writes. * @linkwatch_dev_tracker: refcount tracker used by linkwatch. * @watchdog_dev_tracker: refcount tracker used by watchdog. * @dev_registered_tracker: tracker for reference held while * registered * @offload_xstats_l3: L3 HW stats for this netdevice. * * @devlink_port: Pointer to related devlink port structure. * Assigned by a driver before netdev registration using * SET_NETDEV_DEVLINK_PORT macro. This pointer is static * during the time netdevice is registered. * * @dpll_pin: Pointer to the SyncE source pin of a DPLL subsystem, * where the clock is recovered. * * @max_pacing_offload_horizon: max EDT offload horizon in nsec. * @napi_config: An array of napi_config structures containing per-NAPI * settings. * @gro_flush_timeout: timeout for GRO layer in NAPI * @napi_defer_hard_irqs: If not zero, provides a counter that would * allow to avoid NIC hard IRQ, on busy queues. * * @neighbours: List heads pointing to this device's neighbours' * dev_list, one per address-family. * @hwprov: Tracks which PTP performs hardware packet time stamping. * * FIXME: cleanup struct net_device such that network protocol info * moves out. */ struct net_device { /* Cacheline organization can be found documented in * Documentation/networking/net_cachelines/net_device.rst. * Please update the document when adding new fields. */ /* TX read-mostly hotpath */ __cacheline_group_begin(net_device_read_tx); struct_group(priv_flags_fast, unsigned long priv_flags:32; unsigned long lltx:1; unsigned long netmem_tx:1; ); const struct net_device_ops *netdev_ops; const struct header_ops *header_ops; struct netdev_queue *_tx; netdev_features_t gso_partial_features; unsigned int real_num_tx_queues; unsigned int gso_max_size; unsigned int gso_ipv4_max_size; u16 gso_max_segs; s16 num_tc; /* Note : dev->mtu is often read without holding a lock. * Writers usually hold RTNL. 
* It is recommended to use READ_ONCE() to annotate the reads, * and to use WRITE_ONCE() to annotate the writes. */ unsigned int mtu; unsigned short needed_headroom; struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE]; #ifdef CONFIG_XPS struct xps_dev_maps __rcu *xps_maps[XPS_MAPS_MAX]; #endif #ifdef CONFIG_NETFILTER_EGRESS struct nf_hook_entries __rcu *nf_hooks_egress; #endif #ifdef CONFIG_NET_XGRESS struct bpf_mprog_entry __rcu *tcx_egress; #endif __cacheline_group_end(net_device_read_tx); /* TXRX read-mostly hotpath */ __cacheline_group_begin(net_device_read_txrx); union { struct pcpu_lstats __percpu *lstats; struct pcpu_sw_netstats __percpu *tstats; struct pcpu_dstats __percpu *dstats; }; unsigned long state; unsigned int flags; unsigned short hard_header_len; netdev_features_t features; struct inet6_dev __rcu *ip6_ptr; __cacheline_group_end(net_device_read_txrx); /* RX read-mostly hotpath */ __cacheline_group_begin(net_device_read_rx); struct bpf_prog __rcu *xdp_prog; struct list_head ptype_specific; int ifindex; unsigned int real_num_rx_queues; struct netdev_rx_queue *_rx; unsigned int gro_max_size; unsigned int gro_ipv4_max_size; rx_handler_func_t __rcu *rx_handler; void __rcu *rx_handler_data; possible_net_t nd_net; #ifdef CONFIG_NETPOLL struct netpoll_info __rcu *npinfo; #endif #ifdef CONFIG_NET_XGRESS struct bpf_mprog_entry __rcu *tcx_ingress; #endif __cacheline_group_end(net_device_read_rx); char name[IFNAMSIZ]; struct netdev_name_node *name_node; struct dev_ifalias __rcu *ifalias; /* * I/O specific fields * FIXME: Merge these and struct ifmap into one */ unsigned long mem_end; unsigned long mem_start; unsigned long base_addr; /* * Some hardware also needs these fields (state,dev_list, * napi_list,unreg_list,close_list) but they are not * part of the usual set specified in Space.c. */ struct list_head dev_list; struct list_head napi_list; struct list_head unreg_list; struct list_head close_list; struct list_head ptype_all; struct { struct list_head upper; struct list_head lower; } adj_list; /* Read-mostly cache-line for fast-path access */ xdp_features_t xdp_features; const struct xdp_metadata_ops *xdp_metadata_ops; const struct xsk_tx_metadata_ops *xsk_tx_metadata_ops; unsigned short gflags; unsigned short needed_tailroom; netdev_features_t hw_features; netdev_features_t wanted_features; netdev_features_t vlan_features; netdev_features_t hw_enc_features; netdev_features_t mpls_features; unsigned int min_mtu; unsigned int max_mtu; unsigned short type; unsigned char min_header_len; unsigned char name_assign_type; int group; struct net_device_stats stats; /* not used by modern drivers */ struct net_device_core_stats __percpu *core_stats; /* Stats to monitor link on/off, flapping */ atomic_t carrier_up_count; atomic_t carrier_down_count; #ifdef CONFIG_WIRELESS_EXT const struct iw_handler_def *wireless_handlers; #endif const struct ethtool_ops *ethtool_ops; #ifdef CONFIG_NET_L3_MASTER_DEV const struct l3mdev_ops *l3mdev_ops; #endif #if IS_ENABLED(CONFIG_IPV6) const struct ndisc_ops *ndisc_ops; #endif #ifdef CONFIG_XFRM_OFFLOAD const struct xfrmdev_ops *xfrmdev_ops; #endif #if IS_ENABLED(CONFIG_TLS_DEVICE) const struct tlsdev_ops *tlsdev_ops; #endif unsigned int operstate; unsigned char link_mode; unsigned char if_port; unsigned char dma; /* Interface address info. 
*/ unsigned char perm_addr[MAX_ADDR_LEN]; unsigned char addr_assign_type; unsigned char addr_len; unsigned char upper_level; unsigned char lower_level; unsigned short neigh_priv_len; unsigned short dev_id; unsigned short dev_port; int irq; u32 priv_len; spinlock_t addr_list_lock; struct netdev_hw_addr_list uc; struct netdev_hw_addr_list mc; struct netdev_hw_addr_list dev_addrs; #ifdef CONFIG_SYSFS struct kset *queues_kset; #endif #ifdef CONFIG_LOCKDEP struct list_head unlink_list; #endif unsigned int promiscuity; unsigned int allmulti; bool uc_promisc; #ifdef CONFIG_LOCKDEP unsigned char nested_level; #endif /* Protocol-specific pointers */ struct in_device __rcu *ip_ptr; /** @fib_nh_head: nexthops associated with this netdev */ struct hlist_head fib_nh_head; #if IS_ENABLED(CONFIG_VLAN_8021Q) struct vlan_info __rcu *vlan_info; #endif #if IS_ENABLED(CONFIG_NET_DSA) struct dsa_port *dsa_ptr; #endif #if IS_ENABLED(CONFIG_TIPC) struct tipc_bearer __rcu *tipc_ptr; #endif #if IS_ENABLED(CONFIG_ATALK) void *atalk_ptr; #endif #if IS_ENABLED(CONFIG_AX25) struct ax25_dev __rcu *ax25_ptr; #endif #if IS_ENABLED(CONFIG_CFG80211) struct wireless_dev *ieee80211_ptr; #endif #if IS_ENABLED(CONFIG_IEEE802154) || IS_ENABLED(CONFIG_6LOWPAN) struct wpan_dev *ieee802154_ptr; #endif #if IS_ENABLED(CONFIG_MPLS_ROUTING) struct mpls_dev __rcu *mpls_ptr; #endif #if IS_ENABLED(CONFIG_MCTP) struct mctp_dev __rcu *mctp_ptr; #endif /* * Cache lines mostly used on receive path (including eth_type_trans()) */ /* Interface address info used in eth_type_trans() */ const unsigned char *dev_addr; unsigned int num_rx_queues; #define GRO_LEGACY_MAX_SIZE 65536u /* TCP minimal MSS is 8 (TCP_MIN_GSO_SIZE), * and shinfo->gso_segs is a 16bit field. */ #define GRO_MAX_SIZE (8 * 65535u) unsigned int xdp_zc_max_segs; struct netdev_queue __rcu *ingress_queue; #ifdef CONFIG_NETFILTER_INGRESS struct nf_hook_entries __rcu *nf_hooks_ingress; #endif unsigned char broadcast[MAX_ADDR_LEN]; #ifdef CONFIG_RFS_ACCEL struct cpu_rmap *rx_cpu_rmap; #endif struct hlist_node index_hlist; /* * Cache lines mostly used on transmit path */ unsigned int num_tx_queues; struct Qdisc __rcu *qdisc; unsigned int tx_queue_len; spinlock_t tx_global_lock; struct xdp_dev_bulk_queue __percpu *xdp_bulkq; #ifdef CONFIG_NET_SCHED DECLARE_HASHTABLE (qdisc_hash, 4); #endif /* These may be needed for future network-power-down code. 
*/ struct timer_list watchdog_timer; int watchdog_timeo; u32 proto_down_reason; struct list_head todo_list; #ifdef CONFIG_PCPU_DEV_REFCNT int __percpu *pcpu_refcnt; #else refcount_t dev_refcnt; #endif struct ref_tracker_dir refcnt_tracker; struct list_head link_watch_list; u8 reg_state; bool dismantle; /** @moving_ns: device is changing netns, protected by @lock */ bool moving_ns; /** @rtnl_link_initializing: Device being created, suppress events */ bool rtnl_link_initializing; bool needs_free_netdev; void (*priv_destructor)(struct net_device *dev); /* mid-layer private */ void *ml_priv; enum netdev_ml_priv_type ml_priv_type; enum netdev_stat_type pcpu_stat_type:8; #if IS_ENABLED(CONFIG_GARP) struct garp_port __rcu *garp_port; #endif #if IS_ENABLED(CONFIG_MRP) struct mrp_port __rcu *mrp_port; #endif #if IS_ENABLED(CONFIG_NET_DROP_MONITOR) struct dm_hw_stat_delta __rcu *dm_private; #endif struct device dev; const struct attribute_group *sysfs_groups[4]; const struct attribute_group *sysfs_rx_queue_group; const struct rtnl_link_ops *rtnl_link_ops; const struct netdev_stat_ops *stat_ops; const struct netdev_queue_mgmt_ops *queue_mgmt_ops; /* for setting kernel sock attribute on TCP connection setup */ #define GSO_MAX_SEGS 65535u #define GSO_LEGACY_MAX_SIZE 65536u /* TCP minimal MSS is 8 (TCP_MIN_GSO_SIZE), * and shinfo->gso_segs is a 16bit field. */ #define GSO_MAX_SIZE (8 * GSO_MAX_SEGS) #define TSO_LEGACY_MAX_SIZE 65536 #define TSO_MAX_SIZE UINT_MAX unsigned int tso_max_size; #define TSO_MAX_SEGS U16_MAX u16 tso_max_segs; #ifdef CONFIG_DCB const struct dcbnl_rtnl_ops *dcbnl_ops; #endif u8 prio_tc_map[TC_BITMASK + 1]; #if IS_ENABLED(CONFIG_FCOE) unsigned int fcoe_ddp_xid; #endif #if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) struct netprio_map __rcu *priomap; #endif struct phy_link_topology *link_topo; struct phy_device *phydev; struct sfp_bus *sfp_bus; struct lock_class_key *qdisc_tx_busylock; bool proto_down; bool threaded; bool irq_affinity_auto; bool rx_cpu_rmap_auto; /* priv_flags_slow, ungrouped to save space */ unsigned long see_all_hwtstamp_requests:1; unsigned long change_proto_down:1; unsigned long netns_immutable:1; unsigned long fcoe_mtu:1; struct list_head net_notifier_list; #if IS_ENABLED(CONFIG_MACSEC) /* MACsec management functions */ const struct macsec_ops *macsec_ops; #endif const struct udp_tunnel_nic_info *udp_tunnel_nic_info; struct udp_tunnel_nic *udp_tunnel_nic; /** @cfg: net_device queue-related configuration */ struct netdev_config *cfg; /** * @cfg_pending: same as @cfg but when device is being actively * reconfigured includes any changes to the configuration * requested by the user, but which may or may not be rejected. */ struct netdev_config *cfg_pending; struct ethtool_netdev_state *ethtool; /* protected by rtnl_lock */ struct bpf_xdp_entity xdp_state[__MAX_XDP_MODE]; u8 dev_addr_shadow[MAX_ADDR_LEN]; netdevice_tracker linkwatch_dev_tracker; netdevice_tracker watchdog_dev_tracker; netdevice_tracker dev_registered_tracker; struct rtnl_hw_stats64 *offload_xstats_l3; struct devlink_port *devlink_port; #if IS_ENABLED(CONFIG_DPLL) struct dpll_pin __rcu *dpll_pin; #endif #if IS_ENABLED(CONFIG_PAGE_POOL) /** @page_pools: page pools created for this netdevice */ struct hlist_head page_pools; #endif /** @irq_moder: dim parameters used if IS_ENABLED(CONFIG_DIMLIB). 
*/ struct dim_irq_moder *irq_moder; u64 max_pacing_offload_horizon; struct napi_config *napi_config; unsigned long gro_flush_timeout; u32 napi_defer_hard_irqs; /** * @up: copy of @state's IFF_UP, but safe to read with just @lock. * May report false negatives while the device is being opened * or closed (@lock does not protect .ndo_open, or .ndo_close). */ bool up; /** * @request_ops_lock: request the core to run all @netdev_ops and * @ethtool_ops under the @lock. */ bool request_ops_lock; /** * @lock: netdev-scope lock, protects a small selection of fields. * Should always be taken using netdev_lock() / netdev_unlock() helpers. * Drivers are free to use it for other protection. * * For the drivers that implement shaper or queue API, the scope * of this lock is expanded to cover most ndo/queue/ethtool/sysfs * operations. Drivers may opt-in to this behavior by setting * @request_ops_lock. * * @lock protection mixes with rtnl_lock in multiple ways, fields are * either: * * - simply protected by the instance @lock; * * - double protected - writers hold both locks, readers hold either; * * - ops protected - protected by the lock held around the NDOs * and other callbacks, that is the instance lock on devices for * which netdev_need_ops_lock() returns true, otherwise by rtnl_lock; * * - double ops protected - always protected by rtnl_lock but for * devices for which netdev_need_ops_lock() returns true - also * the instance lock. * * Simply protects: * @gro_flush_timeout, @napi_defer_hard_irqs, @napi_list, * @net_shaper_hierarchy, @reg_state, @threaded * * Double protects: * @up, @moving_ns, @nd_net, @xdp_features * * Double ops protects: * @real_num_rx_queues, @real_num_tx_queues * * Also protects some fields in: * struct napi_struct, struct netdev_queue, struct netdev_rx_queue * * Ordering: take after rtnl_lock. */ struct mutex lock; #if IS_ENABLED(CONFIG_NET_SHAPER) /** * @net_shaper_hierarchy: data tracking the current shaper status * see include/net/net_shapers.h */ struct net_shaper_hierarchy *net_shaper_hierarchy; #endif struct hlist_head neighbours[NEIGH_NR_TABLES]; struct hwtstamp_provider __rcu *hwprov; u8 priv[] ____cacheline_aligned __counted_by(priv_len); } ____cacheline_aligned; #define to_net_dev(d) container_of(d, struct net_device, dev) /* * Driver should use this to assign devlink port instance to a netdevice * before it registers the netdevice. Therefore devlink_port is static * during the netdev lifetime after it is registered. 
*/ #define SET_NETDEV_DEVLINK_PORT(dev, port) \ ({ \ WARN_ON((dev)->reg_state != NETREG_UNINITIALIZED); \ ((dev)->devlink_port = (port)); \ }) static inline bool netif_elide_gro(const struct net_device *dev) { if (!(dev->features & NETIF_F_GRO) || dev->xdp_prog) return true; return false; } #define NETDEV_ALIGN 32 static inline int netdev_get_prio_tc_map(const struct net_device *dev, u32 prio) { return dev->prio_tc_map[prio & TC_BITMASK]; } static inline int netdev_set_prio_tc_map(struct net_device *dev, u8 prio, u8 tc) { if (tc >= dev->num_tc) return -EINVAL; dev->prio_tc_map[prio & TC_BITMASK] = tc & TC_BITMASK; return 0; } int netdev_txq_to_tc(struct net_device *dev, unsigned int txq); void netdev_reset_tc(struct net_device *dev); int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset); int netdev_set_num_tc(struct net_device *dev, u8 num_tc); static inline int netdev_get_num_tc(struct net_device *dev) { return dev->num_tc; } static inline void net_prefetch(void *p) { prefetch(p); #if L1_CACHE_BYTES < 128 prefetch((u8 *)p + L1_CACHE_BYTES); #endif } static inline void net_prefetchw(void *p) { prefetchw(p); #if L1_CACHE_BYTES < 128 prefetchw((u8 *)p + L1_CACHE_BYTES); #endif } void netdev_unbind_sb_channel(struct net_device *dev, struct net_device *sb_dev); int netdev_bind_sb_channel_queue(struct net_device *dev, struct net_device *sb_dev, u8 tc, u16 count, u16 offset); int netdev_set_sb_channel(struct net_device *dev, u16 channel); static inline int netdev_get_sb_channel(struct net_device *dev) { return max_t(int, -dev->num_tc, 0); } static inline struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev, unsigned int index) { DEBUG_NET_WARN_ON_ONCE(index >= dev->num_tx_queues); return &dev->_tx[index]; } static inline struct netdev_queue *skb_get_tx_queue(const struct net_device *dev, const struct sk_buff *skb) { return netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); } static inline void netdev_for_each_tx_queue(struct net_device *dev, void (*f)(struct net_device *, struct netdev_queue *, void *), void *arg) { unsigned int i; for (i = 0; i < dev->num_tx_queues; i++) f(dev, &dev->_tx[i], arg); } u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); struct netdev_queue *netdev_core_pick_tx(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); /* returns the headroom that the master device needs to take in account * when forwarding to this dev */ static inline unsigned netdev_get_fwd_headroom(struct net_device *dev) { return dev->priv_flags & IFF_PHONY_HEADROOM ? 
0 : dev->needed_headroom; } static inline void netdev_set_rx_headroom(struct net_device *dev, int new_hr) { if (dev->netdev_ops->ndo_set_rx_headroom) dev->netdev_ops->ndo_set_rx_headroom(dev, new_hr); } /* set the device rx headroom to the dev's default */ static inline void netdev_reset_rx_headroom(struct net_device *dev) { netdev_set_rx_headroom(dev, -1); } static inline void *netdev_get_ml_priv(struct net_device *dev, enum netdev_ml_priv_type type) { if (dev->ml_priv_type != type) return NULL; return dev->ml_priv; } static inline void netdev_set_ml_priv(struct net_device *dev, void *ml_priv, enum netdev_ml_priv_type type) { WARN(dev->ml_priv_type && dev->ml_priv_type != type, "Overwriting already set ml_priv_type (%u) with different ml_priv_type (%u)!\n", dev->ml_priv_type, type); WARN(!dev->ml_priv_type && dev->ml_priv, "Overwriting already set ml_priv and ml_priv_type is ML_PRIV_NONE!\n"); dev->ml_priv = ml_priv; dev->ml_priv_type = type; } /* * Net namespace inlines */ static inline struct net *dev_net(const struct net_device *dev) { return read_pnet(&dev->nd_net); } static inline struct net *dev_net_rcu(const struct net_device *dev) { return read_pnet_rcu(&dev->nd_net); } static inline void dev_net_set(struct net_device *dev, struct net *net) { write_pnet(&dev->nd_net, net); } /** * netdev_priv - access network device private data * @dev: network device * * Get network device private data */ static inline void *netdev_priv(const struct net_device *dev) { return (void *)dev->priv; } /* Set the sysfs physical device reference for the network logical device * if set prior to registration will cause a symlink during initialization. */ #define SET_NETDEV_DEV(net, pdev) ((net)->dev.parent = (pdev)) /* Set the sysfs device type for the network logical device to allow * fine-grained identification of different network device types. For * example Ethernet, Wireless LAN, Bluetooth, WiMAX etc. */ #define SET_NETDEV_DEVTYPE(net, devtype) ((net)->dev.type = (devtype)) void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index, enum netdev_queue_type type, struct napi_struct *napi); static inline void netdev_lock(struct net_device *dev) { mutex_lock(&dev->lock); } static inline void netdev_unlock(struct net_device *dev) { mutex_unlock(&dev->lock); } /* Additional netdev_lock()-related helpers are in net/netdev_lock.h */ void netif_napi_set_irq_locked(struct napi_struct *napi, int irq); static inline void netif_napi_set_irq(struct napi_struct *napi, int irq) { netdev_lock(napi->dev); netif_napi_set_irq_locked(napi, irq); netdev_unlock(napi->dev); } /* Default NAPI poll() weight * Device drivers are strongly advised to not use bigger value */ #define NAPI_POLL_WEIGHT 64 void netif_napi_add_weight_locked(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight); static inline void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight) { netdev_lock(dev); netif_napi_add_weight_locked(dev, napi, poll, weight); netdev_unlock(dev); } /** * netif_napi_add() - initialize a NAPI context * @dev: network device * @napi: NAPI context * @poll: polling function * * netif_napi_add() must be used to initialize a NAPI context prior to calling * *any* of the other NAPI-related functions. 
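 *
 * A hedged usage sketch (hypothetical driver; "ring" and "foo_poll" are
 * illustrative names): in the probe or open path, add the NAPI context
 * first and only then enable it:
 *
 *	netif_napi_add(netdev, &ring->napi, foo_poll);
 *	...
 *	napi_enable(&ring->napi);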
*/ static inline void netif_napi_add(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int)) { netif_napi_add_weight(dev, napi, poll, NAPI_POLL_WEIGHT); } static inline void netif_napi_add_locked(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int)) { netif_napi_add_weight_locked(dev, napi, poll, NAPI_POLL_WEIGHT); } static inline void netif_napi_add_tx_weight(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight) { set_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state); netif_napi_add_weight(dev, napi, poll, weight); } static inline void netif_napi_add_config_locked(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int index) { napi->index = index; napi->config = &dev->napi_config[index]; netif_napi_add_weight_locked(dev, napi, poll, NAPI_POLL_WEIGHT); } /** * netif_napi_add_config - initialize a NAPI context with persistent config * @dev: network device * @napi: NAPI context * @poll: polling function * @index: the NAPI index */ static inline void netif_napi_add_config(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int index) { netdev_lock(dev); netif_napi_add_config_locked(dev, napi, poll, index); netdev_unlock(dev); } /** * netif_napi_add_tx() - initialize a NAPI context to be used for Tx only * @dev: network device * @napi: NAPI context * @poll: polling function * * This variant of netif_napi_add() should be used from drivers using NAPI * to exclusively poll a TX queue. * This will avoid we add it into napi_hash[], thus polluting this hash table. */ static inline void netif_napi_add_tx(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int)) { netif_napi_add_tx_weight(dev, napi, poll, NAPI_POLL_WEIGHT); } void __netif_napi_del_locked(struct napi_struct *napi); /** * __netif_napi_del - remove a NAPI context * @napi: NAPI context * * Warning: caller must observe RCU grace period before freeing memory * containing @napi. Drivers might want to call this helper to combine * all the needed RCU grace periods into a single one. */ static inline void __netif_napi_del(struct napi_struct *napi) { netdev_lock(napi->dev); __netif_napi_del_locked(napi); netdev_unlock(napi->dev); } static inline void netif_napi_del_locked(struct napi_struct *napi) { __netif_napi_del_locked(napi); synchronize_net(); } /** * netif_napi_del - remove a NAPI context * @napi: NAPI context * * netif_napi_del() removes a NAPI context from the network device NAPI list */ static inline void netif_napi_del(struct napi_struct *napi) { __netif_napi_del(napi); synchronize_net(); } int netif_enable_cpu_rmap(struct net_device *dev, unsigned int num_irqs); void netif_set_affinity_auto(struct net_device *dev); struct packet_type { __be16 type; /* This is really htons(ether_type). 
*/ bool ignore_outgoing; struct net_device *dev; /* NULL is wildcarded here */ netdevice_tracker dev_tracker; int (*func) (struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); void (*list_func) (struct list_head *, struct packet_type *, struct net_device *); bool (*id_match)(struct packet_type *ptype, struct sock *sk); struct net *af_packet_net; void *af_packet_priv; struct list_head list; }; struct offload_callbacks { struct sk_buff *(*gso_segment)(struct sk_buff *skb, netdev_features_t features); struct sk_buff *(*gro_receive)(struct list_head *head, struct sk_buff *skb); int (*gro_complete)(struct sk_buff *skb, int nhoff); }; struct packet_offload { __be16 type; /* This is really htons(ether_type). */ u16 priority; struct offload_callbacks callbacks; struct list_head list; }; /* often modified stats are per-CPU, other are shared (netdev->stats) */ struct pcpu_sw_netstats { u64_stats_t rx_packets; u64_stats_t rx_bytes; u64_stats_t tx_packets; u64_stats_t tx_bytes; struct u64_stats_sync syncp; } __aligned(4 * sizeof(u64)); struct pcpu_dstats { u64_stats_t rx_packets; u64_stats_t rx_bytes; u64_stats_t tx_packets; u64_stats_t tx_bytes; u64_stats_t rx_drops; u64_stats_t tx_drops; struct u64_stats_sync syncp; } __aligned(8 * sizeof(u64)); struct pcpu_lstats { u64_stats_t packets; u64_stats_t bytes; struct u64_stats_sync syncp; } __aligned(2 * sizeof(u64)); void dev_lstats_read(struct net_device *dev, u64 *packets, u64 *bytes); static inline void dev_sw_netstats_rx_add(struct net_device *dev, unsigned int len) { struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); u64_stats_update_begin(&tstats->syncp); u64_stats_add(&tstats->rx_bytes, len); u64_stats_inc(&tstats->rx_packets); u64_stats_update_end(&tstats->syncp); } static inline void dev_sw_netstats_tx_add(struct net_device *dev, unsigned int packets, unsigned int len) { struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); u64_stats_update_begin(&tstats->syncp); u64_stats_add(&tstats->tx_bytes, len); u64_stats_add(&tstats->tx_packets, packets); u64_stats_update_end(&tstats->syncp); } static inline void dev_lstats_add(struct net_device *dev, unsigned int len) { struct pcpu_lstats *lstats = this_cpu_ptr(dev->lstats); u64_stats_update_begin(&lstats->syncp); u64_stats_add(&lstats->bytes, len); u64_stats_inc(&lstats->packets); u64_stats_update_end(&lstats->syncp); } static inline void dev_dstats_rx_add(struct net_device *dev, unsigned int len) { struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); u64_stats_update_begin(&dstats->syncp); u64_stats_inc(&dstats->rx_packets); u64_stats_add(&dstats->rx_bytes, len); u64_stats_update_end(&dstats->syncp); } static inline void dev_dstats_rx_dropped(struct net_device *dev) { struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); u64_stats_update_begin(&dstats->syncp); u64_stats_inc(&dstats->rx_drops); u64_stats_update_end(&dstats->syncp); } static inline void dev_dstats_tx_add(struct net_device *dev, unsigned int len) { struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); u64_stats_update_begin(&dstats->syncp); u64_stats_inc(&dstats->tx_packets); u64_stats_add(&dstats->tx_bytes, len); u64_stats_update_end(&dstats->syncp); } static inline void dev_dstats_tx_dropped(struct net_device *dev) { struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); u64_stats_update_begin(&dstats->syncp); u64_stats_inc(&dstats->tx_drops); u64_stats_update_end(&dstats->syncp); } #define __netdev_alloc_pcpu_stats(type, gfp) \ ({ \ typeof(type) __percpu *pcpu_stats = 
alloc_percpu_gfp(type, gfp);\ if (pcpu_stats) { \ int __cpu; \ for_each_possible_cpu(__cpu) { \ typeof(type) *stat; \ stat = per_cpu_ptr(pcpu_stats, __cpu); \ u64_stats_init(&stat->syncp); \ } \ } \ pcpu_stats; \ }) #define netdev_alloc_pcpu_stats(type) \ __netdev_alloc_pcpu_stats(type, GFP_KERNEL) #define devm_netdev_alloc_pcpu_stats(dev, type) \ ({ \ typeof(type) __percpu *pcpu_stats = devm_alloc_percpu(dev, type);\ if (pcpu_stats) { \ int __cpu; \ for_each_possible_cpu(__cpu) { \ typeof(type) *stat; \ stat = per_cpu_ptr(pcpu_stats, __cpu); \ u64_stats_init(&stat->syncp); \ } \ } \ pcpu_stats; \ }) enum netdev_lag_tx_type { NETDEV_LAG_TX_TYPE_UNKNOWN, NETDEV_LAG_TX_TYPE_RANDOM, NETDEV_LAG_TX_TYPE_BROADCAST, NETDEV_LAG_TX_TYPE_ROUNDROBIN, NETDEV_LAG_TX_TYPE_ACTIVEBACKUP, NETDEV_LAG_TX_TYPE_HASH, }; enum netdev_lag_hash { NETDEV_LAG_HASH_NONE, NETDEV_LAG_HASH_L2, NETDEV_LAG_HASH_L34, NETDEV_LAG_HASH_L23, NETDEV_LAG_HASH_E23, NETDEV_LAG_HASH_E34, NETDEV_LAG_HASH_VLAN_SRCMAC, NETDEV_LAG_HASH_UNKNOWN, }; struct netdev_lag_upper_info { enum netdev_lag_tx_type tx_type; enum netdev_lag_hash hash_type; }; struct netdev_lag_lower_state_info { u8 link_up : 1, tx_enabled : 1; }; #include <linux/notifier.h> /* netdevice notifier chain. Please remember to update netdev_cmd_to_name() * and the rtnetlink notification exclusion list in rtnetlink_event() when * adding new types. */ enum netdev_cmd { NETDEV_UP = 1, /* For now you can't veto a device up/down */ NETDEV_DOWN, NETDEV_REBOOT, /* Tell a protocol stack a network interface detected a hardware crash and restarted - we can use this eg to kick tcp sessions once done */ NETDEV_CHANGE, /* Notify device state change */ NETDEV_REGISTER, NETDEV_UNREGISTER, NETDEV_CHANGEMTU, /* notify after mtu change happened */ NETDEV_CHANGEADDR, /* notify after the address change */ NETDEV_PRE_CHANGEADDR, /* notify before the address change */ NETDEV_GOING_DOWN, NETDEV_CHANGENAME, NETDEV_FEAT_CHANGE, NETDEV_BONDING_FAILOVER, NETDEV_PRE_UP, NETDEV_PRE_TYPE_CHANGE, NETDEV_POST_TYPE_CHANGE, NETDEV_POST_INIT, NETDEV_PRE_UNINIT, NETDEV_RELEASE, NETDEV_NOTIFY_PEERS, NETDEV_JOIN, NETDEV_CHANGEUPPER, NETDEV_RESEND_IGMP, NETDEV_PRECHANGEMTU, /* notify before mtu change happened */ NETDEV_CHANGEINFODATA, NETDEV_BONDING_INFO, NETDEV_PRECHANGEUPPER, NETDEV_CHANGELOWERSTATE, NETDEV_UDP_TUNNEL_PUSH_INFO, NETDEV_UDP_TUNNEL_DROP_INFO, NETDEV_CHANGE_TX_QUEUE_LEN, NETDEV_CVLAN_FILTER_PUSH_INFO, NETDEV_CVLAN_FILTER_DROP_INFO, NETDEV_SVLAN_FILTER_PUSH_INFO, NETDEV_SVLAN_FILTER_DROP_INFO, NETDEV_OFFLOAD_XSTATS_ENABLE, NETDEV_OFFLOAD_XSTATS_DISABLE, NETDEV_OFFLOAD_XSTATS_REPORT_USED, NETDEV_OFFLOAD_XSTATS_REPORT_DELTA, NETDEV_XDP_FEAT_CHANGE, }; const char *netdev_cmd_to_name(enum netdev_cmd cmd); int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb); int unregister_netdevice_notifier_net(struct net *net, struct notifier_block *nb); int register_netdevice_notifier_dev_net(struct net_device *dev, struct notifier_block *nb, struct netdev_net_notifier *nn); int unregister_netdevice_notifier_dev_net(struct net_device *dev, struct notifier_block *nb, struct netdev_net_notifier *nn); struct netdev_notifier_info { struct net_device *dev; struct netlink_ext_ack *extack; }; struct netdev_notifier_info_ext { struct netdev_notifier_info info; /* must be first */ union { u32 mtu; } ext; }; struct netdev_notifier_change_info { struct netdev_notifier_info 
info; /* must be first */ unsigned int flags_changed; }; struct netdev_notifier_changeupper_info { struct netdev_notifier_info info; /* must be first */ struct net_device *upper_dev; /* new upper dev */ bool master; /* is upper dev master */ bool linking; /* is the notification for link or unlink */ void *upper_info; /* upper dev info */ }; struct netdev_notifier_changelowerstate_info { struct netdev_notifier_info info; /* must be first */ void *lower_state_info; /* is lower dev state */ }; struct netdev_notifier_pre_changeaddr_info { struct netdev_notifier_info info; /* must be first */ const unsigned char *dev_addr; }; enum netdev_offload_xstats_type { NETDEV_OFFLOAD_XSTATS_TYPE_L3 = 1, }; struct netdev_notifier_offload_xstats_info { struct netdev_notifier_info info; /* must be first */ enum netdev_offload_xstats_type type; union { /* NETDEV_OFFLOAD_XSTATS_REPORT_DELTA */ struct netdev_notifier_offload_xstats_rd *report_delta; /* NETDEV_OFFLOAD_XSTATS_REPORT_USED */ struct netdev_notifier_offload_xstats_ru *report_used; }; }; int netdev_offload_xstats_enable(struct net_device *dev, enum netdev_offload_xstats_type type, struct netlink_ext_ack *extack); int netdev_offload_xstats_disable(struct net_device *dev, enum netdev_offload_xstats_type type); bool netdev_offload_xstats_enabled(const struct net_device *dev, enum netdev_offload_xstats_type type); int netdev_offload_xstats_get(struct net_device *dev, enum netdev_offload_xstats_type type, struct rtnl_hw_stats64 *stats, bool *used, struct netlink_ext_ack *extack); void netdev_offload_xstats_report_delta(struct netdev_notifier_offload_xstats_rd *rd, const struct rtnl_hw_stats64 *stats); void netdev_offload_xstats_report_used(struct netdev_notifier_offload_xstats_ru *ru); void netdev_offload_xstats_push_delta(struct net_device *dev, enum netdev_offload_xstats_type type, const struct rtnl_hw_stats64 *stats); static inline void netdev_notifier_info_init(struct netdev_notifier_info *info, struct net_device *dev) { info->dev = dev; info->extack = NULL; } static inline struct net_device * netdev_notifier_info_to_dev(const struct netdev_notifier_info *info) { return info->dev; } static inline struct netlink_ext_ack * netdev_notifier_info_to_extack(const struct netdev_notifier_info *info) { return info->extack; } int call_netdevice_notifiers(unsigned long val, struct net_device *dev); int call_netdevice_notifiers_info(unsigned long val, struct netdev_notifier_info *info); #define for_each_netdev(net, d) \ list_for_each_entry(d, &(net)->dev_base_head, dev_list) #define for_each_netdev_reverse(net, d) \ list_for_each_entry_reverse(d, &(net)->dev_base_head, dev_list) #define for_each_netdev_rcu(net, d) \ list_for_each_entry_rcu(d, &(net)->dev_base_head, dev_list) #define for_each_netdev_safe(net, d, n) \ list_for_each_entry_safe(d, n, &(net)->dev_base_head, dev_list) #define for_each_netdev_continue(net, d) \ list_for_each_entry_continue(d, &(net)->dev_base_head, dev_list) #define for_each_netdev_continue_reverse(net, d) \ list_for_each_entry_continue_reverse(d, &(net)->dev_base_head, \ dev_list) #define for_each_netdev_continue_rcu(net, d) \ list_for_each_entry_continue_rcu(d, &(net)->dev_base_head, dev_list) #define for_each_netdev_in_bond_rcu(bond, slave) \ for_each_netdev_rcu(dev_net_rcu(bond), slave) \ if (netdev_master_upper_dev_get_rcu(slave) == (bond)) #define net_device_entry(lh) list_entry(lh, struct net_device, dev_list) #define for_each_netdev_dump(net, d, ifindex) \ for (; (d = xa_find(&(net)->dev_by_index, &ifindex, \ ULONG_MAX, 
XA_PRESENT)); ifindex++) static inline struct net_device *next_net_device(struct net_device *dev) { struct list_head *lh; struct net *net; net = dev_net(dev); lh = dev->dev_list.next; return lh == &net->dev_base_head ? NULL : net_device_entry(lh); } static inline struct net_device *next_net_device_rcu(struct net_device *dev) { struct list_head *lh; struct net *net; net = dev_net(dev); lh = rcu_dereference(list_next_rcu(&dev->dev_list)); return lh == &net->dev_base_head ? NULL : net_device_entry(lh); } static inline struct net_device *first_net_device(struct net *net) { return list_empty(&net->dev_base_head) ? NULL : net_device_entry(net->dev_base_head.next); } static inline struct net_device *first_net_device_rcu(struct net *net) { struct list_head *lh = rcu_dereference(list_next_rcu(&net->dev_base_head)); return lh == &net->dev_base_head ? NULL : net_device_entry(lh); } int netdev_boot_setup_check(struct net_device *dev); struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, const char *hwaddr); struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type, const char *hwaddr); struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type); void dev_add_pack(struct packet_type *pt); void dev_remove_pack(struct packet_type *pt); void __dev_remove_pack(struct packet_type *pt); void dev_add_offload(struct packet_offload *po); void dev_remove_offload(struct packet_offload *po); int dev_get_iflink(const struct net_device *dev); int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb); int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr, struct net_device_path_stack *stack); struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags, unsigned short mask); struct net_device *dev_get_by_name(struct net *net, const char *name); struct net_device *dev_get_by_name_rcu(struct net *net, const char *name); struct net_device *__dev_get_by_name(struct net *net, const char *name); bool netdev_name_in_use(struct net *net, const char *name); int dev_alloc_name(struct net_device *dev, const char *name); int netif_open(struct net_device *dev, struct netlink_ext_ack *extack); int dev_open(struct net_device *dev, struct netlink_ext_ack *extack); void netif_close(struct net_device *dev); void dev_close(struct net_device *dev); void dev_close_many(struct list_head *head, bool unlink); void netif_disable_lro(struct net_device *dev); void dev_disable_lro(struct net_device *dev); int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb); u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev); int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id); static inline int dev_queue_xmit(struct sk_buff *skb) { return __dev_queue_xmit(skb, NULL); } static inline int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev) { return __dev_queue_xmit(skb, sb_dev); } static inline int dev_direct_xmit(struct sk_buff *skb, u16 queue_id) { int ret; ret = __dev_direct_xmit(skb, queue_id); if (!dev_xmit_complete(ret)) kfree_skb(skb); return ret; } int register_netdevice(struct net_device *dev); void unregister_netdevice_queue(struct net_device *dev, struct list_head *head); void unregister_netdevice_many(struct list_head *head); static inline void unregister_netdevice(struct net_device *dev) { unregister_netdevice_queue(dev, NULL); } int netdev_refcnt_read(const struct net_device *dev); void 
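/*
 * Example (illustrative sketch, not from the kernel tree; the foo_* names
 * are hypothetical): a subsystem can watch device register/unregister
 * events with the netdevice notifier chain declared above.
 *
 *	static int foo_netdev_event(struct notifier_block *nb,
 *				    unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 *
 *		switch (event) {
 *		case NETDEV_REGISTER:
 *			pr_info("%s registered\n", dev->name);
 *			break;
 *		case NETDEV_UNREGISTER:
 *			pr_info("%s going away\n", dev->name);
 *			break;
 *		}
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block foo_netdev_nb = {
 *		.notifier_call = foo_netdev_event,
 *	};
 *
 * register_netdevice_notifier(&foo_netdev_nb) is then called from module
 * init, and unregister_netdevice_notifier(&foo_netdev_nb) from module exit.
 */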
free_netdev(struct net_device *dev); struct net_device *netdev_get_xmit_slave(struct net_device *dev, struct sk_buff *skb, bool all_slaves); struct net_device *netdev_sk_get_lowest_dev(struct net_device *dev, struct sock *sk); struct net_device *dev_get_by_index(struct net *net, int ifindex); struct net_device *__dev_get_by_index(struct net *net, int ifindex); struct net_device *netdev_get_by_index(struct net *net, int ifindex, netdevice_tracker *tracker, gfp_t gfp); struct net_device *netdev_get_by_name(struct net *net, const char *name, netdevice_tracker *tracker, gfp_t gfp); struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); void netdev_copy_name(struct net_device *dev, char *name); static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, unsigned int len) { if (!dev->header_ops || !dev->header_ops->create) return 0; return dev->header_ops->create(skb, dev, type, daddr, saddr, len); } static inline int dev_parse_header(const struct sk_buff *skb, unsigned char *haddr) { const struct net_device *dev = skb->dev; if (!dev->header_ops || !dev->header_ops->parse) return 0; return dev->header_ops->parse(skb, haddr); } static inline __be16 dev_parse_header_protocol(const struct sk_buff *skb) { const struct net_device *dev = skb->dev; if (!dev->header_ops || !dev->header_ops->parse_protocol) return 0; return dev->header_ops->parse_protocol(skb); } /* ll_header must have at least hard_header_len allocated */ static inline bool dev_validate_header(const struct net_device *dev, char *ll_header, int len) { if (likely(len >= dev->hard_header_len)) return true; if (len < dev->min_header_len) return false; if (capable(CAP_SYS_RAWIO)) { memset(ll_header + len, 0, dev->hard_header_len - len); return true; } if (dev->header_ops && dev->header_ops->validate) return dev->header_ops->validate(ll_header, len); return false; } static inline bool dev_has_header(const struct net_device *dev) { return dev->header_ops && dev->header_ops->create; } /* * Incoming packets are placed on per-CPU queues */ struct softnet_data { struct list_head poll_list; struct sk_buff_head process_queue; local_lock_t process_queue_bh_lock; /* stats */ unsigned int processed; unsigned int time_squeeze; #ifdef CONFIG_RPS struct softnet_data *rps_ipi_list; #endif unsigned int received_rps; bool in_net_rx_action; bool in_napi_threaded_poll; #ifdef CONFIG_NET_FLOW_LIMIT struct sd_flow_limit __rcu *flow_limit; #endif struct Qdisc *output_queue; struct Qdisc **output_queue_tailp; struct sk_buff *completion_queue; #ifdef CONFIG_XFRM_OFFLOAD struct sk_buff_head xfrm_backlog; #endif /* written and read only by owning cpu: */ struct netdev_xmit xmit; #ifdef CONFIG_RPS /* input_queue_head should be written by cpu owning this struct, * and only read by other cpus. Worth using a cache line. 
*/ unsigned int input_queue_head ____cacheline_aligned_in_smp; /* Elements below can be accessed between CPUs for RPS/RFS */ call_single_data_t csd ____cacheline_aligned_in_smp; struct softnet_data *rps_ipi_next; unsigned int cpu; unsigned int input_queue_tail; #endif struct sk_buff_head input_pkt_queue; struct napi_struct backlog; atomic_t dropped ____cacheline_aligned_in_smp; /* Another possibly contended cache line */ spinlock_t defer_lock ____cacheline_aligned_in_smp; int defer_count; int defer_ipi_scheduled; struct sk_buff *defer_list; call_single_data_t defer_csd; }; DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); struct page_pool_bh { struct page_pool *pool; local_lock_t bh_lock; }; DECLARE_PER_CPU(struct page_pool_bh, system_page_pool); #ifndef CONFIG_PREEMPT_RT static inline int dev_recursion_level(void) { return this_cpu_read(softnet_data.xmit.recursion); } #else static inline int dev_recursion_level(void) { return current->net_xmit.recursion; } #endif void __netif_schedule(struct Qdisc *q); void netif_schedule_queue(struct netdev_queue *txq); static inline void netif_tx_schedule_all(struct net_device *dev) { unsigned int i; for (i = 0; i < dev->num_tx_queues; i++) netif_schedule_queue(netdev_get_tx_queue(dev, i)); } static __always_inline void netif_tx_start_queue(struct netdev_queue *dev_queue) { clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state); } /** * netif_start_queue - allow transmit * @dev: network device * * Allow upper layers to call the device hard_start_xmit routine. */ static inline void netif_start_queue(struct net_device *dev) { netif_tx_start_queue(netdev_get_tx_queue(dev, 0)); } static inline void netif_tx_start_all_queues(struct net_device *dev) { unsigned int i; for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); netif_tx_start_queue(txq); } } void netif_tx_wake_queue(struct netdev_queue *dev_queue); /** * netif_wake_queue - restart transmit * @dev: network device * * Allow upper layers to call the device hard_start_xmit routine. * Used for flow control when transmit resources are available. */ static inline void netif_wake_queue(struct net_device *dev) { netif_tx_wake_queue(netdev_get_tx_queue(dev, 0)); } static inline void netif_tx_wake_all_queues(struct net_device *dev) { unsigned int i; for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); netif_tx_wake_queue(txq); } } static __always_inline void netif_tx_stop_queue(struct netdev_queue *dev_queue) { /* Paired with READ_ONCE() from dev_watchdog() */ WRITE_ONCE(dev_queue->trans_start, jiffies); /* This barrier is paired with smp_mb() from dev_watchdog() */ smp_mb__before_atomic(); /* Must be an atomic op see netif_txq_try_stop() */ set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state); } /** * netif_stop_queue - stop transmitted packets * @dev: network device * * Stop upper layers calling the device hard_start_xmit routine. * Used for flow control when transmit resources are unavailable. */ static inline void netif_stop_queue(struct net_device *dev) { netif_tx_stop_queue(netdev_get_tx_queue(dev, 0)); } void netif_tx_stop_all_queues(struct net_device *dev); static inline bool netif_tx_queue_stopped(const struct netdev_queue *dev_queue) { return test_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state); } /** * netif_queue_stopped - test if transmit queue is flowblocked * @dev: network device * * Test if transmit queue on device is currently unable to send. 
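 *
 * A common single-queue flow-control pattern (illustrative sketch only;
 * the foo_* helpers are hypothetical): stop the queue in ->ndo_start_xmit()
 * when the TX ring is full, and wake it from the TX completion handler once
 * descriptors have been reclaimed:
 *
 *	if (foo_tx_ring_full(priv)) {
 *		netif_stop_queue(dev);
 *		return NETDEV_TX_BUSY;
 *	}
 *
 *	if (netif_queue_stopped(dev) && foo_tx_ring_has_room(priv))
 *		netif_wake_queue(dev);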
*/ static inline bool netif_queue_stopped(const struct net_device *dev) { return netif_tx_queue_stopped(netdev_get_tx_queue(dev, 0)); } static inline bool netif_xmit_stopped(const struct netdev_queue *dev_queue) { return dev_queue->state & QUEUE_STATE_ANY_XOFF; } static inline bool netif_xmit_frozen_or_stopped(const struct netdev_queue *dev_queue) { return dev_queue->state & QUEUE_STATE_ANY_XOFF_OR_FROZEN; } static inline bool netif_xmit_frozen_or_drv_stopped(const struct netdev_queue *dev_queue) { return dev_queue->state & QUEUE_STATE_DRV_XOFF_OR_FROZEN; } /** * netdev_queue_set_dql_min_limit - set dql minimum limit * @dev_queue: pointer to transmit queue * @min_limit: dql minimum limit * * Forces xmit_more() to return true until the minimum threshold * defined by @min_limit is reached (or until the tx queue is * empty). Warning: to be use with care, misuse will impact the * latency. */ static inline void netdev_queue_set_dql_min_limit(struct netdev_queue *dev_queue, unsigned int min_limit) { #ifdef CONFIG_BQL dev_queue->dql.min_limit = min_limit; #endif } static inline int netdev_queue_dql_avail(const struct netdev_queue *txq) { #ifdef CONFIG_BQL /* Non-BQL migrated drivers will return 0, too. */ return dql_avail(&txq->dql); #else return 0; #endif } /** * netdev_txq_bql_enqueue_prefetchw - prefetch bql data for write * @dev_queue: pointer to transmit queue * * BQL enabled drivers might use this helper in their ndo_start_xmit(), * to give appropriate hint to the CPU. */ static inline void netdev_txq_bql_enqueue_prefetchw(struct netdev_queue *dev_queue) { #ifdef CONFIG_BQL prefetchw(&dev_queue->dql.num_queued); #endif } /** * netdev_txq_bql_complete_prefetchw - prefetch bql data for write * @dev_queue: pointer to transmit queue * * BQL enabled drivers might use this helper in their TX completion path, * to give appropriate hint to the CPU. */ static inline void netdev_txq_bql_complete_prefetchw(struct netdev_queue *dev_queue) { #ifdef CONFIG_BQL prefetchw(&dev_queue->dql.limit); #endif } /** * netdev_tx_sent_queue - report the number of bytes queued to a given tx queue * @dev_queue: network device queue * @bytes: number of bytes queued to the device queue * * Report the number of bytes queued for sending/completion to the network * device hardware queue. @bytes should be a good approximation and should * exactly match netdev_completed_queue() @bytes. * This is typically called once per packet, from ndo_start_xmit(). */ static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue, unsigned int bytes) { #ifdef CONFIG_BQL dql_queued(&dev_queue->dql, bytes); if (likely(dql_avail(&dev_queue->dql) >= 0)) return; /* Paired with READ_ONCE() from dev_watchdog() */ WRITE_ONCE(dev_queue->trans_start, jiffies); /* This barrier is paired with smp_mb() from dev_watchdog() */ smp_mb__before_atomic(); set_bit(__QUEUE_STATE_STACK_XOFF, &dev_queue->state); /* * The XOFF flag must be set before checking the dql_avail below, * because in netdev_tx_completed_queue we update the dql_completed * before checking the XOFF flag. */ smp_mb__after_atomic(); /* check again in case another CPU has just made room avail */ if (unlikely(dql_avail(&dev_queue->dql) >= 0)) clear_bit(__QUEUE_STATE_STACK_XOFF, &dev_queue->state); #endif } /* Variant of netdev_tx_sent_queue() for drivers that are aware * that they should not test BQL status themselves. * We do want to change __QUEUE_STATE_STACK_XOFF only for the last * skb of a batch. * Returns true if the doorbell must be used to kick the NIC. 
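 *
 * Illustrative sketch (hypothetical foo_* helpers) of the intended use in
 * ->ndo_start_xmit(), together with the software stats helpers above:
 *
 *	unsigned int len = skb->len;
 *
 *	foo_post_descriptor(ring, skb);
 *	dev_sw_netstats_tx_add(dev, 1, len);
 *	if (__netdev_tx_sent_queue(txq, len, netdev_xmit_more()))
 *		foo_ring_doorbell(ring);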
*/ static inline bool __netdev_tx_sent_queue(struct netdev_queue *dev_queue, unsigned int bytes, bool xmit_more) { if (xmit_more) { #ifdef CONFIG_BQL dql_queued(&dev_queue->dql, bytes); #endif return netif_tx_queue_stopped(dev_queue); } netdev_tx_sent_queue(dev_queue, bytes); return true; } /** * netdev_sent_queue - report the number of bytes queued to hardware * @dev: network device * @bytes: number of bytes queued to the hardware device queue * * Report the number of bytes queued for sending/completion to the network * device hardware queue#0. @bytes should be a good approximation and should * exactly match netdev_completed_queue() @bytes. * This is typically called once per packet, from ndo_start_xmit(). */ static inline void netdev_sent_queue(struct net_device *dev, unsigned int bytes) { netdev_tx_sent_queue(netdev_get_tx_queue(dev, 0), bytes); } static inline bool __netdev_sent_queue(struct net_device *dev, unsigned int bytes, bool xmit_more) { return __netdev_tx_sent_queue(netdev_get_tx_queue(dev, 0), bytes, xmit_more); } /** * netdev_tx_completed_queue - report number of packets/bytes at TX completion. * @dev_queue: network device queue * @pkts: number of packets (currently ignored) * @bytes: number of bytes dequeued from the device queue * * Must be called at most once per TX completion round (and not per * individual packet), so that BQL can adjust its limits appropriately. */ static inline void netdev_tx_completed_queue(struct netdev_queue *dev_queue, unsigned int pkts, unsigned int bytes) { #ifdef CONFIG_BQL if (unlikely(!bytes)) return; dql_completed(&dev_queue->dql, bytes); /* * Without the memory barrier there is a small possibility that * netdev_tx_sent_queue will miss the update and cause the queue to * be stopped forever */ smp_mb(); /* NOTE: netdev_txq_completed_mb() assumes this exists */ if (unlikely(dql_avail(&dev_queue->dql) < 0)) return; if (test_and_clear_bit(__QUEUE_STATE_STACK_XOFF, &dev_queue->state)) netif_schedule_queue(dev_queue); #endif } /** * netdev_completed_queue - report bytes and packets completed by device * @dev: network device * @pkts: actual number of packets sent over the medium * @bytes: actual number of bytes sent over the medium * * Report the number of bytes and packets transmitted by the network device * hardware queue over the physical medium, @bytes must exactly match the * @bytes amount passed to netdev_sent_queue() */ static inline void netdev_completed_queue(struct net_device *dev, unsigned int pkts, unsigned int bytes) { netdev_tx_completed_queue(netdev_get_tx_queue(dev, 0), pkts, bytes); } static inline void netdev_tx_reset_queue(struct netdev_queue *q) { #ifdef CONFIG_BQL clear_bit(__QUEUE_STATE_STACK_XOFF, &q->state); dql_reset(&q->dql); #endif } /** * netdev_tx_reset_subqueue - reset the BQL stats and state of a netdev queue * @dev: network device * @qid: stack index of the queue to reset */ static inline void netdev_tx_reset_subqueue(const struct net_device *dev, u32 qid) { netdev_tx_reset_queue(netdev_get_tx_queue(dev, qid)); } /** * netdev_reset_queue - reset the packets and bytes count of a network device * @dev_queue: network device * * Reset the bytes and packet count of a network device and clear the * software flow control OFF bit for this network device */ static inline void netdev_reset_queue(struct net_device *dev_queue) { netdev_tx_reset_subqueue(dev_queue, 0); } /** * netdev_cap_txqueue - check if selected tx queue exceeds device queues * @dev: network device * @queue_index: given tx queue index * * Returns 0 if given tx 
queue index >= number of device tx queues, * otherwise returns the originally passed tx queue index. */ static inline u16 netdev_cap_txqueue(struct net_device *dev, u16 queue_index) { if (unlikely(queue_index >= dev->real_num_tx_queues)) { net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n", dev->name, queue_index, dev->real_num_tx_queues); return 0; } return queue_index; } /** * netif_running - test if up * @dev: network device * * Test if the device has been brought up. */ static inline bool netif_running(const struct net_device *dev) { return test_bit(__LINK_STATE_START, &dev->state); } /* * Routines to manage the subqueues on a device. We only need start, * stop, and a check if it's stopped. All other device management is * done at the overall netdevice level. * Also test the device if we're multiqueue. */ /** * netif_start_subqueue - allow sending packets on subqueue * @dev: network device * @queue_index: sub queue index * * Start individual transmit queue of a device with multiple transmit queues. */ static inline void netif_start_subqueue(struct net_device *dev, u16 queue_index) { struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); netif_tx_start_queue(txq); } /** * netif_stop_subqueue - stop sending packets on subqueue * @dev: network device * @queue_index: sub queue index * * Stop individual transmit queue of a device with multiple transmit queues. */ static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index) { struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); netif_tx_stop_queue(txq); } /** * __netif_subqueue_stopped - test status of subqueue * @dev: network device * @queue_index: sub queue index * * Check individual transmit queue of a device with multiple transmit queues. */ static inline bool __netif_subqueue_stopped(const struct net_device *dev, u16 queue_index) { struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); return netif_tx_queue_stopped(txq); } /** * netif_subqueue_stopped - test status of subqueue * @dev: network device * @skb: sub queue buffer pointer * * Check individual transmit queue of a device with multiple transmit queues. */ static inline bool netif_subqueue_stopped(const struct net_device *dev, struct sk_buff *skb) { return __netif_subqueue_stopped(dev, skb_get_queue_mapping(skb)); } /** * netif_wake_subqueue - allow sending packets on subqueue * @dev: network device * @queue_index: sub queue index * * Resume individual transmit queue of a device with multiple transmit queues. */ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) { struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); netif_tx_wake_queue(txq); } #ifdef CONFIG_XPS int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, u16 index); int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask, u16 index, enum xps_map_type type); /** * netif_attr_test_mask - Test a CPU or Rx queue set in a mask * @j: CPU/Rx queue index * @mask: bitmask of all cpus/rx queues * @nr_bits: number of bits in the bitmask * * Test if a CPU or Rx queue index is set in a mask of all CPU/Rx queues. 
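 *
 * The XPS helpers above are typically used at driver init time; an
 * illustrative sketch (hypothetical mapping, not from any in-tree driver)
 * that spreads TX queues across online CPUs:
 *
 *	unsigned int i;
 *
 *	for (i = 0; i < dev->real_num_tx_queues; i++)
 *		netif_set_xps_queue(dev, cpumask_of(i % num_online_cpus()), i);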
*/ static inline bool netif_attr_test_mask(unsigned long j, const unsigned long *mask, unsigned int nr_bits) { cpu_max_bits_warn(j, nr_bits); return test_bit(j, mask); } /** * netif_attr_test_online - Test for online CPU/Rx queue * @j: CPU/Rx queue index * @online_mask: bitmask for CPUs/Rx queues that are online * @nr_bits: number of bits in the bitmask * * Returns: true if a CPU/Rx queue is online. */ static inline bool netif_attr_test_online(unsigned long j, const unsigned long *online_mask, unsigned int nr_bits) { cpu_max_bits_warn(j, nr_bits); if (online_mask) return test_bit(j, online_mask); return (j < nr_bits); } /** * netif_attrmask_next - get the next CPU/Rx queue in a cpu/Rx queues mask * @n: CPU/Rx queue index * @srcp: the cpumask/Rx queue mask pointer * @nr_bits: number of bits in the bitmask * * Returns: next (after n) CPU/Rx queue index in the mask; * >= nr_bits if no further CPUs/Rx queues set. */ static inline unsigned int netif_attrmask_next(int n, const unsigned long *srcp, unsigned int nr_bits) { /* -1 is a legal arg here. */ if (n != -1) cpu_max_bits_warn(n, nr_bits); if (srcp) return find_next_bit(srcp, nr_bits, n + 1); return n + 1; } /** * netif_attrmask_next_and - get the next CPU/Rx queue in \*src1p & \*src2p * @n: CPU/Rx queue index * @src1p: the first CPUs/Rx queues mask pointer * @src2p: the second CPUs/Rx queues mask pointer * @nr_bits: number of bits in the bitmask * * Returns: next (after n) CPU/Rx queue index set in both masks; * >= nr_bits if no further CPUs/Rx queues set in both. */ static inline int netif_attrmask_next_and(int n, const unsigned long *src1p, const unsigned long *src2p, unsigned int nr_bits) { /* -1 is a legal arg here. */ if (n != -1) cpu_max_bits_warn(n, nr_bits); if (src1p && src2p) return find_next_and_bit(src1p, src2p, nr_bits, n + 1); else if (src1p) return find_next_bit(src1p, nr_bits, n + 1); else if (src2p) return find_next_bit(src2p, nr_bits, n + 1); return n + 1; } #else static inline int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, u16 index) { return 0; } static inline int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask, u16 index, enum xps_map_type type) { return 0; } #endif /** * netif_is_multiqueue - test if device has multiple transmit queues * @dev: network device * * Check if device has multiple transmit queues */ static inline bool netif_is_multiqueue(const struct net_device *dev) { return dev->num_tx_queues > 1; } int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq); int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq); int netif_set_real_num_queues(struct net_device *dev, unsigned int txq, unsigned int rxq); int netif_get_num_default_rss_queues(void); void dev_kfree_skb_irq_reason(struct sk_buff *skb, enum skb_drop_reason reason); void dev_kfree_skb_any_reason(struct sk_buff *skb, enum skb_drop_reason reason); /* * It is not allowed to call kfree_skb() or consume_skb() from hardware * interrupt context or with hardware interrupts being disabled. * (in_hardirq() || irqs_disabled()) * * We provide four helpers that can be used in following contexts : * * dev_kfree_skb_irq(skb) when caller drops a packet from irq context, * replacing kfree_skb(skb) * * dev_consume_skb_irq(skb) when caller consumes a packet from irq context. 
* Typically used in place of consume_skb(skb) in TX completion path * * dev_kfree_skb_any(skb) when caller doesn't know its current irq context, * replacing kfree_skb(skb) * * dev_consume_skb_any(skb) when caller doesn't know its current irq context, * and consumed a packet. Used in place of consume_skb(skb) */ static inline void dev_kfree_skb_irq(struct sk_buff *skb) { dev_kfree_skb_irq_reason(skb, SKB_DROP_REASON_NOT_SPECIFIED); } static inline void dev_consume_skb_irq(struct sk_buff *skb) { dev_kfree_skb_irq_reason(skb, SKB_CONSUMED); } static inline void dev_kfree_skb_any(struct sk_buff *skb) { dev_kfree_skb_any_reason(skb, SKB_DROP_REASON_NOT_SPECIFIED); } static inline void dev_consume_skb_any(struct sk_buff *skb) { dev_kfree_skb_any_reason(skb, SKB_CONSUMED); } u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp, const struct bpf_prog *xdp_prog); void generic_xdp_tx(struct sk_buff *skb, const struct bpf_prog *xdp_prog); int do_xdp_generic(const struct bpf_prog *xdp_prog, struct sk_buff **pskb); int netif_rx(struct sk_buff *skb); int __netif_rx(struct sk_buff *skb); int netif_receive_skb(struct sk_buff *skb); int netif_receive_skb_core(struct sk_buff *skb); void netif_receive_skb_list_internal(struct list_head *head); void netif_receive_skb_list(struct list_head *head); gro_result_t gro_receive_skb(struct gro_node *gro, struct sk_buff *skb); static inline gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { return gro_receive_skb(&napi->gro, skb); } struct sk_buff *napi_get_frags(struct napi_struct *napi); gro_result_t napi_gro_frags(struct napi_struct *napi); static inline void napi_free_frags(struct napi_struct *napi) { kfree_skb(napi->skb); napi->skb = NULL; } bool netdev_is_rx_handler_busy(struct net_device *dev); int netdev_rx_handler_register(struct net_device *dev, rx_handler_func_t *rx_handler, void *rx_handler_data); void netdev_rx_handler_unregister(struct net_device *dev); bool dev_valid_name(const char *name); static inline bool is_socket_ioctl_cmd(unsigned int cmd) { return _IOC_TYPE(cmd) == SOCK_IOC_TYPE; } int get_user_ifreq(struct ifreq *ifr, void __user **ifrdata, void __user *arg); int put_user_ifreq(struct ifreq *ifr, void __user *arg); int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, void __user *data, bool *need_copyout); int dev_ifconf(struct net *net, struct ifconf __user *ifc); int dev_eth_ioctl(struct net_device *dev, struct ifreq *ifr, unsigned int cmd); int generic_hwtstamp_get_lower(struct net_device *dev, struct kernel_hwtstamp_config *kernel_cfg); int generic_hwtstamp_set_lower(struct net_device *dev, struct kernel_hwtstamp_config *kernel_cfg, struct netlink_ext_ack *extack); int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *userdata); unsigned int dev_get_flags(const struct net_device *); int __dev_change_flags(struct net_device *dev, unsigned int flags, struct netlink_ext_ack *extack); int netif_change_flags(struct net_device *dev, unsigned int flags, struct netlink_ext_ack *extack); int dev_change_flags(struct net_device *dev, unsigned int flags, struct netlink_ext_ack *extack); int netif_set_alias(struct net_device *dev, const char *alias, size_t len); int dev_set_alias(struct net_device *, const char *, size_t); int dev_get_alias(const struct net_device *, char *, size_t); int __dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat, int new_ifindex, struct netlink_ext_ack *extack); int dev_change_net_namespace(struct net_device *dev, struct net 
*net, const char *pat); int __dev_set_mtu(struct net_device *, int); int netif_set_mtu(struct net_device *dev, int new_mtu); int dev_set_mtu(struct net_device *, int); int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr, struct netlink_ext_ack *extack); int netif_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss, struct netlink_ext_ack *extack); int dev_set_mac_address(struct net_device *dev, struct sockaddr_storage *ss, struct netlink_ext_ack *extack); int dev_set_mac_address_user(struct net_device *dev, struct sockaddr_storage *ss, struct netlink_ext_ack *extack); int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name); int dev_get_port_parent_id(struct net_device *dev, struct netdev_phys_item_id *ppid, bool recurse); bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); u8 dev_xdp_prog_count(struct net_device *dev); int netif_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf); int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf); u8 dev_xdp_sb_prog_count(struct net_device *dev); u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode); u32 dev_get_min_mp_channel_count(const struct net_device *dev); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb_nomtu(struct net_device *dev, struct sk_buff *skb); bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb); static __always_inline bool __is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb, const bool check_mtu) { const u32 vlan_hdr_len = 4; /* VLAN_HLEN */ unsigned int len; if (!(dev->flags & IFF_UP)) return false; if (!check_mtu) return true; len = dev->mtu + dev->hard_header_len + vlan_hdr_len; if (skb->len <= len) return true; /* if TSO is enabled, we don't care about the length as the packet * could be forwarded without being segmented before */ if (skb_is_gso(skb)) return true; return false; } void netdev_core_stats_inc(struct net_device *dev, u32 offset); #define DEV_CORE_STATS_INC(FIELD) \ static inline void dev_core_stats_##FIELD##_inc(struct net_device *dev) \ { \ netdev_core_stats_inc(dev, \ offsetof(struct net_device_core_stats, FIELD)); \ } DEV_CORE_STATS_INC(rx_dropped) DEV_CORE_STATS_INC(tx_dropped) DEV_CORE_STATS_INC(rx_nohandler) DEV_CORE_STATS_INC(rx_otherhost_dropped) #undef DEV_CORE_STATS_INC static __always_inline int ____dev_forward_skb(struct net_device *dev, struct sk_buff *skb, const bool check_mtu) { if (skb_orphan_frags(skb, GFP_ATOMIC) || unlikely(!__is_skb_forwardable(dev, skb, check_mtu))) { dev_core_stats_rx_dropped_inc(dev); kfree_skb(skb); return NET_RX_DROP; } skb_scrub_packet(skb, !net_eq(dev_net(dev), dev_net(skb->dev))); skb->priority = 0; return 0; } bool dev_nit_active_rcu(const struct net_device *dev); static inline bool dev_nit_active(const struct net_device *dev) { bool ret; rcu_read_lock(); ret = dev_nit_active_rcu(dev); rcu_read_unlock(); return ret; } void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); static inline void __dev_put(struct net_device *dev) { if (dev) { #ifdef CONFIG_PCPU_DEV_REFCNT 
this_cpu_dec(*dev->pcpu_refcnt); #else refcount_dec(&dev->dev_refcnt); #endif } } static inline void __dev_hold(struct net_device *dev) { if (dev) { #ifdef CONFIG_PCPU_DEV_REFCNT this_cpu_inc(*dev->pcpu_refcnt); #else refcount_inc(&dev->dev_refcnt); #endif } } static inline void __netdev_tracker_alloc(struct net_device *dev, netdevice_tracker *tracker, gfp_t gfp) { #ifdef CONFIG_NET_DEV_REFCNT_TRACKER ref_tracker_alloc(&dev->refcnt_tracker, tracker, gfp); #endif } /* netdev_tracker_alloc() can upgrade a prior untracked reference * taken by dev_get_by_name()/dev_get_by_index() to a tracked one. */ static inline void netdev_tracker_alloc(struct net_device *dev, netdevice_tracker *tracker, gfp_t gfp) { #ifdef CONFIG_NET_DEV_REFCNT_TRACKER refcount_dec(&dev->refcnt_tracker.no_tracker); __netdev_tracker_alloc(dev, tracker, gfp); #endif } static inline void netdev_tracker_free(struct net_device *dev, netdevice_tracker *tracker) { #ifdef CONFIG_NET_DEV_REFCNT_TRACKER ref_tracker_free(&dev->refcnt_tracker, tracker); #endif } static inline void netdev_hold(struct net_device *dev, netdevice_tracker *tracker, gfp_t gfp) { if (dev) { __dev_hold(dev); __netdev_tracker_alloc(dev, tracker, gfp); } } static inline void netdev_put(struct net_device *dev, netdevice_tracker *tracker) { if (dev) { netdev_tracker_free(dev, tracker); __dev_put(dev); } } /** * dev_hold - get reference to device * @dev: network device * * Hold reference to device to keep it from being freed. * Try using netdev_hold() instead. */ static inline void dev_hold(struct net_device *dev) { netdev_hold(dev, NULL, GFP_ATOMIC); } /** * dev_put - release reference to device * @dev: network device * * Release reference to device to allow it to be freed. * Try using netdev_put() instead. */ static inline void dev_put(struct net_device *dev) { netdev_put(dev, NULL); } DEFINE_FREE(dev_put, struct net_device *, if (_T) dev_put(_T)) static inline void netdev_ref_replace(struct net_device *odev, struct net_device *ndev, netdevice_tracker *tracker, gfp_t gfp) { if (odev) netdev_tracker_free(odev, tracker); __dev_hold(ndev); __dev_put(odev); if (ndev) __netdev_tracker_alloc(ndev, tracker, gfp); } /* Carrier loss detection, dial on demand. The functions netif_carrier_on * and _off may be called from IRQ context, but it is caller * who is responsible for serialization of these calls. * * The name carrier is inappropriate, these functions should really be * called netif_lowerlayer_*() because they represent the state of any * kind of lower layer not just hardware media. */ void linkwatch_fire_event(struct net_device *dev); /** * linkwatch_sync_dev - sync linkwatch for the given device * @dev: network device to sync linkwatch for * * Sync linkwatch for the given device, removing it from the * pending work list (if queued). */ void linkwatch_sync_dev(struct net_device *dev); void __linkwatch_sync_dev(struct net_device *dev); /** * netif_carrier_ok - test if carrier present * @dev: network device * * Check if carrier is present on device */ static inline bool netif_carrier_ok(const struct net_device *dev) { return !test_bit(__LINK_STATE_NOCARRIER, &dev->state); } unsigned long dev_trans_start(struct net_device *dev); void netdev_watchdog_up(struct net_device *dev); void netif_carrier_on(struct net_device *dev); void netif_carrier_off(struct net_device *dev); void netif_carrier_event(struct net_device *dev); /** * netif_dormant_on - mark device as dormant. * @dev: network device * * Mark device as dormant (as per RFC2863). 
* * The dormant state indicates that the relevant interface is not * actually in a condition to pass packets (i.e., it is not 'up') but is * in a "pending" state, waiting for some external event. For "on- * demand" interfaces, this new state identifies the situation where the * interface is waiting for events to place it in the up state. */ static inline void netif_dormant_on(struct net_device *dev) { if (!test_and_set_bit(__LINK_STATE_DORMANT, &dev->state)) linkwatch_fire_event(dev); } /** * netif_dormant_off - set device as not dormant. * @dev: network device * * Device is not in dormant state. */ static inline void netif_dormant_off(struct net_device *dev) { if (test_and_clear_bit(__LINK_STATE_DORMANT, &dev->state)) linkwatch_fire_event(dev); } /** * netif_dormant - test if device is dormant * @dev: network device * * Check if device is dormant. */ static inline bool netif_dormant(const struct net_device *dev) { return test_bit(__LINK_STATE_DORMANT, &dev->state); } /** * netif_testing_on - mark device as under test. * @dev: network device * * Mark device as under test (as per RFC2863). * * The testing state indicates that some test(s) must be performed on * the interface. After completion, of the test, the interface state * will change to up, dormant, or down, as appropriate. */ static inline void netif_testing_on(struct net_device *dev) { if (!test_and_set_bit(__LINK_STATE_TESTING, &dev->state)) linkwatch_fire_event(dev); } /** * netif_testing_off - set device as not under test. * @dev: network device * * Device is not in testing state. */ static inline void netif_testing_off(struct net_device *dev) { if (test_and_clear_bit(__LINK_STATE_TESTING, &dev->state)) linkwatch_fire_event(dev); } /** * netif_testing - test if device is under test * @dev: network device * * Check if device is under test */ static inline bool netif_testing(const struct net_device *dev) { return test_bit(__LINK_STATE_TESTING, &dev->state); } /** * netif_oper_up - test if device is operational * @dev: network device * * Check if carrier is operational */ static inline bool netif_oper_up(const struct net_device *dev) { unsigned int operstate = READ_ONCE(dev->operstate); return operstate == IF_OPER_UP || operstate == IF_OPER_UNKNOWN /* backward compat */; } /** * netif_device_present - is device available or removed * @dev: network device * * Check if device has not been removed from system. 
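 *
 * Illustrative sketch (hypothetical foo_* driver): netif_device_detach()
 * is typically called before the hardware disappears (e.g. on suspend or
 * surprise removal), and the data paths then test this bit:
 *
 *	static netdev_tx_t foo_start_xmit(struct sk_buff *skb,
 *					  struct net_device *dev)
 *	{
 *		if (!netif_device_present(dev)) {
 *			dev_kfree_skb_any(skb);
 *			return NETDEV_TX_OK;
 *		}
 *		...
 *	}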
*/ static inline bool netif_device_present(const struct net_device *dev) { return test_bit(__LINK_STATE_PRESENT, &dev->state); } void netif_device_detach(struct net_device *dev); void netif_device_attach(struct net_device *dev); /* * Network interface message level settings */ enum { NETIF_MSG_DRV_BIT, NETIF_MSG_PROBE_BIT, NETIF_MSG_LINK_BIT, NETIF_MSG_TIMER_BIT, NETIF_MSG_IFDOWN_BIT, NETIF_MSG_IFUP_BIT, NETIF_MSG_RX_ERR_BIT, NETIF_MSG_TX_ERR_BIT, NETIF_MSG_TX_QUEUED_BIT, NETIF_MSG_INTR_BIT, NETIF_MSG_TX_DONE_BIT, NETIF_MSG_RX_STATUS_BIT, NETIF_MSG_PKTDATA_BIT, NETIF_MSG_HW_BIT, NETIF_MSG_WOL_BIT, /* When you add a new bit above, update netif_msg_class_names array * in net/ethtool/common.c */ NETIF_MSG_CLASS_COUNT, }; /* Both ethtool_ops interface and internal driver implementation use u32 */ static_assert(NETIF_MSG_CLASS_COUNT <= 32); #define __NETIF_MSG_BIT(bit) ((u32)1 << (bit)) #define __NETIF_MSG(name) __NETIF_MSG_BIT(NETIF_MSG_ ## name ## _BIT) #define NETIF_MSG_DRV __NETIF_MSG(DRV) #define NETIF_MSG_PROBE __NETIF_MSG(PROBE) #define NETIF_MSG_LINK __NETIF_MSG(LINK) #define NETIF_MSG_TIMER __NETIF_MSG(TIMER) #define NETIF_MSG_IFDOWN __NETIF_MSG(IFDOWN) #define NETIF_MSG_IFUP __NETIF_MSG(IFUP) #define NETIF_MSG_RX_ERR __NETIF_MSG(RX_ERR) #define NETIF_MSG_TX_ERR __NETIF_MSG(TX_ERR) #define NETIF_MSG_TX_QUEUED __NETIF_MSG(TX_QUEUED) #define NETIF_MSG_INTR __NETIF_MSG(INTR) #define NETIF_MSG_TX_DONE __NETIF_MSG(TX_DONE) #define NETIF_MSG_RX_STATUS __NETIF_MSG(RX_STATUS) #define NETIF_MSG_PKTDATA __NETIF_MSG(PKTDATA) #define NETIF_MSG_HW __NETIF_MSG(HW) #define NETIF_MSG_WOL __NETIF_MSG(WOL) #define netif_msg_drv(p) ((p)->msg_enable & NETIF_MSG_DRV) #define netif_msg_probe(p) ((p)->msg_enable & NETIF_MSG_PROBE) #define netif_msg_link(p) ((p)->msg_enable & NETIF_MSG_LINK) #define netif_msg_timer(p) ((p)->msg_enable & NETIF_MSG_TIMER) #define netif_msg_ifdown(p) ((p)->msg_enable & NETIF_MSG_IFDOWN) #define netif_msg_ifup(p) ((p)->msg_enable & NETIF_MSG_IFUP) #define netif_msg_rx_err(p) ((p)->msg_enable & NETIF_MSG_RX_ERR) #define netif_msg_tx_err(p) ((p)->msg_enable & NETIF_MSG_TX_ERR) #define netif_msg_tx_queued(p) ((p)->msg_enable & NETIF_MSG_TX_QUEUED) #define netif_msg_intr(p) ((p)->msg_enable & NETIF_MSG_INTR) #define netif_msg_tx_done(p) ((p)->msg_enable & NETIF_MSG_TX_DONE) #define netif_msg_rx_status(p) ((p)->msg_enable & NETIF_MSG_RX_STATUS) #define netif_msg_pktdata(p) ((p)->msg_enable & NETIF_MSG_PKTDATA) #define netif_msg_hw(p) ((p)->msg_enable & NETIF_MSG_HW) #define netif_msg_wol(p) ((p)->msg_enable & NETIF_MSG_WOL) static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits) { /* use default */ if (debug_value < 0 || debug_value >= (sizeof(u32) * 8)) return default_msg_enable_bits; if (debug_value == 0) /* no output */ return 0; /* set low N bits */ return (1U << debug_value) - 1; } static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) { spin_lock(&txq->_xmit_lock); /* Pairs with READ_ONCE() in __dev_queue_xmit() */ WRITE_ONCE(txq->xmit_lock_owner, cpu); } static inline bool __netif_tx_acquire(struct netdev_queue *txq) { __acquire(&txq->_xmit_lock); return true; } static inline void __netif_tx_release(struct netdev_queue *txq) { __release(&txq->_xmit_lock); } static inline void __netif_tx_lock_bh(struct netdev_queue *txq) { spin_lock_bh(&txq->_xmit_lock); /* Pairs with READ_ONCE() in __dev_queue_xmit() */ WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id()); } static inline bool __netif_tx_trylock(struct netdev_queue *txq) { bool ok = 
spin_trylock(&txq->_xmit_lock); if (likely(ok)) { /* Pairs with READ_ONCE() in __dev_queue_xmit() */ WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id()); } return ok; } static inline void __netif_tx_unlock(struct netdev_queue *txq) { /* Pairs with READ_ONCE() in __dev_queue_xmit() */ WRITE_ONCE(txq->xmit_lock_owner, -1); spin_unlock(&txq->_xmit_lock); } static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) { /* Pairs with READ_ONCE() in __dev_queue_xmit() */ WRITE_ONCE(txq->xmit_lock_owner, -1); spin_unlock_bh(&txq->_xmit_lock); } /* * txq->trans_start can be read locklessly from dev_watchdog() */ static inline void txq_trans_update(const struct net_device *dev, struct netdev_queue *txq) { if (!dev->lltx) WRITE_ONCE(txq->trans_start, jiffies); } static inline void txq_trans_cond_update(struct netdev_queue *txq) { unsigned long now = jiffies; if (READ_ONCE(txq->trans_start) != now) WRITE_ONCE(txq->trans_start, now); } /* legacy drivers only, netdev_start_xmit() sets txq->trans_start */ static inline void netif_trans_update(struct net_device *dev) { struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); txq_trans_cond_update(txq); } /** * netif_tx_lock - grab network device transmit lock * @dev: network device * * Get network device transmit lock */ void netif_tx_lock(struct net_device *dev); static inline void netif_tx_lock_bh(struct net_device *dev) { local_bh_disable(); netif_tx_lock(dev); } void netif_tx_unlock(struct net_device *dev); static inline void netif_tx_unlock_bh(struct net_device *dev) { netif_tx_unlock(dev); local_bh_enable(); } #define HARD_TX_LOCK(dev, txq, cpu) { \ if (!(dev)->lltx) { \ __netif_tx_lock(txq, cpu); \ } else { \ __netif_tx_acquire(txq); \ } \ } #define HARD_TX_TRYLOCK(dev, txq) \ (!(dev)->lltx ? \ __netif_tx_trylock(txq) : \ __netif_tx_acquire(txq)) #define HARD_TX_UNLOCK(dev, txq) { \ if (!(dev)->lltx) { \ __netif_tx_unlock(txq); \ } else { \ __netif_tx_release(txq); \ } \ } static inline void netif_tx_disable(struct net_device *dev) { unsigned int i; int cpu; local_bh_disable(); cpu = smp_processor_id(); spin_lock(&dev->tx_global_lock); for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); __netif_tx_lock(txq, cpu); netif_tx_stop_queue(txq); __netif_tx_unlock(txq); } spin_unlock(&dev->tx_global_lock); local_bh_enable(); } static inline void netif_addr_lock(struct net_device *dev) { unsigned char nest_level = 0; #ifdef CONFIG_LOCKDEP nest_level = dev->nested_level; #endif spin_lock_nested(&dev->addr_list_lock, nest_level); } static inline void netif_addr_lock_bh(struct net_device *dev) { unsigned char nest_level = 0; #ifdef CONFIG_LOCKDEP nest_level = dev->nested_level; #endif local_bh_disable(); spin_lock_nested(&dev->addr_list_lock, nest_level); } static inline void netif_addr_unlock(struct net_device *dev) { spin_unlock(&dev->addr_list_lock); } static inline void netif_addr_unlock_bh(struct net_device *dev) { spin_unlock_bh(&dev->addr_list_lock); } /* * dev_addrs walker. Should be used only for read access. Call with * rcu_read_lock held. 
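 *
 * Illustrative sketch of dumping every address on the list, with @ha being
 * a struct netdev_hw_addr iterator:
 *
 *	struct netdev_hw_addr *ha;
 *
 *	rcu_read_lock();
 *	for_each_dev_addr(dev, ha)
 *		pr_info("addr %pM type %d\n", ha->addr, ha->type);
 *	rcu_read_unlock();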
*/ #define for_each_dev_addr(dev, ha) \ list_for_each_entry_rcu(ha, &dev->dev_addrs.list, list) /* These functions live elsewhere (drivers/net/net_init.c, but related) */ void ether_setup(struct net_device *dev); /* Allocate dummy net_device */ struct net_device *alloc_netdev_dummy(int sizeof_priv); /* Support for loadable net-drivers */ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, unsigned char name_assign_type, void (*setup)(struct net_device *), unsigned int txqs, unsigned int rxqs); #define alloc_netdev(sizeof_priv, name, name_assign_type, setup) \ alloc_netdev_mqs(sizeof_priv, name, name_assign_type, setup, 1, 1) #define alloc_netdev_mq(sizeof_priv, name, name_assign_type, setup, count) \ alloc_netdev_mqs(sizeof_priv, name, name_assign_type, setup, count, \ count) int register_netdev(struct net_device *dev); void unregister_netdev(struct net_device *dev); int devm_register_netdev(struct device *dev, struct net_device *ndev); /* General hardware address lists handling functions */ int __hw_addr_sync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, int addr_len); int __hw_addr_sync_multiple(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, int addr_len); void __hw_addr_unsync(struct netdev_hw_addr_list *to_list, struct netdev_hw_addr_list *from_list, int addr_len); int __hw_addr_sync_dev(struct netdev_hw_addr_list *list, struct net_device *dev, int (*sync)(struct net_device *, const unsigned char *), int (*unsync)(struct net_device *, const unsigned char *)); int __hw_addr_ref_sync_dev(struct netdev_hw_addr_list *list, struct net_device *dev, int (*sync)(struct net_device *, const unsigned char *, int), int (*unsync)(struct net_device *, const unsigned char *, int)); void __hw_addr_ref_unsync_dev(struct netdev_hw_addr_list *list, struct net_device *dev, int (*unsync)(struct net_device *, const unsigned char *, int)); void __hw_addr_unsync_dev(struct netdev_hw_addr_list *list, struct net_device *dev, int (*unsync)(struct net_device *, const unsigned char *)); void __hw_addr_init(struct netdev_hw_addr_list *list); /* Functions used for device addresses handling */ void dev_addr_mod(struct net_device *dev, unsigned int offset, const void *addr, size_t len); static inline void __dev_addr_set(struct net_device *dev, const void *addr, size_t len) { dev_addr_mod(dev, 0, addr, len); } static inline void dev_addr_set(struct net_device *dev, const u8 *addr) { __dev_addr_set(dev, addr, dev->addr_len); } int dev_addr_add(struct net_device *dev, const unsigned char *addr, unsigned char addr_type); int dev_addr_del(struct net_device *dev, const unsigned char *addr, unsigned char addr_type); /* Functions used for unicast addresses handling */ int dev_uc_add(struct net_device *dev, const unsigned char *addr); int dev_uc_add_excl(struct net_device *dev, const unsigned char *addr); int dev_uc_del(struct net_device *dev, const unsigned char *addr); int dev_uc_sync(struct net_device *to, struct net_device *from); int dev_uc_sync_multiple(struct net_device *to, struct net_device *from); void dev_uc_unsync(struct net_device *to, struct net_device *from); void dev_uc_flush(struct net_device *dev); void dev_uc_init(struct net_device *dev); /** * __dev_uc_sync - Synchronize device's unicast list * @dev: device to sync * @sync: function to call if address should be added * @unsync: function to call if address should be removed * * Add newly added addresses to the interface, and release * addresses that have been deleted. 
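 *
 * Illustrative sketch (hypothetical foo_* callbacks) of the usual call site
 * in a driver's ->ndo_set_rx_mode():
 *
 *	static int foo_uc_sync(struct net_device *dev, const unsigned char *addr)
 *	{
 *		return foo_add_filter(netdev_priv(dev), addr);
 *	}
 *
 *	static int foo_uc_unsync(struct net_device *dev, const unsigned char *addr)
 *	{
 *		return foo_del_filter(netdev_priv(dev), addr);
 *	}
 *
 *	static void foo_set_rx_mode(struct net_device *dev)
 *	{
 *		__dev_uc_sync(dev, foo_uc_sync, foo_uc_unsync);
 *	}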
*/ static inline int __dev_uc_sync(struct net_device *dev, int (*sync)(struct net_device *, const unsigned char *), int (*unsync)(struct net_device *, const unsigned char *)) { return __hw_addr_sync_dev(&dev->uc, dev, sync, unsync); } /** * __dev_uc_unsync - Remove synchronized addresses from device * @dev: device to sync * @unsync: function to call if address should be removed * * Remove all addresses that were added to the device by dev_uc_sync(). */ static inline void __dev_uc_unsync(struct net_device *dev, int (*unsync)(struct net_device *, const unsigned char *)) { __hw_addr_unsync_dev(&dev->uc, dev, unsync); } /* Functions used for multicast addresses handling */ int dev_mc_add(struct net_device *dev, const unsigned char *addr); int dev_mc_add_global(struct net_device *dev, const unsigned char *addr); int dev_mc_add_excl(struct net_device *dev, const unsigned char *addr); int dev_mc_del(struct net_device *dev, const unsigned char *addr); int dev_mc_del_global(struct net_device *dev, const unsigned char *addr); int dev_mc_sync(struct net_device *to, struct net_device *from); int dev_mc_sync_multiple(struct net_device *to, struct net_device *from); void dev_mc_unsync(struct net_device *to, struct net_device *from); void dev_mc_flush(struct net_device *dev); void dev_mc_init(struct net_device *dev); /** * __dev_mc_sync - Synchronize device's multicast list * @dev: device to sync * @sync: function to call if address should be added * @unsync: function to call if address should be removed * * Add newly added addresses to the interface, and release * addresses that have been deleted. */ static inline int __dev_mc_sync(struct net_device *dev, int (*sync)(struct net_device *, const unsigned char *), int (*unsync)(struct net_device *, const unsigned char *)) { return __hw_addr_sync_dev(&dev->mc, dev, sync, unsync); } /** * __dev_mc_unsync - Remove synchronized addresses from device * @dev: device to sync * @unsync: function to call if address should be removed * * Remove all addresses that were added to the device by dev_mc_sync(). 
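 *
 * Illustrative sketch (hypothetical foo_uc_unsync/foo_mc_unsync callbacks):
 * a driver typically drops the synced filters when the interface goes down:
 *
 *	static int foo_stop(struct net_device *dev)
 *	{
 *		__dev_uc_unsync(dev, foo_uc_unsync);
 *		__dev_mc_unsync(dev, foo_mc_unsync);
 *		...
 *		return 0;
 *	}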
*/ static inline void __dev_mc_unsync(struct net_device *dev, int (*unsync)(struct net_device *, const unsigned char *)) { __hw_addr_unsync_dev(&dev->mc, dev, unsync); } /* Functions used for secondary unicast and multicast support */ void dev_set_rx_mode(struct net_device *dev); int netif_set_promiscuity(struct net_device *dev, int inc); int dev_set_promiscuity(struct net_device *dev, int inc); int netif_set_allmulti(struct net_device *dev, int inc, bool notify); int dev_set_allmulti(struct net_device *dev, int inc); void netif_state_change(struct net_device *dev); void netdev_state_change(struct net_device *dev); void __netdev_notify_peers(struct net_device *dev); void netdev_notify_peers(struct net_device *dev); void netdev_features_change(struct net_device *dev); /* Load a device via the kmod */ void dev_load(struct net *net, const char *name); struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, struct rtnl_link_stats64 *storage); void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, const struct net_device_stats *netdev_stats); void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s, const struct pcpu_sw_netstats __percpu *netstats); void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s); enum { NESTED_SYNC_IMM_BIT, NESTED_SYNC_TODO_BIT, }; #define __NESTED_SYNC_BIT(bit) ((u32)1 << (bit)) #define __NESTED_SYNC(name) __NESTED_SYNC_BIT(NESTED_SYNC_ ## name ## _BIT) #define NESTED_SYNC_IMM __NESTED_SYNC(IMM) #define NESTED_SYNC_TODO __NESTED_SYNC(TODO) struct netdev_nested_priv { unsigned char flags; void *data; }; bool netdev_has_upper_dev(struct net_device *dev, struct net_device *upper_dev); struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, struct list_head **iter); /* iterate through upper list, must be called under RCU read lock */ #define netdev_for_each_upper_dev_rcu(dev, updev, iter) \ for (iter = &(dev)->adj_list.upper, \ updev = netdev_upper_get_next_dev_rcu(dev, &(iter)); \ updev; \ updev = netdev_upper_get_next_dev_rcu(dev, &(iter))) int netdev_walk_all_upper_dev_rcu(struct net_device *dev, int (*fn)(struct net_device *upper_dev, struct netdev_nested_priv *priv), struct netdev_nested_priv *priv); bool netdev_has_upper_dev_all_rcu(struct net_device *dev, struct net_device *upper_dev); bool netdev_has_any_upper_dev(struct net_device *dev); void *netdev_lower_get_next_private(struct net_device *dev, struct list_head **iter); void *netdev_lower_get_next_private_rcu(struct net_device *dev, struct list_head **iter); #define netdev_for_each_lower_private(dev, priv, iter) \ for (iter = (dev)->adj_list.lower.next, \ priv = netdev_lower_get_next_private(dev, &(iter)); \ priv; \ priv = netdev_lower_get_next_private(dev, &(iter))) #define netdev_for_each_lower_private_rcu(dev, priv, iter) \ for (iter = &(dev)->adj_list.lower, \ priv = netdev_lower_get_next_private_rcu(dev, &(iter)); \ priv; \ priv = netdev_lower_get_next_private_rcu(dev, &(iter))) void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter); #define netdev_for_each_lower_dev(dev, ldev, iter) \ for (iter = (dev)->adj_list.lower.next, \ ldev = netdev_lower_get_next(dev, &(iter)); \ ldev; \ ldev = netdev_lower_get_next(dev, &(iter))) struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev, struct list_head **iter); int netdev_walk_all_lower_dev(struct net_device *dev, int (*fn)(struct net_device *lower_dev, struct netdev_nested_priv *priv), struct netdev_nested_priv *priv); int netdev_walk_all_lower_dev_rcu(struct net_device *dev, 
int (*fn)(struct net_device *lower_dev, struct netdev_nested_priv *priv), struct netdev_nested_priv *priv); void *netdev_adjacent_get_private(struct list_head *adj_list); void *netdev_lower_get_first_private_rcu(struct net_device *dev); struct net_device *netdev_master_upper_dev_get(struct net_device *dev); struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev); int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, struct netlink_ext_ack *extack); int netdev_master_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, void *upper_priv, void *upper_info, struct netlink_ext_ack *extack); void netdev_upper_dev_unlink(struct net_device *dev, struct net_device *upper_dev); int netdev_adjacent_change_prepare(struct net_device *old_dev, struct net_device *new_dev, struct net_device *dev, struct netlink_ext_ack *extack); void netdev_adjacent_change_commit(struct net_device *old_dev, struct net_device *new_dev, struct net_device *dev); void netdev_adjacent_change_abort(struct net_device *old_dev, struct net_device *new_dev, struct net_device *dev); void netdev_adjacent_rename_links(struct net_device *dev, char *oldname); void *netdev_lower_dev_get_private(struct net_device *dev, struct net_device *lower_dev); void netdev_lower_state_changed(struct net_device *lower_dev, void *lower_state_info); /* RSS keys are 40 or 52 bytes long */ #define NETDEV_RSS_KEY_LEN 52 extern u8 netdev_rss_key[NETDEV_RSS_KEY_LEN] __read_mostly; void netdev_rss_key_fill(void *buffer, size_t len); int skb_checksum_help(struct sk_buff *skb); int skb_crc32c_csum_help(struct sk_buff *skb); int skb_csum_hwoffload_help(struct sk_buff *skb, const netdev_features_t features); struct netdev_bonding_info { ifslave slave; ifbond master; }; struct netdev_notifier_bonding_info { struct netdev_notifier_info info; /* must be first */ struct netdev_bonding_info bonding_info; }; void netdev_bonding_info_change(struct net_device *dev, struct netdev_bonding_info *bonding_info); #if IS_ENABLED(CONFIG_ETHTOOL_NETLINK) void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data); #else static inline void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data) { } #endif __be16 skb_network_protocol(struct sk_buff *skb, int *depth); static inline bool can_checksum_protocol(netdev_features_t features, __be16 protocol) { if (protocol == htons(ETH_P_FCOE)) return !!(features & NETIF_F_FCOE_CRC); /* Assume this is an IP checksum (not SCTP CRC) */ if (features & NETIF_F_HW_CSUM) { /* Can checksum everything */ return true; } switch (protocol) { case htons(ETH_P_IP): return !!(features & NETIF_F_IP_CSUM); case htons(ETH_P_IPV6): return !!(features & NETIF_F_IPV6_CSUM); default: return false; } } #ifdef CONFIG_BUG void netdev_rx_csum_fault(struct net_device *dev, struct sk_buff *skb); #else static inline void netdev_rx_csum_fault(struct net_device *dev, struct sk_buff *skb) { } #endif /* rx skb timestamps */ void net_enable_timestamp(void); void net_disable_timestamp(void); static inline ktime_t netdev_get_tstamp(struct net_device *dev, const struct skb_shared_hwtstamps *hwtstamps, bool cycles) { const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_get_tstamp) return ops->ndo_get_tstamp(dev, hwtstamps, cycles); return hwtstamps->hwtstamp; } #ifndef CONFIG_PREEMPT_RT static inline void netdev_xmit_set_more(bool more) { __this_cpu_write(softnet_data.xmit.more, more); } static inline bool netdev_xmit_more(void) { return 
__this_cpu_read(softnet_data.xmit.more); } #else static inline void netdev_xmit_set_more(bool more) { current->net_xmit.more = more; } static inline bool netdev_xmit_more(void) { return current->net_xmit.more; } #endif static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops, struct sk_buff *skb, struct net_device *dev, bool more) { netdev_xmit_set_more(more); return ops->ndo_start_xmit(skb, dev); } static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, bool more) { const struct net_device_ops *ops = dev->netdev_ops; netdev_tx_t rc; rc = __netdev_start_xmit(ops, skb, dev, more); if (rc == NETDEV_TX_OK) txq_trans_update(dev, txq); return rc; } int netdev_class_create_file_ns(const struct class_attribute *class_attr, const void *ns); void netdev_class_remove_file_ns(const struct class_attribute *class_attr, const void *ns); extern const struct kobj_ns_type_operations net_ns_type_operations; const char *netdev_drivername(const struct net_device *dev); static inline netdev_features_t netdev_intersect_features(netdev_features_t f1, netdev_features_t f2) { if ((f1 ^ f2) & NETIF_F_HW_CSUM) { if (f1 & NETIF_F_HW_CSUM) f1 |= (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); else f2 |= (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM); } return f1 & f2; } static inline netdev_features_t netdev_get_wanted_features( struct net_device *dev) { return (dev->features & ~dev->hw_features) | dev->wanted_features; } netdev_features_t netdev_increment_features(netdev_features_t all, netdev_features_t one, netdev_features_t mask); /* Allow TSO being used on stacked device : * Performing the GSO segmentation before last device * is a performance improvement. */ static inline netdev_features_t netdev_add_tso_features(netdev_features_t features, netdev_features_t mask) { return netdev_increment_features(features, NETIF_F_ALL_TSO, mask); } int __netdev_update_features(struct net_device *dev); void netdev_update_features(struct net_device *dev); void netdev_change_features(struct net_device *dev); void netif_stacked_transfer_operstate(const struct net_device *rootdev, struct net_device *dev); netdev_features_t passthru_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features); netdev_features_t netif_skb_features(struct sk_buff *skb); void skb_warn_bad_offload(const struct sk_buff *skb); static inline bool net_gso_ok(netdev_features_t features, int gso_type) { netdev_features_t feature = (netdev_features_t)gso_type << NETIF_F_GSO_SHIFT; /* check flags correspondence */ BUILD_BUG_ON(SKB_GSO_TCPV4 != (NETIF_F_TSO >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_DODGY != (NETIF_F_GSO_ROBUST >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TCP_ECN != (NETIF_F_TSO_ECN >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TCP_FIXEDID != (NETIF_F_TSO_MANGLEID >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TCPV6 != (NETIF_F_TSO6 >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_FCOE != (NETIF_F_FSO >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_GRE != (NETIF_F_GSO_GRE >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_GRE_CSUM != (NETIF_F_GSO_GRE_CSUM >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_IPXIP4 != (NETIF_F_GSO_IPXIP4 >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_IPXIP6 != (NETIF_F_GSO_IPXIP6 >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL != (NETIF_F_GSO_UDP_TUNNEL >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL_CSUM != (NETIF_F_GSO_UDP_TUNNEL_CSUM >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_PARTIAL != (NETIF_F_GSO_PARTIAL >> NETIF_F_GSO_SHIFT)); 
BUILD_BUG_ON(SKB_GSO_TUNNEL_REMCSUM != (NETIF_F_GSO_TUNNEL_REMCSUM >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_SCTP != (NETIF_F_GSO_SCTP >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_ESP != (NETIF_F_GSO_ESP >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_UDP != (NETIF_F_GSO_UDP >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_UDP_L4 != (NETIF_F_GSO_UDP_L4 >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_FRAGLIST != (NETIF_F_GSO_FRAGLIST >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TCP_ACCECN != (NETIF_F_GSO_ACCECN >> NETIF_F_GSO_SHIFT)); return (features & feature) == feature; } static inline bool skb_gso_ok(struct sk_buff *skb, netdev_features_t features) { return net_gso_ok(features, skb_shinfo(skb)->gso_type) && (!skb_has_frag_list(skb) || (features & NETIF_F_FRAGLIST)); } static inline bool netif_needs_gso(struct sk_buff *skb, netdev_features_t features) { return skb_is_gso(skb) && (!skb_gso_ok(skb, features) || unlikely((skb->ip_summed != CHECKSUM_PARTIAL) && (skb->ip_summed != CHECKSUM_UNNECESSARY))); } void netif_set_tso_max_size(struct net_device *dev, unsigned int size); void netif_set_tso_max_segs(struct net_device *dev, unsigned int segs); void netif_inherit_tso_max(struct net_device *to, const struct net_device *from); static inline unsigned int netif_get_gro_max_size(const struct net_device *dev, const struct sk_buff *skb) { /* pairs with WRITE_ONCE() in netif_set_gro(_ipv4)_max_size() */ return skb->protocol == htons(ETH_P_IPV6) ? READ_ONCE(dev->gro_max_size) : READ_ONCE(dev->gro_ipv4_max_size); } static inline unsigned int netif_get_gso_max_size(const struct net_device *dev, const struct sk_buff *skb) { /* pairs with WRITE_ONCE() in netif_set_gso(_ipv4)_max_size() */ return skb->protocol == htons(ETH_P_IPV6) ? READ_ONCE(dev->gso_max_size) : READ_ONCE(dev->gso_ipv4_max_size); } static inline bool netif_is_macsec(const struct net_device *dev) { return dev->priv_flags & IFF_MACSEC; } static inline bool netif_is_macvlan(const struct net_device *dev) { return dev->priv_flags & IFF_MACVLAN; } static inline bool netif_is_macvlan_port(const struct net_device *dev) { return dev->priv_flags & IFF_MACVLAN_PORT; } static inline bool netif_is_bond_master(const struct net_device *dev) { return dev->flags & IFF_MASTER && dev->priv_flags & IFF_BONDING; } static inline bool netif_is_bond_slave(const struct net_device *dev) { return dev->flags & IFF_SLAVE && dev->priv_flags & IFF_BONDING; } static inline bool netif_supports_nofcs(struct net_device *dev) { return dev->priv_flags & IFF_SUPP_NOFCS; } static inline bool netif_has_l3_rx_handler(const struct net_device *dev) { return dev->priv_flags & IFF_L3MDEV_RX_HANDLER; } static inline bool netif_is_l3_master(const struct net_device *dev) { return dev->priv_flags & IFF_L3MDEV_MASTER; } static inline bool netif_is_l3_slave(const struct net_device *dev) { return dev->priv_flags & IFF_L3MDEV_SLAVE; } static inline int dev_sdif(const struct net_device *dev) { #ifdef CONFIG_NET_L3_MASTER_DEV if (netif_is_l3_slave(dev)) return dev->ifindex; #endif return 0; } static inline bool netif_is_bridge_master(const struct net_device *dev) { return dev->priv_flags & IFF_EBRIDGE; } static inline bool netif_is_bridge_port(const struct net_device *dev) { return dev->priv_flags & IFF_BRIDGE_PORT; } static inline bool netif_is_ovs_master(const struct net_device *dev) { return dev->priv_flags & IFF_OPENVSWITCH; } static inline bool netif_is_ovs_port(const struct net_device *dev) { return dev->priv_flags & IFF_OVS_DATAPATH; } static inline bool netif_is_any_bridge_master(const 
struct net_device *dev) { return netif_is_bridge_master(dev) || netif_is_ovs_master(dev); } static inline bool netif_is_any_bridge_port(const struct net_device *dev) { return netif_is_bridge_port(dev) || netif_is_ovs_port(dev); } static inline bool netif_is_team_master(const struct net_device *dev) { return dev->priv_flags & IFF_TEAM; } static inline bool netif_is_team_port(const struct net_device *dev) { return dev->priv_flags & IFF_TEAM_PORT; } static inline bool netif_is_lag_master(const struct net_device *dev) { return netif_is_bond_master(dev) || netif_is_team_master(dev); } static inline bool netif_is_lag_port(const struct net_device *dev) { return netif_is_bond_slave(dev) || netif_is_team_port(dev); } static inline bool netif_is_rxfh_configured(const struct net_device *dev) { return dev->priv_flags & IFF_RXFH_CONFIGURED; } static inline bool netif_is_failover(const struct net_device *dev) { return dev->priv_flags & IFF_FAILOVER; } static inline bool netif_is_failover_slave(const struct net_device *dev) { return dev->priv_flags & IFF_FAILOVER_SLAVE; } /* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */ static inline void netif_keep_dst(struct net_device *dev) { dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM); } /* return true if dev can't cope with mtu frames that need vlan tag insertion */ static inline bool netif_reduces_vlan_mtu(struct net_device *dev) { /* TODO: reserve and use an additional IFF bit, if we get more users */ return netif_is_macsec(dev); } extern struct pernet_operations __net_initdata loopback_net_ops; /* Logging, debugging and troubleshooting/diagnostic helpers. */ /* netdev_printk helpers, similar to dev_printk */ static inline const char *netdev_name(const struct net_device *dev) { if (!dev->name[0] || strchr(dev->name, '%')) return "(unnamed net_device)"; return dev->name; } static inline const char *netdev_reg_state(const struct net_device *dev) { u8 reg_state = READ_ONCE(dev->reg_state); switch (reg_state) { case NETREG_UNINITIALIZED: return " (uninitialized)"; case NETREG_REGISTERED: return ""; case NETREG_UNREGISTERING: return " (unregistering)"; case NETREG_UNREGISTERED: return " (unregistered)"; case NETREG_RELEASED: return " (released)"; case NETREG_DUMMY: return " (dummy)"; } WARN_ONCE(1, "%s: unknown reg_state %d\n", dev->name, reg_state); return " (unknown)"; } #define MODULE_ALIAS_NETDEV(device) \ MODULE_ALIAS("netdev-" device) /* * netdev_WARN() acts like dev_printk(), but with the key difference * of using a WARN/WARN_ON to get the message out, including the * file/line information and a backtrace. */ #define netdev_WARN(dev, format, args...) \ WARN(1, "netdevice: %s%s: " format, netdev_name(dev), \ netdev_reg_state(dev), ##args) #define netdev_WARN_ONCE(dev, format, args...) \ WARN_ONCE(1, "netdevice: %s%s: " format, netdev_name(dev), \ netdev_reg_state(dev), ##args) /* * The list of packet types we will receive (as opposed to discard) * and the routines to invoke. * * Why 16. Because with 16 the only overlap we get on a hash of the * low nibble of the protocol value is RARP/SNAP/X.25. * * 0800 IP * 0001 802.3 * 0002 AX.25 * 0004 802.2 * 8035 RARP * 0005 SNAP * 0805 X.25 * 0806 ARP * 8137 IPX * 0009 Localtalk * 86DD IPv6 */ #define PTYPE_HASH_SIZE (16) #define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1) extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; extern struct net_device *blackhole_netdev; /* Note: Avoid these macros in fast path, prefer per-cpu or per-queue counters. 
*/ #define DEV_STATS_INC(DEV, FIELD) atomic_long_inc(&(DEV)->stats.__##FIELD) #define DEV_STATS_ADD(DEV, FIELD, VAL) \ atomic_long_add((VAL), &(DEV)->stats.__##FIELD) #define DEV_STATS_READ(DEV, FIELD) atomic_long_read(&(DEV)->stats.__##FIELD) #endif /* _LINUX_NETDEVICE_H */
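/*
 * Editor's note: a minimal sketch (not part of netdevice.h above) of how a
 * driver's ndo_set_rx_mode callback is typically expected to use
 * __dev_uc_sync()/__dev_mc_sync() as described in the kerneldoc above. The
 * foo_* names and the hardware-filter helpers are hypothetical placeholders.
 */
static int foo_sync_addr(struct net_device *dev, const unsigned char *addr)
{
	/* hypothetical: program one perfect-filter entry into the NIC */
	return foo_hw_add_filter(netdev_priv(dev), addr);
}

static int foo_unsync_addr(struct net_device *dev, const unsigned char *addr)
{
	/* hypothetical: drop the corresponding filter entry */
	foo_hw_del_filter(netdev_priv(dev), addr);
	return 0;
}

static void foo_set_rx_mode(struct net_device *dev)
{
	/*
	 * Only the delta is pushed to hardware: addresses newly added to
	 * dev->uc/dev->mc go to the sync callback, addresses deleted since
	 * the previous call go to unsync.
	 */
	__dev_uc_sync(dev, foo_sync_addr, foo_unsync_addr);
	__dev_mc_sync(dev, foo_sync_addr, foo_unsync_addr);
}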
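/*
 * Editor's note: a second small sketch showing the adjacency iterators
 * declared above. netdev_for_each_upper_dev_rcu() must run under
 * rcu_read_lock(); the pr_info() is just a placeholder action.
 */
static void dump_upper_devs(struct net_device *dev)
{
	struct net_device *upper;
	struct list_head *iter;

	rcu_read_lock();
	netdev_for_each_upper_dev_rcu(dev, upper, iter)
		pr_info("%s is stacked under %s\n", dev->name, upper->name);
	rcu_read_unlock();
}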
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * NET Generic infrastructure for Network protocols. * * Definitions for request_sock * * Authors: Arnaldo Carvalho de Melo <acme@conectiva.com.br> * * From code originally in include/net/tcp.h */ #ifndef _REQUEST_SOCK_H #define _REQUEST_SOCK_H #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/types.h> #include <linux/bug.h> #include <linux/refcount.h> #include <net/sock.h> #include <net/rstreason.h> struct request_sock; struct sk_buff; struct dst_entry; struct proto; struct request_sock_ops { int family; unsigned int obj_size; struct kmem_cache *slab; char *slab_name; int (*rtx_syn_ack)(const struct sock *sk, struct request_sock *req); void (*send_ack)(const struct sock *sk, struct sk_buff *skb, struct request_sock *req); void (*send_reset)(const struct sock *sk, struct sk_buff *skb, enum sk_rst_reason reason); void (*destructor)(struct request_sock *req); void (*syn_ack_timeout)(const struct request_sock *req); }; int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req); struct saved_syn { u32 mac_hdrlen; u32 network_hdrlen; u32 tcp_hdrlen; u8 data[]; }; /* struct request_sock - mini sock to represent a connection request */ struct request_sock { struct sock_common __req_common; #define rsk_refcnt __req_common.skc_refcnt #define rsk_hash __req_common.skc_hash #define rsk_listener __req_common.skc_listener #define rsk_window_clamp __req_common.skc_window_clamp #define rsk_rcv_wnd __req_common.skc_rcv_wnd struct request_sock *dl_next; u16 mss; u8 num_retrans; /* number of retransmits */ u8 syncookie:1; /* True if * 1) tcpopts needs to be encoded in * TS of SYN+ACK * 2) ACK is validated by BPF kfunc.
*/ u8 num_timeout:7; /* number of timeouts */ u32 ts_recent; struct timer_list rsk_timer; const struct request_sock_ops *rsk_ops; struct sock *sk; struct saved_syn *saved_syn; u32 secid; u32 peer_secid; u32 timeout; }; static inline struct request_sock *inet_reqsk(const struct sock *sk) { return (struct request_sock *)sk; } static inline struct sock *req_to_sk(struct request_sock *req) { return (struct sock *)req; } /** * skb_steal_sock - steal a socket from an sk_buff * @skb: sk_buff to steal the socket from * @refcounted: is set to true if the socket is reference-counted * @prefetched: is set to true if the socket was assigned from bpf */ static inline struct sock *skb_steal_sock(struct sk_buff *skb, bool *refcounted, bool *prefetched) { struct sock *sk = skb->sk; if (!sk) { *prefetched = false; *refcounted = false; return NULL; } *prefetched = skb_sk_is_prefetched(skb); if (*prefetched) { #if IS_ENABLED(CONFIG_SYN_COOKIES) if (sk->sk_state == TCP_NEW_SYN_RECV && inet_reqsk(sk)->syncookie) { struct request_sock *req = inet_reqsk(sk); *refcounted = false; sk = req->rsk_listener; req->rsk_listener = NULL; return sk; } #endif *refcounted = sk_is_refcounted(sk); } else { *refcounted = true; } skb->destructor = NULL; skb->sk = NULL; return sk; } static inline void __reqsk_free(struct request_sock *req) { req->rsk_ops->destructor(req); if (req->rsk_listener) sock_put(req->rsk_listener); kfree(req->saved_syn); kmem_cache_free(req->rsk_ops->slab, req); } static inline void reqsk_free(struct request_sock *req) { DEBUG_NET_WARN_ON_ONCE(refcount_read(&req->rsk_refcnt) != 0); __reqsk_free(req); } static inline void reqsk_put(struct request_sock *req) { if (refcount_dec_and_test(&req->rsk_refcnt)) __reqsk_free(req); } /* * For a TCP Fast Open listener - * lock - protects the access to all the reqsk, which is co-owned by * the listener and the child socket. * qlen - pending TFO requests (still in TCP_SYN_RECV). * max_qlen - max TFO reqs allowed before TFO is disabled. * * XXX (TFO) - ideally these fields can be made as part of "listen_sock" * structure above. But there is some implementation difficulty due to * listen_sock being part of request_sock_queue hence will be freed when * a listener is stopped. But TFO related fields may continue to be * accessed even after a listener is closed, until its sk_refcnt drops * to 0 implying no more outstanding TFO reqs. One solution is to keep * listen_opt around until sk_refcnt drops to 0. But there is some other * complexity that needs to be resolved. E.g., a listener can be disabled * temporarily through shutdown()->tcp_disconnect(), and re-enabled later. */ struct fastopen_queue { struct request_sock *rskq_rst_head; /* Keep track of past TFO */ struct request_sock *rskq_rst_tail; /* requests that caused RST. * This is part of the defense * against spoofing attack. 
*/ spinlock_t lock; int qlen; /* # of pending (TCP_SYN_RECV) reqs */ int max_qlen; /* != 0 iff TFO is currently enabled */ struct tcp_fastopen_context __rcu *ctx; /* cipher context for cookie */ }; /** struct request_sock_queue - queue of request_socks * * @rskq_accept_head - FIFO head of established children * @rskq_accept_tail - FIFO tail of established children * @rskq_defer_accept - User waits for some data after accept() * */ struct request_sock_queue { spinlock_t rskq_lock; u8 rskq_defer_accept; u32 synflood_warned; atomic_t qlen; atomic_t young; struct request_sock *rskq_accept_head; struct request_sock *rskq_accept_tail; struct fastopen_queue fastopenq; /* Check max_qlen != 0 to determine * if TFO is enabled. */ }; void reqsk_queue_alloc(struct request_sock_queue *queue); void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, bool reset); static inline bool reqsk_queue_empty(const struct request_sock_queue *queue) { return READ_ONCE(queue->rskq_accept_head) == NULL; } static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue *queue, struct sock *parent) { struct request_sock *req; spin_lock_bh(&queue->rskq_lock); req = queue->rskq_accept_head; if (req) { sk_acceptq_removed(parent); WRITE_ONCE(queue->rskq_accept_head, req->dl_next); if (queue->rskq_accept_head == NULL) queue->rskq_accept_tail = NULL; } spin_unlock_bh(&queue->rskq_lock); return req; } static inline void reqsk_queue_removed(struct request_sock_queue *queue, const struct request_sock *req) { if (req->num_timeout == 0) atomic_dec(&queue->young); atomic_dec(&queue->qlen); } static inline void reqsk_queue_added(struct request_sock_queue *queue) { atomic_inc(&queue->young); atomic_inc(&queue->qlen); } static inline int reqsk_queue_len(const struct request_sock_queue *queue) { return atomic_read(&queue->qlen); } static inline int reqsk_queue_len_young(const struct request_sock_queue *queue) { return atomic_read(&queue->young); } /* RFC 7323 2.3 Using the Window Scale Option * The window field (SEG.WND) of every outgoing segment, with the * exception of <SYN> segments, MUST be right-shifted by * Rcv.Wind.Shift bits. * * This means the SEG.WND carried in SYNACK can not exceed 65535. * We use this property to harden TCP stack while in NEW_SYN_RECV state. */ static inline u32 tcp_synack_window(const struct request_sock *req) { return min(req->rsk_rcv_wnd, 65535U); } #endif /* _REQUEST_SOCK_H */ |
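/*
 * Editor's note: a simplified, hypothetical sketch (not part of
 * request_sock.h above) of how a listener's accept path is built around
 * reqsk_queue_remove(), loosely modelled on inet_csk_accept(). Error
 * handling and TCP Fast Open details are omitted.
 */
static struct sock *toy_accept_one(struct sock *listener,
				   struct request_sock_queue *queue)
{
	struct request_sock *req;
	struct sock *child;

	if (reqsk_queue_empty(queue))
		return NULL;			/* nothing established yet */

	/* Detaches the oldest established child under rskq_lock. */
	req = reqsk_queue_remove(queue, listener);
	if (!req)
		return NULL;

	child = req->sk;			/* the established child socket */
	reqsk_put(req);				/* drop the queue's reference */
	return child;
}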
/* SPDX-License-Identifier: GPL-2.0-or-later */ /* General filesystem caching interface * * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * NOTE!!! See: * * Documentation/filesystems/caching/netfs-api.rst * * for a description of the network filesystem interface declared here.
*/ #ifndef _LINUX_FSCACHE_H #define _LINUX_FSCACHE_H #include <linux/fs.h> #include <linux/netfs.h> #include <linux/writeback.h> #if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) #define __fscache_available (1) #define fscache_available() (1) #define fscache_volume_valid(volume) (volume) #define fscache_cookie_valid(cookie) (cookie) #define fscache_resources_valid(cres) ((cres)->cache_priv) #define fscache_cookie_enabled(cookie) (cookie && !test_bit(FSCACHE_COOKIE_DISABLED, &cookie->flags)) #else #define __fscache_available (0) #define fscache_available() (0) #define fscache_volume_valid(volume) (0) #define fscache_cookie_valid(cookie) (0) #define fscache_resources_valid(cres) (false) #define fscache_cookie_enabled(cookie) (0) #endif struct fscache_cookie; #define FSCACHE_ADV_SINGLE_CHUNK 0x01 /* The object is a single chunk of data */ #define FSCACHE_ADV_WRITE_CACHE 0x00 /* Do cache if written to locally */ #define FSCACHE_ADV_WRITE_NOCACHE 0x02 /* Don't cache if written to locally */ #define FSCACHE_ADV_WANT_CACHE_SIZE 0x04 /* Retrieve cache size at runtime */ #define FSCACHE_INVAL_DIO_WRITE 0x01 /* Invalidate due to DIO write */ enum fscache_want_state { FSCACHE_WANT_PARAMS, FSCACHE_WANT_WRITE, FSCACHE_WANT_READ, }; /* * Data object state. */ enum fscache_cookie_state { FSCACHE_COOKIE_STATE_QUIESCENT, /* The cookie is uncached */ FSCACHE_COOKIE_STATE_LOOKING_UP, /* The cache object is being looked up */ FSCACHE_COOKIE_STATE_CREATING, /* The cache object is being created */ FSCACHE_COOKIE_STATE_ACTIVE, /* The cache is active, readable and writable */ FSCACHE_COOKIE_STATE_INVALIDATING, /* The cache is being invalidated */ FSCACHE_COOKIE_STATE_FAILED, /* The cache failed, withdraw to clear */ FSCACHE_COOKIE_STATE_LRU_DISCARDING, /* The cookie is being discarded by the LRU */ FSCACHE_COOKIE_STATE_WITHDRAWING, /* The cookie is being withdrawn */ FSCACHE_COOKIE_STATE_RELINQUISHING, /* The cookie is being relinquished */ FSCACHE_COOKIE_STATE_DROPPED, /* The cookie has been dropped */ #define FSCACHE_COOKIE_STATE__NR (FSCACHE_COOKIE_STATE_DROPPED + 1) } __attribute__((mode(byte))); /* * Volume representation cookie. */ struct fscache_volume { refcount_t ref; atomic_t n_cookies; /* Number of data cookies in volume */ atomic_t n_accesses; /* Number of cache accesses in progress */ unsigned int debug_id; unsigned int key_hash; /* Hash of key string */ u8 *key; /* Volume ID, eg. "afs@example.com@1234" */ struct list_head proc_link; /* Link in /proc/fs/fscache/volumes */ struct hlist_bl_node hash_link; /* Link in hash table */ struct work_struct work; struct fscache_cache *cache; /* The cache in which this resides */ void *cache_priv; /* Cache private data */ spinlock_t lock; unsigned long flags; #define FSCACHE_VOLUME_RELINQUISHED 0 /* Volume is being cleaned up */ #define FSCACHE_VOLUME_INVALIDATE 1 /* Volume was invalidated */ #define FSCACHE_VOLUME_COLLIDED_WITH 2 /* Volume was collided with */ #define FSCACHE_VOLUME_ACQUIRE_PENDING 3 /* Volume is waiting to complete acquisition */ #define FSCACHE_VOLUME_CREATING 4 /* Volume is being created on disk */ u8 coherency_len; /* Length of the coherency data */ u8 coherency[]; /* Coherency data */ }; /* * Data file representation cookie. 
* - a file will only appear in one cache * - a request to cache a file may or may not be honoured, subject to * constraints such as disk space * - indices are created on disk just-in-time */ struct fscache_cookie { refcount_t ref; atomic_t n_active; /* number of active users of cookie */ atomic_t n_accesses; /* Number of cache accesses in progress */ unsigned int debug_id; unsigned int inval_counter; /* Number of invalidations made */ spinlock_t lock; struct fscache_volume *volume; /* Parent volume of this file. */ void *cache_priv; /* Cache-side representation */ struct hlist_bl_node hash_link; /* Link in hash table */ struct list_head proc_link; /* Link in proc list */ struct list_head commit_link; /* Link in commit queue */ struct work_struct work; /* Commit/relinq/withdraw work */ loff_t object_size; /* Size of the netfs object */ unsigned long unused_at; /* Time at which unused (jiffies) */ unsigned long flags; #define FSCACHE_COOKIE_RELINQUISHED 0 /* T if cookie has been relinquished */ #define FSCACHE_COOKIE_RETIRED 1 /* T if this cookie has retired on relinq */ #define FSCACHE_COOKIE_IS_CACHING 2 /* T if this cookie is cached */ #define FSCACHE_COOKIE_NO_DATA_TO_READ 3 /* T if this cookie has nothing to read */ #define FSCACHE_COOKIE_NEEDS_UPDATE 4 /* T if attrs have been updated */ #define FSCACHE_COOKIE_HAS_BEEN_CACHED 5 /* T if cookie needs withdraw-on-relinq */ #define FSCACHE_COOKIE_DISABLED 6 /* T if cookie has been disabled */ #define FSCACHE_COOKIE_LOCAL_WRITE 7 /* T if cookie has been modified locally */ #define FSCACHE_COOKIE_NO_ACCESS_WAKE 8 /* T if no wake when n_accesses goes 0 */ #define FSCACHE_COOKIE_DO_RELINQUISH 9 /* T if this cookie needs relinquishment */ #define FSCACHE_COOKIE_DO_WITHDRAW 10 /* T if this cookie needs withdrawing */ #define FSCACHE_COOKIE_DO_LRU_DISCARD 11 /* T if this cookie needs LRU discard */ #define FSCACHE_COOKIE_DO_PREP_TO_WRITE 12 /* T if cookie needs write preparation */ #define FSCACHE_COOKIE_HAVE_DATA 13 /* T if this cookie has data stored */ #define FSCACHE_COOKIE_IS_HASHED 14 /* T if this cookie is hashed */ #define FSCACHE_COOKIE_DO_INVALIDATE 15 /* T if cookie needs invalidation */ enum fscache_cookie_state state; u8 advice; /* FSCACHE_ADV_* */ u8 key_len; /* Length of index key */ u8 aux_len; /* Length of auxiliary data */ u32 key_hash; /* Hash of volume, key, len */ union { void *key; /* Index key */ u8 inline_key[16]; /* - If the key is short enough */ }; union { void *aux; /* Auxiliary data */ u8 inline_aux[8]; /* - If the aux data is short enough */ }; }; /* * slow-path functions for when there is actually caching available, and the * netfs does actually have a valid token * - these are not to be called directly * - these are undefined symbols when FS-Cache is not configured and the * optimiser takes care of not using them */ extern struct fscache_volume *__fscache_acquire_volume(const char *, const char *, const void *, size_t); extern void __fscache_relinquish_volume(struct fscache_volume *, const void *, bool); extern struct fscache_cookie *__fscache_acquire_cookie( struct fscache_volume *, u8, const void *, size_t, const void *, size_t, loff_t); extern void __fscache_use_cookie(struct fscache_cookie *, bool); extern void __fscache_unuse_cookie(struct fscache_cookie *, const void *, const loff_t *); extern void __fscache_relinquish_cookie(struct fscache_cookie *, bool); extern void __fscache_resize_cookie(struct fscache_cookie *, loff_t); extern void __fscache_invalidate(struct fscache_cookie *, const void *, loff_t, 
unsigned int); extern int __fscache_begin_read_operation(struct netfs_cache_resources *, struct fscache_cookie *); extern int __fscache_begin_write_operation(struct netfs_cache_resources *, struct fscache_cookie *); void __fscache_write_to_cache(struct fscache_cookie *cookie, struct address_space *mapping, loff_t start, size_t len, loff_t i_size, netfs_io_terminated_t term_func, void *term_func_priv, bool using_pgpriv2, bool cond); extern void __fscache_clear_page_bits(struct address_space *, loff_t, size_t); /** * fscache_acquire_volume - Register a volume as desiring caching services * @volume_key: An identification string for the volume * @cache_name: The name of the cache to use (or NULL for the default) * @coherency_data: Piece of arbitrary coherency data to check (or NULL) * @coherency_len: The size of the coherency data * * Register a volume as desiring caching services if they're available. The * caller must provide an identifier for the volume and may also indicate which * cache it should be in. If a preexisting volume entry is found in the cache, * the coherency data must match otherwise the entry will be invalidated. * * Returns a cookie pointer on success, -ENOMEM if out of memory or -EBUSY if a * cache volume of that name is already acquired. Note that "NULL" is a valid * cookie pointer and can be returned if caching is refused. */ static inline struct fscache_volume *fscache_acquire_volume(const char *volume_key, const char *cache_name, const void *coherency_data, size_t coherency_len) { if (!fscache_available()) return NULL; return __fscache_acquire_volume(volume_key, cache_name, coherency_data, coherency_len); } /** * fscache_relinquish_volume - Cease caching a volume * @volume: The volume cookie * @coherency_data: Piece of arbitrary coherency data to set (or NULL) * @invalidate: True if the volume should be invalidated * * Indicate that a filesystem no longer desires caching services for a volume. * The caller must have relinquished all file cookies prior to calling this. * The stored coherency data is updated. */ static inline void fscache_relinquish_volume(struct fscache_volume *volume, const void *coherency_data, bool invalidate) { if (fscache_volume_valid(volume)) __fscache_relinquish_volume(volume, coherency_data, invalidate); } /** * fscache_acquire_cookie - Acquire a cookie to represent a cache object * @volume: The volume in which to locate/create this cookie * @advice: Advice flags (FSCACHE_COOKIE_ADV_*) * @index_key: The index key for this cookie * @index_key_len: Size of the index key * @aux_data: The auxiliary data for the cookie (may be NULL) * @aux_data_len: Size of the auxiliary data buffer * @object_size: The initial size of object * * Acquire a cookie to represent a data file within the given cache volume. * * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline struct fscache_cookie *fscache_acquire_cookie(struct fscache_volume *volume, u8 advice, const void *index_key, size_t index_key_len, const void *aux_data, size_t aux_data_len, loff_t object_size) { if (!fscache_volume_valid(volume)) return NULL; return __fscache_acquire_cookie(volume, advice, index_key, index_key_len, aux_data, aux_data_len, object_size); } /** * fscache_use_cookie - Request usage of cookie attached to an object * @cookie: The cookie representing the cache object * @will_modify: If cache is expected to be modified locally * * Request usage of the cookie attached to an object. 
The caller should tell * the cache if the object's contents are about to be modified locally and then * the cache can apply the policy that has been set to handle this case. */ static inline void fscache_use_cookie(struct fscache_cookie *cookie, bool will_modify) { if (fscache_cookie_valid(cookie)) __fscache_use_cookie(cookie, will_modify); } /** * fscache_unuse_cookie - Cease usage of cookie attached to an object * @cookie: The cookie representing the cache object * @aux_data: Updated auxiliary data (or NULL) * @object_size: Revised size of the object (or NULL) * * Cease usage of the cookie attached to an object. When the users count * reaches zero then the cookie relinquishment will be permitted to proceed. */ static inline void fscache_unuse_cookie(struct fscache_cookie *cookie, const void *aux_data, const loff_t *object_size) { if (fscache_cookie_valid(cookie)) __fscache_unuse_cookie(cookie, aux_data, object_size); } /** * fscache_relinquish_cookie - Return the cookie to the cache, maybe discarding * it * @cookie: The cookie being returned * @retire: True if the cache object the cookie represents is to be discarded * * This function returns a cookie to the cache, forcibly discarding the * associated cache object if retire is set to true. * * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline void fscache_relinquish_cookie(struct fscache_cookie *cookie, bool retire) { if (fscache_cookie_valid(cookie)) __fscache_relinquish_cookie(cookie, retire); } /* * Find the auxiliary data on a cookie. */ static inline void *fscache_get_aux(struct fscache_cookie *cookie) { if (cookie->aux_len <= sizeof(cookie->inline_aux)) return cookie->inline_aux; else return cookie->aux; } /* * Update the auxiliary data on a cookie. */ static inline void fscache_update_aux(struct fscache_cookie *cookie, const void *aux_data, const loff_t *object_size) { void *p = fscache_get_aux(cookie); if (aux_data && p) memcpy(p, aux_data, cookie->aux_len); if (object_size) cookie->object_size = *object_size; } #ifdef CONFIG_FSCACHE_STATS extern atomic_t fscache_n_updates; #endif static inline void __fscache_update_cookie(struct fscache_cookie *cookie, const void *aux_data, const loff_t *object_size) { #ifdef CONFIG_FSCACHE_STATS atomic_inc(&fscache_n_updates); #endif fscache_update_aux(cookie, aux_data, object_size); smp_wmb(); set_bit(FSCACHE_COOKIE_NEEDS_UPDATE, &cookie->flags); } /** * fscache_update_cookie - Request that a cache object be updated * @cookie: The cookie representing the cache object * @aux_data: The updated auxiliary data for the cookie (may be NULL) * @object_size: The current size of the object (may be NULL) * * Request an update of the index data for the cache object associated with the * cookie. The auxiliary data on the cookie will be updated first if @aux_data * is set and the object size will be updated and the object possibly trimmed * if @object_size is set. * * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline void fscache_update_cookie(struct fscache_cookie *cookie, const void *aux_data, const loff_t *object_size) { if (fscache_cookie_enabled(cookie)) __fscache_update_cookie(cookie, aux_data, object_size); } /** * fscache_resize_cookie - Request that a cache object be resized * @cookie: The cookie representing the cache object * @new_size: The new size of the object (may be NULL) * * Request that the size of an object be changed. 
* * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline void fscache_resize_cookie(struct fscache_cookie *cookie, loff_t new_size) { if (fscache_cookie_enabled(cookie)) __fscache_resize_cookie(cookie, new_size); } /** * fscache_invalidate - Notify cache that an object needs invalidation * @cookie: The cookie representing the cache object * @aux_data: The updated auxiliary data for the cookie (may be NULL) * @size: The revised size of the object. * @flags: Invalidation flags (FSCACHE_INVAL_*) * * Notify the cache that an object is needs to be invalidated and that it * should abort any retrievals or stores it is doing on the cache. This * increments inval_counter on the cookie which can be used by the caller to * reconsider I/O requests as they complete. * * If @flags has FSCACHE_INVAL_DIO_WRITE set, this indicates that this is due * to a direct I/O write and will cause caching to be disabled on this cookie * until it is completely unused. * * See Documentation/filesystems/caching/netfs-api.rst for a complete * description. */ static inline void fscache_invalidate(struct fscache_cookie *cookie, const void *aux_data, loff_t size, unsigned int flags) { if (fscache_cookie_enabled(cookie)) __fscache_invalidate(cookie, aux_data, size, flags); } /** * fscache_operation_valid - Return true if operations resources are usable * @cres: The resources to check. * * Returns a pointer to the operations table if usable or NULL if not. */ static inline const struct netfs_cache_ops *fscache_operation_valid(const struct netfs_cache_resources *cres) { return fscache_resources_valid(cres) ? cres->ops : NULL; } /** * fscache_begin_read_operation - Begin a read operation for the netfs lib * @cres: The cache resources for the read being performed * @cookie: The cookie representing the cache object * * Begin a read operation on behalf of the netfs helper library. @cres * indicates the cache resources to which the operation state should be * attached; @cookie indicates the cache object that will be accessed. * * @cres->inval_counter is set from @cookie->inval_counter for comparison at * the end of the operation. This allows invalidation during the operation to * be detected by the caller. * * Returns: * * 0 - Success * * -ENOBUFS - No caching available * * Other error code from the cache, such as -ENOMEM. */ static inline int fscache_begin_read_operation(struct netfs_cache_resources *cres, struct fscache_cookie *cookie) { if (fscache_cookie_enabled(cookie)) return __fscache_begin_read_operation(cres, cookie); return -ENOBUFS; } /** * fscache_end_operation - End the read operation for the netfs lib * @cres: The cache resources for the read operation * * Clean up the resources at the end of the read request. */ static inline void fscache_end_operation(struct netfs_cache_resources *cres) { const struct netfs_cache_ops *ops = fscache_operation_valid(cres); if (ops) ops->end_operation(cres); } /** * fscache_read - Start a read from the cache. * @cres: The cache resources to use * @start_pos: The beginning file offset in the cache file * @iter: The buffer to fill - and also the length * @read_hole: How to handle a hole in the data. * @term_func: The function to call upon completion * @term_func_priv: The private data for @term_func * * Start a read from the cache. @cres indicates the cache object to read from * and must be obtained by a call to fscache_begin_operation() beforehand. 
* * The data is read into the iterator, @iter, and that also indicates the size * of the operation. @start_pos is the start position in the file, though if * @seek_data is set appropriately, the cache can use SEEK_DATA to find the * next piece of data, writing zeros for the hole into the iterator. * * Upon termination of the operation, @term_func will be called and supplied * with @term_func_priv plus the amount of data written, if successful, or the * error code otherwise. * * @read_hole indicates how a partially populated region in the cache should be * handled. It can be one of a number of settings: * * NETFS_READ_HOLE_IGNORE - Just try to read (may return a short read). * * NETFS_READ_HOLE_FAIL - Give ENODATA if we encounter a hole. */ static inline int fscache_read(struct netfs_cache_resources *cres, loff_t start_pos, struct iov_iter *iter, enum netfs_read_from_hole read_hole, netfs_io_terminated_t term_func, void *term_func_priv) { const struct netfs_cache_ops *ops = fscache_operation_valid(cres); return ops->read(cres, start_pos, iter, read_hole, term_func, term_func_priv); } /** * fscache_begin_write_operation - Begin a write operation for the netfs lib * @cres: The cache resources for the write being performed * @cookie: The cookie representing the cache object * * Begin a write operation on behalf of the netfs helper library. @cres * indicates the cache resources to which the operation state should be * attached; @cookie indicates the cache object that will be accessed. * * @cres->inval_counter is set from @cookie->inval_counter for comparison at * the end of the operation. This allows invalidation during the operation to * be detected by the caller. * * Returns: * * 0 - Success * * -ENOBUFS - No caching available * * Other error code from the cache, such as -ENOMEM. */ static inline int fscache_begin_write_operation(struct netfs_cache_resources *cres, struct fscache_cookie *cookie) { if (fscache_cookie_enabled(cookie)) return __fscache_begin_write_operation(cres, cookie); return -ENOBUFS; } /** * fscache_write - Start a write to the cache. * @cres: The cache resources to use * @start_pos: The beginning file offset in the cache file * @iter: The data to write - and also the length * @term_func: The function to call upon completion * @term_func_priv: The private data for @term_func * * Start a write to the cache. @cres indicates the cache object to write to and * must be obtained by a call to fscache_begin_operation() beforehand. * * The data to be written is obtained from the iterator, @iter, and that also * indicates the size of the operation. @start_pos is the start position in * the file. * * Upon termination of the operation, @term_func will be called and supplied * with @term_func_priv plus the amount of data written, if successful, or the * error code otherwise. */ static inline int fscache_write(struct netfs_cache_resources *cres, loff_t start_pos, struct iov_iter *iter, netfs_io_terminated_t term_func, void *term_func_priv) { const struct netfs_cache_ops *ops = fscache_operation_valid(cres); return ops->write(cres, start_pos, iter, term_func, term_func_priv); } /** * fscache_clear_page_bits - Clear the PG_fscache bits from a set of pages * @mapping: The netfs inode to use as the source * @start: The start position in @mapping * @len: The amount of data to unlock * @caching: If PG_fscache has been set * * Clear the PG_fscache flag from a sequence of pages and wake up anyone who's * waiting. 
*/ static inline void fscache_clear_page_bits(struct address_space *mapping, loff_t start, size_t len, bool caching) { if (caching) __fscache_clear_page_bits(mapping, start, len); } /** * fscache_write_to_cache - Save a write to the cache and clear PG_fscache * @cookie: The cookie representing the cache object * @mapping: The netfs inode to use as the source * @start: The start position in @mapping * @len: The amount of data to write back * @i_size: The new size of the inode * @term_func: The function to call upon completion * @term_func_priv: The private data for @term_func * @using_pgpriv2: If we're using PG_private_2 to mark in-progress write * @caching: If we actually want to do the caching * * Helper function for a netfs to write dirty data from an inode into the cache * object that's backing it. * * @start and @len describe the range of the data. This does not need to be * page-aligned, but to satisfy DIO requirements, the cache may expand it up to * the page boundaries on either end. All the pages covering the range must be * marked with PG_fscache. * * If given, @term_func will be called upon completion and supplied with * @term_func_priv. Note that if @using_pgpriv2 is set, the PG_private_2 flags * will have been cleared by this point, so the netfs must retain its own pin * on the mapping. */ static inline void fscache_write_to_cache(struct fscache_cookie *cookie, struct address_space *mapping, loff_t start, size_t len, loff_t i_size, netfs_io_terminated_t term_func, void *term_func_priv, bool using_pgpriv2, bool caching) { if (caching) __fscache_write_to_cache(cookie, mapping, start, len, i_size, term_func, term_func_priv, using_pgpriv2, caching); else if (term_func) term_func(term_func_priv, -ENOBUFS); } /** * fscache_note_page_release - Note that a netfs page got released * @cookie: The cookie corresponding to the file * * Note that a page that has been copied to the cache has been released. This * means that future reads will need to look in the cache to see if it's there. */ static inline void fscache_note_page_release(struct fscache_cookie *cookie) { /* If we've written data to the cache (HAVE_DATA) and there wasn't any * data in the cache when we started (NO_DATA_TO_READ), it may no * longer be true that we can skip reading from the cache - so clear * the flag that causes reads to be skipped. */ if (cookie && test_bit(FSCACHE_COOKIE_HAVE_DATA, &cookie->flags) && test_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags)) clear_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags); } #endif /* _LINUX_FSCACHE_H */ |
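/*
 * Editor's note: an illustrative sketch (not part of fscache.h above) of the
 * cookie lifecycle a network filesystem typically drives through this API,
 * plus a cache read via the netfs cache resources. All myfs_* names and the
 * index-key layout are hypothetical; see
 * Documentation/filesystems/caching/netfs-api.rst for the real contract.
 */
static struct fscache_volume *myfs_cache_mount(const char *volume_key)
{
	/* One volume cookie per superblock; a NULL return just disables caching. */
	return fscache_acquire_volume(volume_key, NULL /* default cache */, NULL, 0);
}

static struct fscache_cookie *myfs_cache_inode(struct fscache_volume *volume,
					       u64 vnode_id, loff_t i_size)
{
	/* One data-file cookie per inode, keyed however the filesystem likes. */
	return fscache_acquire_cookie(volume, 0, &vnode_id, sizeof(vnode_id),
				      NULL, 0, i_size);
}

static int myfs_read_from_cache(struct fscache_cookie *cookie, loff_t pos,
				struct iov_iter *iter)
{
	struct netfs_cache_resources cres = {};
	int ret;

	/* Pin the cookie for the duration of the I/O. */
	fscache_use_cookie(cookie, false);

	ret = fscache_begin_read_operation(&cres, cookie);
	if (ret < 0)
		goto out;	/* -ENOBUFS when no cache object is attached */

	/* A netfs_io_terminated_t completion callback is omitted in this sketch. */
	ret = fscache_read(&cres, pos, iter, NETFS_READ_HOLE_FAIL, NULL, NULL);
	fscache_end_operation(&cres);
out:
	fscache_unuse_cookie(cookie, NULL, NULL);
	return ret;
}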
// SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/mm.h> #include <linux/slab.h> #include <linux/namei.h> #include <linux/io_uring.h> #include <linux/splice.h> #include <uapi/linux/io_uring.h> #include "io_uring.h" #include "splice.h" struct io_splice { struct file *file_out; loff_t off_out; loff_t off_in; u64 len; int splice_fd_in; unsigned int flags; struct io_rsrc_node *rsrc_node; }; static int __io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_splice *sp = io_kiocb_to_cmd(req, struct io_splice); unsigned int valid_flags = SPLICE_F_FD_IN_FIXED | SPLICE_F_ALL; sp->len = READ_ONCE(sqe->len); sp->flags = READ_ONCE(sqe->splice_flags); if (unlikely(sp->flags & ~valid_flags)) return -EINVAL; sp->splice_fd_in = READ_ONCE(sqe->splice_fd_in); sp->rsrc_node = NULL; req->flags |= REQ_F_FORCE_ASYNC; return 0; } int io_tee_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { if (READ_ONCE(sqe->splice_off_in) || READ_ONCE(sqe->off)) return -EINVAL; return __io_splice_prep(req, sqe); } void io_splice_cleanup(struct io_kiocb *req) { struct io_splice *sp = io_kiocb_to_cmd(req, struct io_splice); if (sp->rsrc_node) io_put_rsrc_node(req->ctx, sp->rsrc_node); } static struct file *io_splice_get_file(struct io_kiocb *req, unsigned int issue_flags) { struct io_splice *sp = io_kiocb_to_cmd(req, struct io_splice); struct io_ring_ctx *ctx = req->ctx; struct io_rsrc_node *node; struct file *file = NULL; if (!(sp->flags & SPLICE_F_FD_IN_FIXED)) return io_file_get_normal(req, sp->splice_fd_in); io_ring_submit_lock(ctx, issue_flags); node = io_rsrc_node_lookup(&ctx->file_table.data, sp->splice_fd_in); if (node) { node->refs++; sp->rsrc_node = node; file = io_slot_file(node); req->flags |= REQ_F_NEED_CLEANUP; } io_ring_submit_unlock(ctx, issue_flags); return file; } int io_tee(struct io_kiocb *req, unsigned int issue_flags) { struct io_splice *sp = io_kiocb_to_cmd(req, struct io_splice); struct file *out = sp->file_out; unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED; struct file *in; ssize_t ret = 0; WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK); in = io_splice_get_file(req, issue_flags); if (!in) { ret = -EBADF; goto done; } if (sp->len) ret = do_tee(in, out, sp->len, flags); if (!(sp->flags & SPLICE_F_FD_IN_FIXED)) fput(in); done: if (ret != sp->len) req_set_fail(req); io_req_set_res(req, ret, 0); return IOU_COMPLETE; } int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_splice *sp = io_kiocb_to_cmd(req, struct io_splice); sp->off_in = READ_ONCE(sqe->splice_off_in); sp->off_out = READ_ONCE(sqe->off); return __io_splice_prep(req, sqe); } int io_splice(struct io_kiocb *req, unsigned int issue_flags) { struct io_splice *sp = io_kiocb_to_cmd(req, struct io_splice); struct file *out = sp->file_out; unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED; loff_t *poff_in, *poff_out; struct file *in; ssize_t ret = 0; WARN_ON_ONCE(issue_flags &
IO_URING_F_NONBLOCK); in = io_splice_get_file(req, issue_flags); if (!in) { ret = -EBADF; goto done; } poff_in = (sp->off_in == -1) ? NULL : &sp->off_in; poff_out = (sp->off_out == -1) ? NULL : &sp->off_out; if (sp->len) ret = do_splice(in, poff_in, out, poff_out, sp->len, flags); if (!(sp->flags & SPLICE_F_FD_IN_FIXED)) fput(in); done: if (ret != sp->len) req_set_fail(req); io_req_set_res(req, ret, 0); return IOU_COMPLETE; }
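/*
 * Editor's note: an illustrative userspace sketch (not part of the kernel
 * file above) showing how the SQE fields read by io_splice_prep()/io_splice()
 * are filled in. It assumes liburing's io_uring_prep_splice() and that fd_in
 * and fd_out are already open; error handling is minimal.
 */
#include <liburing.h>

static int submit_splice(struct io_uring *ring, int fd_in, int fd_out,
			 unsigned int len)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
	struct io_uring_cqe *cqe;
	int ret;

	/* off_in/off_out of -1 become NULL offset pointers in io_splice(). */
	io_uring_prep_splice(sqe, fd_in, -1, fd_out, -1, len, 0);
	io_uring_submit(ring);

	ret = io_uring_wait_cqe(ring, &cqe);
	if (ret < 0)
		return ret;
	ret = cqe->res;			/* bytes spliced, or -errno */
	io_uring_cqe_seen(ring, cqe);
	return ret;
}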
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_api.c	Packet scheduler API.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *
 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/hrtimer.h>
#include <linux/slab.h>
#include <linux/hashtable.h>
#include <linux/bpf.h>

#include <net/netdev_lock.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/tc_wrapper.h>

#include <trace/events/qdisc.h>

/*

   Short review.
   -------------

   This file consists of two interrelated parts:

   1. queueing disciplines manager frontend.
   2. traffic classes manager frontend.
   Generally, a queueing discipline ("qdisc") is a black box that is able
   to enqueue packets and to dequeue them (when the device is ready to
   send something) in an order and at times determined by the algorithm
   hidden inside it.

   qdiscs are divided into two categories:
   - "queues", which have no internal structure visible from the outside.
   - "schedulers", which split all the packets into "traffic classes",
     using "packet classifiers" (see cls_api.c)

   In turn, classes may have child qdiscs (as a rule, queues)
   attached to them etc. etc. etc.

   The goal of the routines in this file is to translate the information
   supplied by the user in the form of handles into a form more intelligible
   to the kernel, to do some sanity checks and the part of the work that is
   common to all qdiscs, and to provide rtnetlink notifications.

   All the real intelligent work is done inside the qdisc modules.

   Every discipline has two major routines: enqueue and dequeue.

   ---dequeue

   dequeue usually returns a skb to send. It is allowed to return NULL,
   but this does not mean that the queue is empty, it only means that the
   discipline does not want to send anything this time. The queue is really
   empty if q->q.qlen == 0. For complicated disciplines with multiple queues
   q->q is not the real packet queue, but q->q.qlen must still be valid.

   ---enqueue

   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   a non-zero error code:

   NET_XMIT_DROP	- this packet was dropped
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN		- probably this packet was enqueued, but another one was dropped.
     Expected action: back off or ignore.

   Auxiliary routines:

   ---peek

   like dequeue but without removing a packet from the queue

   ---reset

   returns the qdisc to its initial state: purge all buffers, clear all
   timers, counters (except for statistics) etc.

   ---init

   initializes a newly created qdisc.

   ---destroy

   destroys resources allocated by init and during the lifetime of the qdisc.

   ---change

   changes qdisc parameters.
 */

/* Protects list of registered TC modules. It is pure SMP lock. */
static DEFINE_RWLOCK(qdisc_mod_lock);


/************************************************
 *	Queueing disciplines manipulation.	*
 ************************************************/


/* The list of all installed queueing disciplines.
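 *
 * A discipline appears on this list once register_qdisc() (below) has been
 * called for its ops, typically from the qdisc module's init function.
 * A minimal sketch (the "example" names are purely illustrative and are
 * not part of this file):
 *
 *	static struct Qdisc_ops example_qdisc_ops __read_mostly = {
 *		.id		= "example",
 *		.priv_size	= sizeof(struct example_sched_data),
 *		.enqueue	= example_enqueue,
 *		.dequeue	= example_dequeue,
 *		.peek		= qdisc_peek_dequeued,
 *		.init		= example_init,
 *		.reset		= example_reset,
 *		.destroy	= example_destroy,
 *		.owner		= THIS_MODULE,
 *	};
 *
 *	static int __init example_module_init(void)
 *	{
 *		return register_qdisc(&example_qdisc_ops);
 *	}
 *
 *	static void __exit example_module_exit(void)
 *	{
 *		unregister_qdisc(&example_qdisc_ops);
 *	}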
*/ static struct Qdisc_ops *qdisc_base; /* Register/unregister queueing discipline */ int register_qdisc(struct Qdisc_ops *qops) { struct Qdisc_ops *q, **qp; int rc = -EEXIST; write_lock(&qdisc_mod_lock); for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next) if (!strcmp(qops->id, q->id)) goto out; if (qops->enqueue == NULL) qops->enqueue = noop_qdisc_ops.enqueue; if (qops->peek == NULL) { if (qops->dequeue == NULL) qops->peek = noop_qdisc_ops.peek; else goto out_einval; } if (qops->dequeue == NULL) qops->dequeue = noop_qdisc_ops.dequeue; if (qops->cl_ops) { const struct Qdisc_class_ops *cops = qops->cl_ops; if (!(cops->find && cops->walk && cops->leaf)) goto out_einval; if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf)) goto out_einval; } qops->next = NULL; *qp = qops; rc = 0; out: write_unlock(&qdisc_mod_lock); return rc; out_einval: rc = -EINVAL; goto out; } EXPORT_SYMBOL(register_qdisc); void unregister_qdisc(struct Qdisc_ops *qops) { struct Qdisc_ops *q, **qp; int err = -ENOENT; write_lock(&qdisc_mod_lock); for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next) if (q == qops) break; if (q) { *qp = q->next; q->next = NULL; err = 0; } write_unlock(&qdisc_mod_lock); WARN(err, "unregister qdisc(%s) failed\n", qops->id); } EXPORT_SYMBOL(unregister_qdisc); /* Get default qdisc if not otherwise specified */ void qdisc_get_default(char *name, size_t len) { read_lock(&qdisc_mod_lock); strscpy(name, default_qdisc_ops->id, len); read_unlock(&qdisc_mod_lock); } static struct Qdisc_ops *qdisc_lookup_default(const char *name) { struct Qdisc_ops *q = NULL; for (q = qdisc_base; q; q = q->next) { if (!strcmp(name, q->id)) { if (!bpf_try_module_get(q, q->owner)) q = NULL; break; } } return q; } /* Set new default qdisc to use */ int qdisc_set_default(const char *name) { const struct Qdisc_ops *ops; if (!capable(CAP_NET_ADMIN)) return -EPERM; write_lock(&qdisc_mod_lock); ops = qdisc_lookup_default(name); if (!ops) { /* Not found, drop lock and try to load module */ write_unlock(&qdisc_mod_lock); request_module(NET_SCH_ALIAS_PREFIX "%s", name); write_lock(&qdisc_mod_lock); ops = qdisc_lookup_default(name); } if (ops) { /* Set new default */ bpf_module_put(default_qdisc_ops, default_qdisc_ops->owner); default_qdisc_ops = ops; } write_unlock(&qdisc_mod_lock); return ops ? 0 : -ENOENT; } #ifdef CONFIG_NET_SCH_DEFAULT /* Set default value from kernel config */ static int __init sch_default_qdisc(void) { return qdisc_set_default(CONFIG_DEFAULT_NET_SCH); } late_initcall(sch_default_qdisc); #endif /* We know handle. Find qdisc among all qdisc's attached to device * (root qdisc, all its children, children of children etc.) * Note: caller either uses rtnl or rcu_read_lock() */ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) { struct Qdisc *q; if (!qdisc_dev(root)) return (root->handle == handle ? 
root : NULL); if (!(root->flags & TCQ_F_BUILTIN) && root->handle == handle) return root; hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle, lockdep_rtnl_is_held()) { if (q->handle == handle) return q; } return NULL; } void qdisc_hash_add(struct Qdisc *q, bool invisible) { if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { ASSERT_RTNL(); hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle); if (invisible) q->flags |= TCQ_F_INVISIBLE; } } EXPORT_SYMBOL(qdisc_hash_add); void qdisc_hash_del(struct Qdisc *q) { if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { ASSERT_RTNL(); hash_del_rcu(&q->hash); } } EXPORT_SYMBOL(qdisc_hash_del); struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) { struct Qdisc *q; if (!handle) return NULL; q = qdisc_match_from_root(rtnl_dereference(dev->qdisc), handle); if (q) goto out; if (dev_ingress_queue(dev)) q = qdisc_match_from_root( rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping), handle); out: return q; } struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle) { struct netdev_queue *nq; struct Qdisc *q; if (!handle) return NULL; q = qdisc_match_from_root(rcu_dereference(dev->qdisc), handle); if (q) goto out; nq = dev_ingress_queue_rcu(dev); if (nq) q = qdisc_match_from_root(rcu_dereference(nq->qdisc_sleeping), handle); out: return q; } static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid, struct netlink_ext_ack *extack) { unsigned long cl; const struct Qdisc_class_ops *cops = p->ops->cl_ops; if (cops == NULL) { NL_SET_ERR_MSG(extack, "Parent qdisc is not classful"); return ERR_PTR(-EOPNOTSUPP); } cl = cops->find(p, classid); if (cl == 0) { NL_SET_ERR_MSG(extack, "Specified class not found"); return ERR_PTR(-ENOENT); } return cops->leaf(p, cl); } /* Find queueing discipline by name */ static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind) { struct Qdisc_ops *q = NULL; if (kind) { read_lock(&qdisc_mod_lock); for (q = qdisc_base; q; q = q->next) { if (nla_strcmp(kind, q->id) == 0) { if (!bpf_try_module_get(q, q->owner)) q = NULL; break; } } read_unlock(&qdisc_mod_lock); } return q; } /* The linklayer setting were not transferred from iproute2, in older * versions, and the rate tables lookup systems have been dropped in * the kernel. To keep backward compatible with older iproute2 tc * utils, we detect the linklayer setting by detecting if the rate * table were modified. * * For linklayer ATM table entries, the rate table will be aligned to * 48 bytes, thus some table entries will contain the same value. The * mpu (min packet unit) is also encoded into the old rate table, thus * starting from the mpu, we find low and high table entries for * mapping this cell. If these entries contain the same value, when * the rate tables have been modified for linklayer ATM. * * This is done by rounding mpu to the nearest 48 bytes cell/entry, * and then roundup to the next cell, calc the table entry one below, * and compare. 
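 *
 * A worked example (numbers purely illustrative): with mpu = 64 and
 * cell_log = 3, low = roundup(64, 48) = 96 and high = roundup(97, 48) = 144,
 * giving cell_low = 96 >> 3 = 12 and cell_high = (144 >> 3) - 1 = 17.
 * If rtab[12] == rtab[17], the table was built for 48-byte ATM cells and
 * TC_LINKLAYER_ATM is returned; otherwise TC_LINKLAYER_ETHERNET is assumed.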
*/ static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab) { int low = roundup(r->mpu, 48); int high = roundup(low+1, 48); int cell_low = low >> r->cell_log; int cell_high = (high >> r->cell_log) - 1; /* rtab is too inaccurate at rates > 100Mbit/s */ if ((r->rate > (100000000/8)) || (rtab[0] == 0)) { pr_debug("TC linklayer: Giving up ATM detection\n"); return TC_LINKLAYER_ETHERNET; } if ((cell_high > cell_low) && (cell_high < 256) && (rtab[cell_low] == rtab[cell_high])) { pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n", cell_low, cell_high, rtab[cell_high]); return TC_LINKLAYER_ATM; } return TC_LINKLAYER_ETHERNET; } static struct qdisc_rate_table *qdisc_rtab_list; struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab, struct netlink_ext_ack *extack) { struct qdisc_rate_table *rtab; if (tab == NULL || r->rate == 0 || r->cell_log == 0 || r->cell_log >= 32 || nla_len(tab) != TC_RTAB_SIZE) { NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching"); return NULL; } for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) { if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) && !memcmp(&rtab->data, nla_data(tab), 1024)) { rtab->refcnt++; return rtab; } } rtab = kmalloc(sizeof(*rtab), GFP_KERNEL); if (rtab) { rtab->rate = *r; rtab->refcnt = 1; memcpy(rtab->data, nla_data(tab), 1024); if (r->linklayer == TC_LINKLAYER_UNAWARE) r->linklayer = __detect_linklayer(r, rtab->data); rtab->next = qdisc_rtab_list; qdisc_rtab_list = rtab; } else { NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table"); } return rtab; } EXPORT_SYMBOL(qdisc_get_rtab); void qdisc_put_rtab(struct qdisc_rate_table *tab) { struct qdisc_rate_table *rtab, **rtabp; if (!tab || --tab->refcnt) return; for (rtabp = &qdisc_rtab_list; (rtab = *rtabp) != NULL; rtabp = &rtab->next) { if (rtab == tab) { *rtabp = rtab->next; kfree(rtab); return; } } } EXPORT_SYMBOL(qdisc_put_rtab); static LIST_HEAD(qdisc_stab_list); static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = { [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) }, [TCA_STAB_DATA] = { .type = NLA_BINARY }, }; static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt, struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_STAB_MAX + 1]; struct qdisc_size_table *stab; struct tc_sizespec *s; unsigned int tsize = 0; u16 *tab = NULL; int err; err = nla_parse_nested_deprecated(tb, TCA_STAB_MAX, opt, stab_policy, extack); if (err < 0) return ERR_PTR(err); if (!tb[TCA_STAB_BASE]) { NL_SET_ERR_MSG(extack, "Size table base attribute is missing"); return ERR_PTR(-EINVAL); } s = nla_data(tb[TCA_STAB_BASE]); if (s->tsize > 0) { if (!tb[TCA_STAB_DATA]) { NL_SET_ERR_MSG(extack, "Size table data attribute is missing"); return ERR_PTR(-EINVAL); } tab = nla_data(tb[TCA_STAB_DATA]); tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16); } if (tsize != s->tsize || (!tab && tsize > 0)) { NL_SET_ERR_MSG(extack, "Invalid size of size table"); return ERR_PTR(-EINVAL); } list_for_each_entry(stab, &qdisc_stab_list, list) { if (memcmp(&stab->szopts, s, sizeof(*s))) continue; if (tsize > 0 && memcmp(stab->data, tab, flex_array_size(stab, data, tsize))) continue; stab->refcnt++; return stab; } if (s->size_log > STAB_SIZE_LOG_MAX || s->cell_log > STAB_SIZE_LOG_MAX) { NL_SET_ERR_MSG(extack, "Invalid logarithmic size of size table"); return ERR_PTR(-EINVAL); } stab = kmalloc(struct_size(stab, data, tsize), GFP_KERNEL); if (!stab) return ERR_PTR(-ENOMEM); stab->refcnt = 1; stab->szopts = *s; if (tsize > 0) memcpy(stab->data, 
tab, flex_array_size(stab, data, tsize)); list_add_tail(&stab->list, &qdisc_stab_list); return stab; } void qdisc_put_stab(struct qdisc_size_table *tab) { if (!tab) return; if (--tab->refcnt == 0) { list_del(&tab->list); kfree_rcu(tab, rcu); } } EXPORT_SYMBOL(qdisc_put_stab); static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab) { struct nlattr *nest; nest = nla_nest_start_noflag(skb, TCA_STAB); if (nest == NULL) goto nla_put_failure; if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts)) goto nla_put_failure; nla_nest_end(skb, nest); return skb->len; nla_put_failure: return -1; } void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab) { int pkt_len, slot; pkt_len = skb->len + stab->szopts.overhead; if (unlikely(!stab->szopts.tsize)) goto out; slot = pkt_len + stab->szopts.cell_align; if (unlikely(slot < 0)) slot = 0; slot >>= stab->szopts.cell_log; if (likely(slot < stab->szopts.tsize)) pkt_len = stab->data[slot]; else pkt_len = stab->data[stab->szopts.tsize - 1] * (slot / stab->szopts.tsize) + stab->data[slot % stab->szopts.tsize]; pkt_len <<= stab->szopts.size_log; out: if (unlikely(pkt_len < 1)) pkt_len = 1; qdisc_skb_cb(skb)->pkt_len = pkt_len; } static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) { struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, timer); rcu_read_lock(); __netif_schedule(qdisc_root(wd->qdisc)); rcu_read_unlock(); return HRTIMER_NORESTART; } void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc, clockid_t clockid) { hrtimer_setup(&wd->timer, qdisc_watchdog, clockid, HRTIMER_MODE_ABS_PINNED); wd->qdisc = qdisc; } EXPORT_SYMBOL(qdisc_watchdog_init_clockid); void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc) { qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC); } EXPORT_SYMBOL(qdisc_watchdog_init); void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires, u64 delta_ns) { bool deactivated; rcu_read_lock(); deactivated = test_bit(__QDISC_STATE_DEACTIVATED, &qdisc_root_sleeping(wd->qdisc)->state); rcu_read_unlock(); if (deactivated) return; if (hrtimer_is_queued(&wd->timer)) { u64 softexpires; softexpires = ktime_to_ns(hrtimer_get_softexpires(&wd->timer)); /* If timer is already set in [expires, expires + delta_ns], * do not reprogram it. 
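 *
 * For example (illustrative values only): a timer already armed with
 * softexpires = 1000500 ns does not need to be rearmed for a request of
 * expires = 1000000 ns with delta_ns = 1000 ns, since 1000500 already
 * lies within [1000000, 1001000].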
*/ if (softexpires - expires <= delta_ns) return; } hrtimer_start_range_ns(&wd->timer, ns_to_ktime(expires), delta_ns, HRTIMER_MODE_ABS_PINNED); } EXPORT_SYMBOL(qdisc_watchdog_schedule_range_ns); void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) { hrtimer_cancel(&wd->timer); } EXPORT_SYMBOL(qdisc_watchdog_cancel); static struct hlist_head *qdisc_class_hash_alloc(unsigned int n) { struct hlist_head *h; unsigned int i; h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL); if (h != NULL) { for (i = 0; i < n; i++) INIT_HLIST_HEAD(&h[i]); } return h; } void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash) { struct Qdisc_class_common *cl; struct hlist_node *next; struct hlist_head *nhash, *ohash; unsigned int nsize, nmask, osize; unsigned int i, h; /* Rehash when load factor exceeds 0.75 */ if (clhash->hashelems * 4 <= clhash->hashsize * 3) return; nsize = clhash->hashsize * 2; nmask = nsize - 1; nhash = qdisc_class_hash_alloc(nsize); if (nhash == NULL) return; ohash = clhash->hash; osize = clhash->hashsize; sch_tree_lock(sch); for (i = 0; i < osize; i++) { hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) { h = qdisc_class_hash(cl->classid, nmask); hlist_add_head(&cl->hnode, &nhash[h]); } } clhash->hash = nhash; clhash->hashsize = nsize; clhash->hashmask = nmask; sch_tree_unlock(sch); kvfree(ohash); } EXPORT_SYMBOL(qdisc_class_hash_grow); int qdisc_class_hash_init(struct Qdisc_class_hash *clhash) { unsigned int size = 4; clhash->hash = qdisc_class_hash_alloc(size); if (!clhash->hash) return -ENOMEM; clhash->hashsize = size; clhash->hashmask = size - 1; clhash->hashelems = 0; return 0; } EXPORT_SYMBOL(qdisc_class_hash_init); void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash) { kvfree(clhash->hash); } EXPORT_SYMBOL(qdisc_class_hash_destroy); void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash, struct Qdisc_class_common *cl) { unsigned int h; INIT_HLIST_NODE(&cl->hnode); h = qdisc_class_hash(cl->classid, clhash->hashmask); hlist_add_head(&cl->hnode, &clhash->hash[h]); clhash->hashelems++; } EXPORT_SYMBOL(qdisc_class_hash_insert); void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash, struct Qdisc_class_common *cl) { hlist_del(&cl->hnode); clhash->hashelems--; } EXPORT_SYMBOL(qdisc_class_hash_remove); /* Allocate an unique handle from space managed by kernel * Possible range is [8000-FFFF]:0000 (0x8000 values) */ static u32 qdisc_alloc_handle(struct net_device *dev) { int i = 0x8000; static u32 autohandle = TC_H_MAKE(0x80000000U, 0); do { autohandle += TC_H_MAKE(0x10000U, 0); if (autohandle == TC_H_MAKE(TC_H_ROOT, 0)) autohandle = TC_H_MAKE(0x80000000U, 0); if (!qdisc_lookup(dev, autohandle)) return autohandle; cond_resched(); } while (--i > 0); return 0; } void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len) { const struct Qdisc_class_ops *cops; unsigned long cl; u32 parentid; bool notify; int drops; drops = max_t(int, n, 0); rcu_read_lock(); while ((parentid = sch->parent)) { if (parentid == TC_H_ROOT) break; if (sch->flags & TCQ_F_NOPARENT) break; /* Notify parent qdisc only if child qdisc becomes empty. */ notify = !sch->q.qlen; /* TODO: perform the search on a per txq basis */ sch = qdisc_lookup_rcu(qdisc_dev(sch), TC_H_MAJ(parentid)); if (sch == NULL) { WARN_ON_ONCE(parentid != TC_H_ROOT); break; } cops = sch->ops->cl_ops; if (notify && cops->qlen_notify) { /* Note that qlen_notify must be idempotent as it may get called * multiple times. 
*/ cl = cops->find(sch, parentid); cops->qlen_notify(sch, cl); } sch->q.qlen -= n; sch->qstats.backlog -= len; __qdisc_qstats_drop(sch, drops); } rcu_read_unlock(); } EXPORT_SYMBOL(qdisc_tree_reduce_backlog); int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type, void *type_data) { struct net_device *dev = qdisc_dev(sch); int err; sch->flags &= ~TCQ_F_OFFLOADED; if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) return 0; err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data); if (err == -EOPNOTSUPP) return 0; if (!err) sch->flags |= TCQ_F_OFFLOADED; return err; } EXPORT_SYMBOL(qdisc_offload_dump_helper); void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch, struct Qdisc *new, struct Qdisc *old, enum tc_setup_type type, void *type_data, struct netlink_ext_ack *extack) { bool any_qdisc_is_offloaded; int err; if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) return; err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data); /* Don't report error if the graft is part of destroy operation. */ if (!err || !new || new == &noop_qdisc) return; /* Don't report error if the parent, the old child and the new * one are not offloaded. */ any_qdisc_is_offloaded = new->flags & TCQ_F_OFFLOADED; any_qdisc_is_offloaded |= sch && sch->flags & TCQ_F_OFFLOADED; any_qdisc_is_offloaded |= old && old->flags & TCQ_F_OFFLOADED; if (any_qdisc_is_offloaded) NL_SET_ERR_MSG(extack, "Offloading graft operation failed."); } EXPORT_SYMBOL(qdisc_offload_graft_helper); void qdisc_offload_query_caps(struct net_device *dev, enum tc_setup_type type, void *caps, size_t caps_len) { const struct net_device_ops *ops = dev->netdev_ops; struct tc_query_caps_base base = { .type = type, .caps = caps, }; memset(caps, 0, caps_len); if (ops->ndo_setup_tc) ops->ndo_setup_tc(dev, TC_QUERY_CAPS, &base); } EXPORT_SYMBOL(qdisc_offload_query_caps); static void qdisc_offload_graft_root(struct net_device *dev, struct Qdisc *new, struct Qdisc *old, struct netlink_ext_ack *extack) { struct tc_root_qopt_offload graft_offload = { .command = TC_ROOT_GRAFT, .handle = new ? 
new->handle : 0, .ingress = (new && new->flags & TCQ_F_INGRESS) || (old && old->flags & TCQ_F_INGRESS), }; qdisc_offload_graft_helper(dev, NULL, new, old, TC_SETUP_ROOT_QDISC, &graft_offload, extack); } static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, u32 portid, u32 seq, u16 flags, int event, struct netlink_ext_ack *extack) { struct gnet_stats_basic_sync __percpu *cpu_bstats = NULL; struct gnet_stats_queue __percpu *cpu_qstats = NULL; struct tcmsg *tcm; struct nlmsghdr *nlh; unsigned char *b = skb_tail_pointer(skb); struct gnet_dump d; struct qdisc_size_table *stab; u32 block_index; __u32 qlen; cond_resched(); nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); if (!nlh) goto out_nlmsg_trim; tcm = nlmsg_data(nlh); tcm->tcm_family = AF_UNSPEC; tcm->tcm__pad1 = 0; tcm->tcm__pad2 = 0; tcm->tcm_ifindex = qdisc_dev(q)->ifindex; tcm->tcm_parent = clid; tcm->tcm_handle = q->handle; tcm->tcm_info = refcount_read(&q->refcnt); if (nla_put_string(skb, TCA_KIND, q->ops->id)) goto nla_put_failure; if (q->ops->ingress_block_get) { block_index = q->ops->ingress_block_get(q); if (block_index && nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index)) goto nla_put_failure; } if (q->ops->egress_block_get) { block_index = q->ops->egress_block_get(q); if (block_index && nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index)) goto nla_put_failure; } if (q->ops->dump && q->ops->dump(q, skb) < 0) goto nla_put_failure; if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED))) goto nla_put_failure; qlen = qdisc_qlen_sum(q); stab = rtnl_dereference(q->stab); if (stab && qdisc_dump_stab(skb, stab) < 0) goto nla_put_failure; if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, NULL, &d, TCA_PAD) < 0) goto nla_put_failure; if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0) goto nla_put_failure; if (qdisc_is_percpu_stats(q)) { cpu_bstats = q->cpu_bstats; cpu_qstats = q->cpu_qstats; } if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats, true) < 0 || gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 || gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0) goto nla_put_failure; if (gnet_stats_finish_copy(&d) < 0) goto nla_put_failure; if (extack && extack->_msg && nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg)) goto out_nlmsg_trim; nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; out_nlmsg_trim: nla_put_failure: nlmsg_trim(skb, b); return -1; } static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible) { if (q->flags & TCQ_F_BUILTIN) return true; if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible) return true; return false; } static int qdisc_get_notify(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, u32 clid, struct Qdisc *q, struct netlink_ext_ack *extack) { struct sk_buff *skb; u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; if (!tc_qdisc_dump_ignore(q, false)) { if (tc_fill_qdisc(skb, q, clid, portid, n->nlmsg_seq, 0, RTM_NEWQDISC, extack) < 0) goto err_out; } if (skb->len) return rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); err_out: kfree_skb(skb); return -EINVAL; } static int qdisc_notify(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, u32 clid, struct Qdisc *old, struct Qdisc *new, struct netlink_ext_ack *extack) { struct sk_buff *skb; u32 portid = oskb ? 
NETLINK_CB(oskb).portid : 0; if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) return 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; if (old && !tc_qdisc_dump_ignore(old, false)) { if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq, 0, RTM_DELQDISC, extack) < 0) goto err_out; } if (new && !tc_qdisc_dump_ignore(new, false)) { if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC, extack) < 0) goto err_out; } if (skb->len) return rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); err_out: kfree_skb(skb); return -EINVAL; } static void notify_and_destroy(struct net *net, struct sk_buff *skb, struct nlmsghdr *n, u32 clid, struct Qdisc *old, struct Qdisc *new, struct netlink_ext_ack *extack) { if (new || old) qdisc_notify(net, skb, n, clid, old, new, extack); if (old) qdisc_put(old); } static void qdisc_clear_nolock(struct Qdisc *sch) { sch->flags &= ~TCQ_F_NOLOCK; if (!(sch->flags & TCQ_F_CPUSTATS)) return; free_percpu(sch->cpu_bstats); free_percpu(sch->cpu_qstats); sch->cpu_bstats = NULL; sch->cpu_qstats = NULL; sch->flags &= ~TCQ_F_CPUSTATS; } /* Graft qdisc "new" to class "classid" of qdisc "parent" or * to device "dev". * * When appropriate send a netlink notification using 'skb' * and "n". * * On success, destroy old qdisc. */ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, struct sk_buff *skb, struct nlmsghdr *n, u32 classid, struct Qdisc *new, struct Qdisc *old, struct netlink_ext_ack *extack) { struct Qdisc *q = old; struct net *net = dev_net(dev); if (parent == NULL) { unsigned int i, num_q, ingress; struct netdev_queue *dev_queue; ingress = 0; num_q = dev->num_tx_queues; if ((q && q->flags & TCQ_F_INGRESS) || (new && new->flags & TCQ_F_INGRESS)) { ingress = 1; dev_queue = dev_ingress_queue(dev); if (!dev_queue) { NL_SET_ERR_MSG(extack, "Device does not have an ingress queue"); return -ENOENT; } q = rtnl_dereference(dev_queue->qdisc_sleeping); /* This is the counterpart of that qdisc_refcount_inc_nz() call in * __tcf_qdisc_find() for filter requests. */ if (!qdisc_refcount_dec_if_one(q)) { NL_SET_ERR_MSG(extack, "Current ingress or clsact Qdisc has ongoing filter requests"); return -EBUSY; } } if (dev->flags & IFF_UP) dev_deactivate(dev); qdisc_offload_graft_root(dev, new, old, extack); if (new && new->ops->attach && !ingress) goto skip; if (!ingress) { for (i = 0; i < num_q; i++) { dev_queue = netdev_get_tx_queue(dev, i); old = dev_graft_qdisc(dev_queue, new); if (new && i > 0) qdisc_refcount_inc(new); qdisc_put(old); } } else { old = dev_graft_qdisc(dev_queue, NULL); /* {ingress,clsact}_destroy() @old before grafting @new to avoid * unprotected concurrent accesses to net_device::miniq_{in,e}gress * pointer(s) in mini_qdisc_pair_swap(). */ qdisc_notify(net, skb, n, classid, old, new, extack); qdisc_destroy(old); dev_graft_qdisc(dev_queue, new); } skip: if (!ingress) { old = rtnl_dereference(dev->qdisc); if (new && !new->ops->attach) qdisc_refcount_inc(new); rcu_assign_pointer(dev->qdisc, new ? 
: &noop_qdisc); notify_and_destroy(net, skb, n, classid, old, new, extack); if (new && new->ops->attach) new->ops->attach(new); } if (dev->flags & IFF_UP) dev_activate(dev); } else { const struct Qdisc_class_ops *cops = parent->ops->cl_ops; unsigned long cl; int err; /* Only support running class lockless if parent is lockless */ if (new && (new->flags & TCQ_F_NOLOCK) && !(parent->flags & TCQ_F_NOLOCK)) qdisc_clear_nolock(new); if (!cops || !cops->graft) return -EOPNOTSUPP; cl = cops->find(parent, classid); if (!cl) { NL_SET_ERR_MSG(extack, "Specified class not found"); return -ENOENT; } if (new && new->ops == &noqueue_qdisc_ops) { NL_SET_ERR_MSG(extack, "Cannot assign noqueue to a class"); return -EINVAL; } if (new && !(parent->flags & TCQ_F_MQROOT) && rcu_access_pointer(new->stab)) { NL_SET_ERR_MSG(extack, "STAB not supported on a non root"); return -EINVAL; } err = cops->graft(parent, cl, new, &old, extack); if (err) return err; notify_and_destroy(net, skb, n, classid, old, new, extack); } return 0; } static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca, struct netlink_ext_ack *extack) { u32 block_index; if (tca[TCA_INGRESS_BLOCK]) { block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]); if (!block_index) { NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0"); return -EINVAL; } if (!sch->ops->ingress_block_set) { NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported"); return -EOPNOTSUPP; } sch->ops->ingress_block_set(sch, block_index); } if (tca[TCA_EGRESS_BLOCK]) { block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]); if (!block_index) { NL_SET_ERR_MSG(extack, "Egress block index cannot be 0"); return -EINVAL; } if (!sch->ops->egress_block_set) { NL_SET_ERR_MSG(extack, "Egress block sharing is not supported"); return -EOPNOTSUPP; } sch->ops->egress_block_set(sch, block_index); } return 0; } /* Allocate and initialize new qdisc. Parameters are passed via opt. */ static struct Qdisc *qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, u32 parent, u32 handle, struct nlattr **tca, int *errp, struct netlink_ext_ack *extack) { int err; struct nlattr *kind = tca[TCA_KIND]; struct Qdisc *sch; struct Qdisc_ops *ops; struct qdisc_size_table *stab; ops = qdisc_lookup_ops(kind); if (!ops) { err = -ENOENT; NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown"); goto err_out; } sch = qdisc_alloc(dev_queue, ops, extack); if (IS_ERR(sch)) { err = PTR_ERR(sch); goto err_out2; } sch->parent = parent; if (handle == TC_H_INGRESS) { if (!(sch->flags & TCQ_F_INGRESS)) { NL_SET_ERR_MSG(extack, "Specified parent ID is reserved for ingress and clsact Qdiscs"); err = -EINVAL; goto err_out3; } handle = TC_H_MAKE(TC_H_INGRESS, 0); } else { if (handle == 0) { handle = qdisc_alloc_handle(dev); if (handle == 0) { NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded"); err = -ENOSPC; goto err_out3; } } if (!netif_is_multiqueue(dev)) sch->flags |= TCQ_F_ONETXQUEUE; } sch->handle = handle; /* This exist to keep backward compatible with a userspace * loophole, what allowed userspace to get IFF_NO_QUEUE * facility on older kernels by setting tx_queue_len=0 (prior * to qdisc init), and then forgot to reinit tx_queue_len * before again attaching a qdisc. 
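 * (e.g. an "ip link set dev eth0 txqueuelen 0" issued before any qdisc was
 * attached would otherwise leave a later "tc qdisc add dev eth0 root pfifo"
 * with a zero-length queue; the device name and commands are only an
 * illustration.)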
*/ if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) { WRITE_ONCE(dev->tx_queue_len, DEFAULT_TX_QUEUE_LEN); netdev_info(dev, "Caught tx_queue_len zero misconfig\n"); } err = qdisc_block_indexes_set(sch, tca, extack); if (err) goto err_out3; if (tca[TCA_STAB]) { stab = qdisc_get_stab(tca[TCA_STAB], extack); if (IS_ERR(stab)) { err = PTR_ERR(stab); goto err_out3; } rcu_assign_pointer(sch->stab, stab); } if (ops->init) { err = ops->init(sch, tca[TCA_OPTIONS], extack); if (err != 0) goto err_out4; } if (tca[TCA_RATE]) { err = -EOPNOTSUPP; if (sch->flags & TCQ_F_MQROOT) { NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc"); goto err_out4; } err = gen_new_estimator(&sch->bstats, sch->cpu_bstats, &sch->rate_est, NULL, true, tca[TCA_RATE]); if (err) { NL_SET_ERR_MSG(extack, "Failed to generate new estimator"); goto err_out4; } } qdisc_hash_add(sch, false); trace_qdisc_create(ops, dev, parent); return sch; err_out4: /* Even if ops->init() failed, we call ops->destroy() * like qdisc_create_dflt(). */ if (ops->destroy) ops->destroy(sch); qdisc_put_stab(rtnl_dereference(sch->stab)); err_out3: lockdep_unregister_key(&sch->root_lock_key); netdev_put(dev, &sch->dev_tracker); qdisc_free(sch); err_out2: bpf_module_put(ops, ops->owner); err_out: *errp = err; return NULL; } static int qdisc_change(struct Qdisc *sch, struct nlattr **tca, struct netlink_ext_ack *extack) { struct qdisc_size_table *ostab, *stab = NULL; int err = 0; if (tca[TCA_OPTIONS]) { if (!sch->ops->change) { NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc"); return -EINVAL; } if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) { NL_SET_ERR_MSG(extack, "Change of blocks is not supported"); return -EOPNOTSUPP; } err = sch->ops->change(sch, tca[TCA_OPTIONS], extack); if (err) return err; } if (tca[TCA_STAB]) { stab = qdisc_get_stab(tca[TCA_STAB], extack); if (IS_ERR(stab)) return PTR_ERR(stab); } ostab = rtnl_dereference(sch->stab); rcu_assign_pointer(sch->stab, stab); qdisc_put_stab(ostab); if (tca[TCA_RATE]) { /* NB: ignores errors from replace_estimator because change can't be undone. */ if (sch->flags & TCQ_F_MQROOT) goto out; gen_replace_estimator(&sch->bstats, sch->cpu_bstats, &sch->rate_est, NULL, true, tca[TCA_RATE]); } out: return 0; } struct check_loop_arg { struct qdisc_walker w; struct Qdisc *p; int depth; }; static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w); static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth) { struct check_loop_arg arg; if (q->ops->cl_ops == NULL) return 0; arg.w.stop = arg.w.skip = arg.w.count = 0; arg.w.fn = check_loop_fn; arg.depth = depth; arg.p = p; q->ops->cl_ops->walk(q, &arg.w); return arg.w.stop ? -ELOOP : 0; } static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w) { struct Qdisc *leaf; const struct Qdisc_class_ops *cops = q->ops->cl_ops; struct check_loop_arg *arg = (struct check_loop_arg *)w; leaf = cops->leaf(q, cl); if (leaf) { if (leaf == arg->p || arg->depth > 7) return -ELOOP; return check_loop(leaf, arg->p, arg->depth + 1); } return 0; } const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = { [TCA_KIND] = { .type = NLA_STRING }, [TCA_RATE] = { .type = NLA_BINARY, .len = sizeof(struct tc_estimator) }, [TCA_STAB] = { .type = NLA_NESTED }, [TCA_DUMP_INVISIBLE] = { .type = NLA_FLAG }, [TCA_CHAIN] = { .type = NLA_U32 }, [TCA_INGRESS_BLOCK] = { .type = NLA_U32 }, [TCA_EGRESS_BLOCK] = { .type = NLA_U32 }, }; /* * Delete/get qdisc. 
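 *
 * These are the RTM_DELQDISC/RTM_GETQDISC request handlers; e.g.
 * "tc qdisc del dev eth0 root" reaches tc_get_qdisc() below as an
 * RTM_DELQDISC message (the device name and command are only an
 * illustration).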
*/ static int __tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack, struct net_device *dev, struct nlattr *tca[TCA_MAX + 1], struct tcmsg *tcm) { struct net *net = sock_net(skb->sk); struct Qdisc *q = NULL; struct Qdisc *p = NULL; u32 clid; int err; clid = tcm->tcm_parent; if (clid) { if (clid != TC_H_ROOT) { if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) { p = qdisc_lookup(dev, TC_H_MAJ(clid)); if (!p) { NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid"); return -ENOENT; } q = qdisc_leaf(p, clid, extack); } else if (dev_ingress_queue(dev)) { q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping); } } else { q = rtnl_dereference(dev->qdisc); } if (!q) { NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device"); return -ENOENT; } if (IS_ERR(q)) return PTR_ERR(q); if (tcm->tcm_handle && q->handle != tcm->tcm_handle) { NL_SET_ERR_MSG(extack, "Invalid handle"); return -EINVAL; } } else { q = qdisc_lookup(dev, tcm->tcm_handle); if (!q) { NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle"); return -ENOENT; } } if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) { NL_SET_ERR_MSG(extack, "Invalid qdisc name: must match existing qdisc"); return -EINVAL; } if (n->nlmsg_type == RTM_DELQDISC) { if (!clid) { NL_SET_ERR_MSG(extack, "Classid cannot be zero"); return -EINVAL; } if (q->handle == 0) { NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero"); return -ENOENT; } err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack); if (err != 0) return err; } else { qdisc_get_notify(net, skb, n, clid, q, NULL); } return 0; } static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct tcmsg *tcm = nlmsg_data(n); struct nlattr *tca[TCA_MAX + 1]; struct net_device *dev; int err; err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) return err; dev = __dev_get_by_index(net, tcm->tcm_ifindex); if (!dev) return -ENODEV; netdev_lock_ops(dev); err = __tc_get_qdisc(skb, n, extack, dev, tca, tcm); netdev_unlock_ops(dev); return err; } static bool req_create_or_replace(struct nlmsghdr *n) { return (n->nlmsg_flags & NLM_F_CREATE && n->nlmsg_flags & NLM_F_REPLACE); } static bool req_create_exclusive(struct nlmsghdr *n) { return (n->nlmsg_flags & NLM_F_CREATE && n->nlmsg_flags & NLM_F_EXCL); } static bool req_change(struct nlmsghdr *n) { return (!(n->nlmsg_flags & NLM_F_CREATE) && !(n->nlmsg_flags & NLM_F_REPLACE) && !(n->nlmsg_flags & NLM_F_EXCL)); } static int __tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack, struct net_device *dev, struct nlattr *tca[TCA_MAX + 1], struct tcmsg *tcm) { struct Qdisc *q = NULL; struct Qdisc *p = NULL; u32 clid; int err; clid = tcm->tcm_parent; if (clid) { if (clid != TC_H_ROOT) { if (clid != TC_H_INGRESS) { p = qdisc_lookup(dev, TC_H_MAJ(clid)); if (!p) { NL_SET_ERR_MSG(extack, "Failed to find specified qdisc"); return -ENOENT; } q = qdisc_leaf(p, clid, extack); if (IS_ERR(q)) return PTR_ERR(q); } else if (dev_ingress_queue_create(dev)) { q = rtnl_dereference(dev_ingress_queue(dev)->qdisc_sleeping); } } else { q = rtnl_dereference(dev->qdisc); } /* It may be default qdisc, ignore it */ if (q && q->handle == 0) q = NULL; if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) { if (tcm->tcm_handle) { if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) { NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override"); return 
-EEXIST; } if (TC_H_MIN(tcm->tcm_handle)) { NL_SET_ERR_MSG(extack, "Invalid minor handle"); return -EINVAL; } q = qdisc_lookup(dev, tcm->tcm_handle); if (!q) goto create_n_graft; if (q->parent != tcm->tcm_parent) { NL_SET_ERR_MSG(extack, "Cannot move an existing qdisc to a different parent"); return -EINVAL; } if (n->nlmsg_flags & NLM_F_EXCL) { NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override"); return -EEXIST; } if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) { NL_SET_ERR_MSG(extack, "Invalid qdisc name: must match existing qdisc"); return -EINVAL; } if (q->flags & TCQ_F_INGRESS) { NL_SET_ERR_MSG(extack, "Cannot regraft ingress or clsact Qdiscs"); return -EINVAL; } if (q == p || (p && check_loop(q, p, 0))) { NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected"); return -ELOOP; } if (clid == TC_H_INGRESS) { NL_SET_ERR_MSG(extack, "Ingress cannot graft directly"); return -EINVAL; } qdisc_refcount_inc(q); goto graft; } else { if (!q) goto create_n_graft; /* This magic test requires explanation. * * We know, that some child q is already * attached to this parent and have choice: * 1) change it or 2) create/graft new one. * If the requested qdisc kind is different * than the existing one, then we choose graft. * If they are the same then this is "change" * operation - just let it fallthrough.. * * 1. We are allowed to create/graft only * if the request is explicitly stating * "please create if it doesn't exist". * * 2. If the request is to exclusive create * then the qdisc tcm_handle is not expected * to exist, so that we choose create/graft too. * * 3. The last case is when no flags are set. * This will happen when for example tc * utility issues a "change" command. * Alas, it is sort of hole in API, we * cannot decide what to do unambiguously. * For now we select create/graft. */ if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) { if (req_create_or_replace(n) || req_create_exclusive(n)) goto create_n_graft; else if (req_change(n)) goto create_n_graft2; } } } } else { if (!tcm->tcm_handle) { NL_SET_ERR_MSG(extack, "Handle cannot be zero"); return -EINVAL; } q = qdisc_lookup(dev, tcm->tcm_handle); } /* Change qdisc parameters */ if (!q) { NL_SET_ERR_MSG(extack, "Specified qdisc not found"); return -ENOENT; } if (n->nlmsg_flags & NLM_F_EXCL) { NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify"); return -EEXIST; } if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) { NL_SET_ERR_MSG(extack, "Invalid qdisc name: must match existing qdisc"); return -EINVAL; } err = qdisc_change(q, tca, extack); if (err == 0) qdisc_notify(sock_net(skb->sk), skb, n, clid, NULL, q, extack); return err; create_n_graft: if (!(n->nlmsg_flags & NLM_F_CREATE)) { NL_SET_ERR_MSG(extack, "Qdisc not found. 
To create specify NLM_F_CREATE flag"); return -ENOENT; } create_n_graft2: if (clid == TC_H_INGRESS) { if (dev_ingress_queue(dev)) { q = qdisc_create(dev, dev_ingress_queue(dev), tcm->tcm_parent, tcm->tcm_parent, tca, &err, extack); } else { NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device"); err = -ENOENT; } } else { struct netdev_queue *dev_queue; if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue) dev_queue = p->ops->cl_ops->select_queue(p, tcm); else if (p) dev_queue = p->dev_queue; else dev_queue = netdev_get_tx_queue(dev, 0); q = qdisc_create(dev, dev_queue, tcm->tcm_parent, tcm->tcm_handle, tca, &err, extack); } if (!q) return err; graft: err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack); if (err) { if (q) qdisc_put(q); return err; } return 0; } static void request_qdisc_module(struct nlattr *kind) { struct Qdisc_ops *ops; char name[IFNAMSIZ]; if (!kind) return; ops = qdisc_lookup_ops(kind); if (ops) { bpf_module_put(ops, ops->owner); return; } if (nla_strscpy(name, kind, IFNAMSIZ) >= 0) { rtnl_unlock(); request_module(NET_SCH_ALIAS_PREFIX "%s", name); rtnl_lock(); } } /* * Create/change qdisc. */ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; struct net_device *dev; struct tcmsg *tcm; int err; err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) return err; request_qdisc_module(tca[TCA_KIND]); tcm = nlmsg_data(n); dev = __dev_get_by_index(net, tcm->tcm_ifindex); if (!dev) return -ENODEV; netdev_lock_ops(dev); err = __tc_modify_qdisc(skb, n, extack, dev, tca, tcm); netdev_unlock_ops(dev); return err; } static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, struct netlink_callback *cb, int *q_idx_p, int s_q_idx, bool recur, bool dump_invisible) { int ret = 0, q_idx = *q_idx_p; struct Qdisc *q; int b; if (!root) return 0; q = root; if (q_idx < s_q_idx) { q_idx++; } else { if (!tc_qdisc_dump_ignore(q, dump_invisible) && tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC, NULL) <= 0) goto done; q_idx++; } /* If dumping singletons, there is no qdisc_dev(root) and the singleton * itself has already been dumped. 
* * If we've already dumped the top-level (ingress) qdisc above and the global * qdisc hashtable, we don't want to hit it again */ if (!qdisc_dev(root) || !recur) goto out; hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) { if (q_idx < s_q_idx) { q_idx++; continue; } if (!tc_qdisc_dump_ignore(q, dump_invisible) && tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC, NULL) <= 0) goto done; q_idx++; } out: *q_idx_p = q_idx; return ret; done: ret = -1; goto out; } static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); int idx, q_idx; int s_idx, s_q_idx; struct net_device *dev; const struct nlmsghdr *nlh = cb->nlh; struct nlattr *tca[TCA_MAX + 1]; int err; s_idx = cb->args[0]; s_q_idx = q_idx = cb->args[1]; idx = 0; ASSERT_RTNL(); err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX, rtm_tca_policy, cb->extack); if (err < 0) return err; for_each_netdev(net, dev) { struct netdev_queue *dev_queue; if (idx < s_idx) goto cont; if (idx > s_idx) s_q_idx = 0; q_idx = 0; netdev_lock_ops(dev); if (tc_dump_qdisc_root(rtnl_dereference(dev->qdisc), skb, cb, &q_idx, s_q_idx, true, tca[TCA_DUMP_INVISIBLE]) < 0) { netdev_unlock_ops(dev); goto done; } dev_queue = dev_ingress_queue(dev); if (dev_queue && tc_dump_qdisc_root(rtnl_dereference(dev_queue->qdisc_sleeping), skb, cb, &q_idx, s_q_idx, false, tca[TCA_DUMP_INVISIBLE]) < 0) { netdev_unlock_ops(dev); goto done; } netdev_unlock_ops(dev); cont: idx++; } done: cb->args[0] = idx; cb->args[1] = q_idx; return skb->len; } /************************************************ * Traffic classes manipulation. * ************************************************/ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, unsigned long cl, u32 portid, u32 seq, u16 flags, int event, struct netlink_ext_ack *extack) { struct tcmsg *tcm; struct nlmsghdr *nlh; unsigned char *b = skb_tail_pointer(skb); struct gnet_dump d; const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops; cond_resched(); nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); if (!nlh) goto out_nlmsg_trim; tcm = nlmsg_data(nlh); tcm->tcm_family = AF_UNSPEC; tcm->tcm__pad1 = 0; tcm->tcm__pad2 = 0; tcm->tcm_ifindex = qdisc_dev(q)->ifindex; tcm->tcm_parent = q->handle; tcm->tcm_handle = q->handle; tcm->tcm_info = 0; if (nla_put_string(skb, TCA_KIND, q->ops->id)) goto nla_put_failure; if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0) goto nla_put_failure; if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, NULL, &d, TCA_PAD) < 0) goto nla_put_failure; if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0) goto nla_put_failure; if (gnet_stats_finish_copy(&d) < 0) goto nla_put_failure; if (extack && extack->_msg && nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg)) goto out_nlmsg_trim; nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; out_nlmsg_trim: nla_put_failure: nlmsg_trim(skb, b); return -1; } static int tclass_notify(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, struct Qdisc *q, unsigned long cl, int event, struct netlink_ext_ack *extack) { struct sk_buff *skb; u32 portid = oskb ? 
NETLINK_CB(oskb).portid : 0; if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) return 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event, extack) < 0) { kfree_skb(skb); return -EINVAL; } return rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); } static int tclass_get_notify(struct net *net, struct sk_buff *oskb, struct nlmsghdr *n, struct Qdisc *q, unsigned long cl, struct netlink_ext_ack *extack) { struct sk_buff *skb; u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, RTM_NEWTCLASS, extack) < 0) { kfree_skb(skb); return -EINVAL; } return rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); } static int tclass_del_notify(struct net *net, const struct Qdisc_class_ops *cops, struct sk_buff *oskb, struct nlmsghdr *n, struct Qdisc *q, unsigned long cl, struct netlink_ext_ack *extack) { u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; struct sk_buff *skb; int err = 0; if (!cops->delete) return -EOPNOTSUPP; if (rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) { skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, RTM_DELTCLASS, extack) < 0) { kfree_skb(skb); return -EINVAL; } } else { skb = NULL; } err = cops->delete(q, cl, extack); if (err) { kfree_skb(skb); return err; } err = rtnetlink_maybe_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); return err; } #ifdef CONFIG_NET_CLS struct tcf_bind_args { struct tcf_walker w; unsigned long base; unsigned long cl; u32 classid; }; static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg) { struct tcf_bind_args *a = (void *)arg; if (n && tp->ops->bind_class) { struct Qdisc *q = tcf_block_q(tp->chain->block); sch_tree_lock(q); tp->ops->bind_class(n, a->classid, a->cl, q, a->base); sch_tree_unlock(q); } return 0; } struct tc_bind_class_args { struct qdisc_walker w; unsigned long new_cl; u32 portid; u32 clid; }; static int tc_bind_class_walker(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w) { struct tc_bind_class_args *a = (struct tc_bind_class_args *)w; const struct Qdisc_class_ops *cops = q->ops->cl_ops; struct tcf_block *block; struct tcf_chain *chain; block = cops->tcf_block(q, cl, NULL); if (!block) return 0; for (chain = tcf_get_next_chain(block, NULL); chain; chain = tcf_get_next_chain(block, chain)) { struct tcf_proto *tp; for (tp = tcf_get_next_proto(chain, NULL); tp; tp = tcf_get_next_proto(chain, tp)) { struct tcf_bind_args arg = {}; arg.w.fn = tcf_node_bind; arg.classid = a->clid; arg.base = cl; arg.cl = a->new_cl; tp->ops->walk(tp, &arg.w, true); } } return 0; } static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid, unsigned long new_cl) { const struct Qdisc_class_ops *cops = q->ops->cl_ops; struct tc_bind_class_args args = {}; if (!cops->tcf_block) return; args.portid = portid; args.clid = clid; args.new_cl = new_cl; args.w.fn = tc_bind_class_walker; q->ops->cl_ops->walk(q, &args.w); } #else static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid, unsigned long new_cl) { } #endif static int __tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack, struct net_device *dev, struct nlattr *tca[TCA_MAX + 1], struct tcmsg *tcm) { struct net *net = sock_net(skb->sk); const struct Qdisc_class_ops *cops; struct 
Qdisc *q = NULL; unsigned long cl = 0; unsigned long new_cl; u32 portid; u32 clid; u32 qid; int err; /* parent == TC_H_UNSPEC - unspecified parent. parent == TC_H_ROOT - class is root, which has no parent. parent == X:0 - parent is root class. parent == X:Y - parent is a node in hierarchy. parent == 0:Y - parent is X:Y, where X:0 is qdisc. handle == 0:0 - generate handle from kernel pool. handle == 0:Y - class is X:Y, where X:0 is qdisc. handle == X:Y - clear. handle == X:0 - root class. */ /* Step 1. Determine qdisc handle X:0 */ portid = tcm->tcm_parent; clid = tcm->tcm_handle; qid = TC_H_MAJ(clid); if (portid != TC_H_ROOT) { u32 qid1 = TC_H_MAJ(portid); if (qid && qid1) { /* If both majors are known, they must be identical. */ if (qid != qid1) return -EINVAL; } else if (qid1) { qid = qid1; } else if (qid == 0) qid = rtnl_dereference(dev->qdisc)->handle; /* Now qid is genuine qdisc handle consistent * both with parent and child. * * TC_H_MAJ(portid) still may be unspecified, complete it now. */ if (portid) portid = TC_H_MAKE(qid, portid); } else { if (qid == 0) qid = rtnl_dereference(dev->qdisc)->handle; } /* OK. Locate qdisc */ q = qdisc_lookup(dev, qid); if (!q) return -ENOENT; /* An check that it supports classes */ cops = q->ops->cl_ops; if (cops == NULL) return -EINVAL; /* Now try to get class */ if (clid == 0) { if (portid == TC_H_ROOT) clid = qid; } else clid = TC_H_MAKE(qid, clid); if (clid) cl = cops->find(q, clid); if (cl == 0) { err = -ENOENT; if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags & NLM_F_CREATE)) goto out; } else { switch (n->nlmsg_type) { case RTM_NEWTCLASS: err = -EEXIST; if (n->nlmsg_flags & NLM_F_EXCL) goto out; break; case RTM_DELTCLASS: err = tclass_del_notify(net, cops, skb, n, q, cl, extack); /* Unbind the class with flilters with 0 */ tc_bind_tclass(q, portid, clid, 0); goto out; case RTM_GETTCLASS: err = tclass_get_notify(net, skb, n, q, cl, extack); goto out; default: err = -EINVAL; goto out; } } if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) { NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes"); return -EOPNOTSUPP; } /* Prevent creation of traffic classes with classid TC_H_ROOT */ if (clid == TC_H_ROOT) { NL_SET_ERR_MSG(extack, "Cannot create traffic class with classid TC_H_ROOT"); return -EINVAL; } new_cl = cl; err = -EOPNOTSUPP; if (cops->change) err = cops->change(q, clid, portid, tca, &new_cl, extack); if (err == 0) { tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS, extack); /* We just create a new class, need to do reverse binding. 
*/ if (cl != new_cl) tc_bind_tclass(q, portid, clid, new_cl); } out: return err; } static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct tcmsg *tcm = nlmsg_data(n); struct nlattr *tca[TCA_MAX + 1]; struct net_device *dev; int err; err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) return err; dev = __dev_get_by_index(net, tcm->tcm_ifindex); if (!dev) return -ENODEV; netdev_lock_ops(dev); err = __tc_ctl_tclass(skb, n, extack, dev, tca, tcm); netdev_unlock_ops(dev); return err; } struct qdisc_dump_args { struct qdisc_walker w; struct sk_buff *skb; struct netlink_callback *cb; }; static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg) { struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg; return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid, a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS, NULL); } static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb, struct tcmsg *tcm, struct netlink_callback *cb, int *t_p, int s_t) { struct qdisc_dump_args arg; if (tc_qdisc_dump_ignore(q, false) || *t_p < s_t || !q->ops->cl_ops || (tcm->tcm_parent && TC_H_MAJ(tcm->tcm_parent) != q->handle)) { (*t_p)++; return 0; } if (*t_p > s_t) memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); arg.w.fn = qdisc_class_dump; arg.skb = skb; arg.cb = cb; arg.w.stop = 0; arg.w.skip = cb->args[1]; arg.w.count = 0; q->ops->cl_ops->walk(q, &arg.w); cb->args[1] = arg.w.count; if (arg.w.stop) return -1; (*t_p)++; return 0; } static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, struct tcmsg *tcm, struct netlink_callback *cb, int *t_p, int s_t, bool recur) { struct Qdisc *q; int b; if (!root) return 0; if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0) return -1; if (!qdisc_dev(root) || !recur) return 0; if (tcm->tcm_parent) { q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent)); if (q && q != root && tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) return -1; return 0; } hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) { if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) return -1; } return 0; } static int __tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb, struct tcmsg *tcm, struct net_device *dev) { struct netdev_queue *dev_queue; int t, s_t; s_t = cb->args[0]; t = 0; if (tc_dump_tclass_root(rtnl_dereference(dev->qdisc), skb, tcm, cb, &t, s_t, true) < 0) goto done; dev_queue = dev_ingress_queue(dev); if (dev_queue && tc_dump_tclass_root(rtnl_dereference(dev_queue->qdisc_sleeping), skb, tcm, cb, &t, s_t, false) < 0) goto done; done: cb->args[0] = t; return skb->len; } static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) { struct tcmsg *tcm = nlmsg_data(cb->nlh); struct net *net = sock_net(skb->sk); struct net_device *dev; int err; if (nlmsg_len(cb->nlh) < sizeof(*tcm)) return 0; dev = dev_get_by_index(net, tcm->tcm_ifindex); if (!dev) return 0; netdev_lock_ops(dev); err = __tc_dump_tclass(skb, cb, tcm, dev); netdev_unlock_ops(dev); dev_put(dev); return err; } #ifdef CONFIG_PROC_FS static int psched_show(struct seq_file *seq, void *v) { seq_printf(seq, "%08x %08x %08x %08x\n", (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1), 1000000, (u32)NSEC_PER_SEC / hrtimer_resolution); return 0; } static int __net_init psched_net_init(struct net *net) { struct proc_dir_entry *e; e = proc_create_single("psched", 0, net->proc_net, psched_show); if 
(e == NULL) return -ENOMEM; return 0; } static void __net_exit psched_net_exit(struct net *net) { remove_proc_entry("psched", net->proc_net); } #else static int __net_init psched_net_init(struct net *net) { return 0; } static void __net_exit psched_net_exit(struct net *net) { } #endif static struct pernet_operations psched_net_ops = { .init = psched_net_init, .exit = psched_net_exit, }; #if IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper); #endif static const struct rtnl_msg_handler psched_rtnl_msg_handlers[] __initconst = { {.msgtype = RTM_NEWQDISC, .doit = tc_modify_qdisc}, {.msgtype = RTM_DELQDISC, .doit = tc_get_qdisc}, {.msgtype = RTM_GETQDISC, .doit = tc_get_qdisc, .dumpit = tc_dump_qdisc}, {.msgtype = RTM_NEWTCLASS, .doit = tc_ctl_tclass}, {.msgtype = RTM_DELTCLASS, .doit = tc_ctl_tclass}, {.msgtype = RTM_GETTCLASS, .doit = tc_ctl_tclass, .dumpit = tc_dump_tclass}, }; static int __init pktsched_init(void) { int err; err = register_pernet_subsys(&psched_net_ops); if (err) { pr_err("pktsched_init: " "cannot initialize per netns operations\n"); return err; } register_qdisc(&pfifo_fast_ops); register_qdisc(&pfifo_qdisc_ops); register_qdisc(&bfifo_qdisc_ops); register_qdisc(&pfifo_head_drop_qdisc_ops); register_qdisc(&mq_qdisc_ops); register_qdisc(&noqueue_qdisc_ops); rtnl_register_many(psched_rtnl_msg_handlers); tc_wrapper_init(); return 0; } subsys_initcall(pktsched_init); |
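The control path above is driven entirely over rtnetlink: RTM_NEWTCLASS/RTM_DELTCLASS/RTM_GETTCLASS requests land in tc_ctl_tclass(), and class dumps go through tc_dump_tclass(). As a hedged illustration of the userspace side of that interface, the sketch below sends an RTM_GETTCLASS dump request for one device and prints each RTM_NEWTCLASS reply produced by tc_fill_tclass(), roughly what "tc class show dev eth0" issues. The device name "eth0" and the minimal error handling are assumptions for the example, not part of the kernel code above.

/* Hedged userspace sketch: dump traffic classes on one device via rtnetlink. */
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/pkt_sched.h>
#include <net/if.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>

int main(void)
{
	struct sockaddr_nl sa = { .nl_family = AF_NETLINK };
	struct {
		struct nlmsghdr nlh;
		struct tcmsg tcm;
	} req;
	char buf[8192];
	int fd, len;

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (fd < 0)
		return 1;

	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
	req.nlh.nlmsg_type = RTM_GETTCLASS;
	req.nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
	req.nlh.nlmsg_seq = 1;
	req.tcm.tcm_family = AF_UNSPEC;
	req.tcm.tcm_ifindex = if_nametoindex("eth0");	/* assumed device */

	if (sendto(fd, &req, req.nlh.nlmsg_len, 0,
		   (struct sockaddr *)&sa, sizeof(sa)) < 0)
		return 1;

	/* Each RTM_NEWTCLASS reply corresponds to one tc_fill_tclass() call. */
	while ((len = recv(fd, buf, sizeof(buf), 0)) > 0) {
		struct nlmsghdr *nlh = (struct nlmsghdr *)buf;

		for (; NLMSG_OK(nlh, len); nlh = NLMSG_NEXT(nlh, len)) {
			struct tcmsg *t = NLMSG_DATA(nlh);

			if (nlh->nlmsg_type == NLMSG_DONE)
				goto out;
			if (nlh->nlmsg_type != RTM_NEWTCLASS)
				continue;
			printf("class %x:%x parent %x\n",
			       TC_H_MAJ(t->tcm_handle) >> 16,
			       TC_H_MIN(t->tcm_handle), t->tcm_parent);
		}
	}
out:
	close(fd);
	return 0;
}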
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_TTY_PORT_H #define _LINUX_TTY_PORT_H #include <linux/kfifo.h> #include <linux/kref.h> #include <linux/mutex.h> #include <linux/tty_buffer.h> #include <linux/wait.h> struct attribute_group; struct tty_driver; struct tty_port; struct tty_struct; /** * struct tty_port_operations -- operations on tty_port * @carrier_raised: return true if the carrier is raised on @port * @dtr_rts: raise the DTR line if @active is true, otherwise lower DTR * @shutdown: called when the last close completes or a hangup finishes IFF the * port was initialized. Do not use to free resources. Turn off the device * only. Called under the port mutex to serialize against @activate and * @shutdown. * @activate: called under the port mutex from tty_port_open(), serialized using * the port mutex. Supposed to turn on the device. * * FIXME: long term getting the tty argument *out* of this would be good * for consoles. * * @destruct: called on the final put of a port. Free resources, possibly incl. * the port itself. */ struct tty_port_operations { bool (*carrier_raised)(struct tty_port *port); void (*dtr_rts)(struct tty_port *port, bool active); void (*shutdown)(struct tty_port *port); int (*activate)(struct tty_port *port, struct tty_struct *tty); void (*destruct)(struct tty_port *port); }; struct tty_port_client_operations { size_t (*receive_buf)(struct tty_port *port, const u8 *cp, const u8 *fp, size_t count); void (*lookahead_buf)(struct tty_port *port, const u8 *cp, const u8 *fp, size_t count); void (*write_wakeup)(struct tty_port *port); }; extern const struct tty_port_client_operations tty_port_default_client_ops; /** * struct tty_port -- port level information * * @buf: buffer for this port, locked internally * @tty: back pointer to &struct tty_struct, valid only if the tty is open. Use * tty_port_tty_get() to obtain it (and tty_kref_put() to release). * @itty: internal back pointer to &struct tty_struct. Avoid this. It should be * eliminated in the long term. * @ops: tty port operations (like activate, shutdown), see &struct * tty_port_operations * @client_ops: tty port client operations (like receive_buf, write_wakeup). * By default, tty_port_default_client_ops is used. * @lock: lock protecting @tty * @blocked_open: # of procs waiting for open in tty_port_block_til_ready() * @count: usage count * @open_wait: open waiters queue (waiting e.g.
for a carrier) * @delta_msr_wait: modem status change queue (waiting for MSR changes) * @flags: user TTY flags (%ASYNC_) * @iflags: internal flags (%TTY_PORT_) * @console: when set, the port is a console * @mutex: locking, for open, shutdown and other port operations * @buf_mutex: @xmit_buf alloc lock * @xmit_buf: optional xmit buffer used by some drivers * @xmit_fifo: optional xmit buffer used by some drivers * @close_delay: delay in jiffies to wait when closing the port * @closing_wait: delay in jiffies for output to be sent before closing * @drain_delay: set to zero if no pure time based drain is needed else set to * size of fifo * @kref: references counter. Reaching zero calls @ops->destruct() if non-%NULL * or frees the port otherwise. * @client_data: pointer to private data, for @client_ops * * Each device keeps its own port level information. &struct tty_port was * introduced as a common structure for such information. As every TTY device * shall have a backing tty_port structure, every driver can use these members. * * The tty port has a different lifetime to the tty so must be kept apart. * In addition be careful as tty -> port mappings are valid for the life * of the tty object but in many cases port -> tty mappings are valid only * until a hangup so don't use the wrong path. * * Tty port shall be initialized by tty_port_init() and shut down either by * tty_port_destroy() (refcounting not used), or tty_port_put() (refcounting). * * There is a lot of helpers around &struct tty_port too. To name the most * significant ones: tty_port_open(), tty_port_close() (or * tty_port_close_start() and tty_port_close_end() separately if need be), and * tty_port_hangup(). These call @ops->activate() and @ops->shutdown() as * needed. */ struct tty_port { struct tty_bufhead buf; struct tty_struct *tty; struct tty_struct *itty; const struct tty_port_operations *ops; const struct tty_port_client_operations *client_ops; spinlock_t lock; int blocked_open; int count; wait_queue_head_t open_wait; wait_queue_head_t delta_msr_wait; unsigned long flags; unsigned long iflags; unsigned char console:1; struct mutex mutex; struct mutex buf_mutex; u8 *xmit_buf; DECLARE_KFIFO_PTR(xmit_fifo, u8); unsigned int close_delay; unsigned int closing_wait; int drain_delay; struct kref kref; void *client_data; }; /* tty_port::iflags bits -- use atomic bit ops */ #define TTY_PORT_INITIALIZED 0 /* device is initialized */ #define TTY_PORT_SUSPENDED 1 /* device is suspended */ #define TTY_PORT_ACTIVE 2 /* device is open */ /* * uart drivers: use the uart_port::status field and the UPSTAT_* defines * for s/w-based flow control steering and carrier detection status */ #define TTY_PORT_CTS_FLOW 3 /* h/w flow control enabled */ #define TTY_PORT_CHECK_CD 4 /* carrier detect enabled */ #define TTY_PORT_KOPENED 5 /* device exclusively opened by kernel */ void tty_port_init(struct tty_port *port); void tty_port_link_device(struct tty_port *port, struct tty_driver *driver, unsigned index); struct device *tty_port_register_device(struct tty_port *port, struct tty_driver *driver, unsigned index, struct device *device); struct device *tty_port_register_device_attr(struct tty_port *port, struct tty_driver *driver, unsigned index, struct device *device, void *drvdata, const struct attribute_group **attr_grp); struct device *tty_port_register_device_attr_serdev(struct tty_port *port, struct tty_driver *driver, unsigned index, struct device *host, struct device *parent, void *drvdata, const struct attribute_group **attr_grp); void 
tty_port_unregister_device(struct tty_port *port, struct tty_driver *driver, unsigned index); int tty_port_alloc_xmit_buf(struct tty_port *port); void tty_port_free_xmit_buf(struct tty_port *port); void tty_port_destroy(struct tty_port *port); void tty_port_put(struct tty_port *port); static inline struct tty_port *tty_port_get(struct tty_port *port) { if (port && kref_get_unless_zero(&port->kref)) return port; return NULL; } /* If the cts flow control is enabled, return true. */ static inline bool tty_port_cts_enabled(const struct tty_port *port) { return test_bit(TTY_PORT_CTS_FLOW, &port->iflags); } static inline void tty_port_set_cts_flow(struct tty_port *port, bool val) { assign_bit(TTY_PORT_CTS_FLOW, &port->iflags, val); } static inline bool tty_port_active(const struct tty_port *port) { return test_bit(TTY_PORT_ACTIVE, &port->iflags); } static inline void tty_port_set_active(struct tty_port *port, bool val) { assign_bit(TTY_PORT_ACTIVE, &port->iflags, val); } static inline bool tty_port_check_carrier(const struct tty_port *port) { return test_bit(TTY_PORT_CHECK_CD, &port->iflags); } static inline void tty_port_set_check_carrier(struct tty_port *port, bool val) { assign_bit(TTY_PORT_CHECK_CD, &port->iflags, val); } static inline bool tty_port_suspended(const struct tty_port *port) { return test_bit(TTY_PORT_SUSPENDED, &port->iflags); } static inline void tty_port_set_suspended(struct tty_port *port, bool val) { assign_bit(TTY_PORT_SUSPENDED, &port->iflags, val); } static inline bool tty_port_initialized(const struct tty_port *port) { return test_bit(TTY_PORT_INITIALIZED, &port->iflags); } static inline void tty_port_set_initialized(struct tty_port *port, bool val) { assign_bit(TTY_PORT_INITIALIZED, &port->iflags, val); } static inline bool tty_port_kopened(const struct tty_port *port) { return test_bit(TTY_PORT_KOPENED, &port->iflags); } static inline void tty_port_set_kopened(struct tty_port *port, bool val) { assign_bit(TTY_PORT_KOPENED, &port->iflags, val); } struct tty_struct *tty_port_tty_get(struct tty_port *port); void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty); bool tty_port_carrier_raised(struct tty_port *port); void tty_port_raise_dtr_rts(struct tty_port *port); void tty_port_lower_dtr_rts(struct tty_port *port); void tty_port_hangup(struct tty_port *port); void tty_port_tty_hangup(struct tty_port *port, bool check_clocal); void tty_port_tty_wakeup(struct tty_port *port); int tty_port_block_til_ready(struct tty_port *port, struct tty_struct *tty, struct file *filp); int tty_port_close_start(struct tty_port *port, struct tty_struct *tty, struct file *filp); void tty_port_close_end(struct tty_port *port, struct tty_struct *tty); void tty_port_close(struct tty_port *port, struct tty_struct *tty, struct file *filp); int tty_port_install(struct tty_port *port, struct tty_driver *driver, struct tty_struct *tty); int tty_port_open(struct tty_port *port, struct tty_struct *tty, struct file *filp); static inline int tty_port_users(struct tty_port *port) { return port->count + port->blocked_open; } #endif |
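To make the lifetime and helper rules documented above concrete, the following is a hedged sketch, not taken from any in-tree driver, of the usual pattern: embed one struct tty_port per device, supply tty_port_operations, and register the port together with the driver's tty_driver. The foo_* names and the hardware hooks are hypothetical; only the tty_port calls correspond to the declarations above.

/* Hedged sketch of a driver wiring up an embedded tty_port. */
#include <linux/err.h>
#include <linux/tty.h>
#include <linux/tty_port.h>

struct foo_uart {
	struct tty_port port;		/* embedded, one per device */
	/* hypothetical hardware state would live here */
};

static bool foo_carrier_raised(struct tty_port *port)
{
	return true;			/* hypothetical: read DCD from the hardware */
}

static int foo_activate(struct tty_port *port, struct tty_struct *tty)
{
	return 0;			/* hypothetical: power up the UART */
}

static void foo_shutdown(struct tty_port *port)
{
	/* hypothetical: quiesce the UART; never free memory here */
}

static const struct tty_port_operations foo_port_ops = {
	.carrier_raised	= foo_carrier_raised,
	.activate	= foo_activate,
	.shutdown	= foo_shutdown,
};

/* Probe-time wiring; @driver, @index and @parent are assumed to exist. */
static int foo_setup_port(struct foo_uart *foo, struct tty_driver *driver,
			  unsigned int index, struct device *parent)
{
	struct device *dev;

	tty_port_init(&foo->port);
	foo->port.ops = &foo_port_ops;

	dev = tty_port_register_device(&foo->port, driver, index, parent);
	if (IS_ERR(dev)) {
		tty_port_destroy(&foo->port);
		return PTR_ERR(dev);
	}
	return 0;
}

On teardown the driver would call tty_port_unregister_device() and then drop the port with tty_port_put() (or tty_port_destroy() when refcounting is not used), matching the shutdown rules spelled out in the kernel-doc above.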
/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_STATFS_H #define _LINUX_STATFS_H #include <linux/types.h> #include <asm/statfs.h> #include <asm/byteorder.h> struct kstatfs { long f_type; long f_bsize; u64 f_blocks; u64 f_bfree; u64 f_bavail; u64 f_files; u64 f_ffree; __kernel_fsid_t f_fsid; long f_namelen; long f_frsize; long f_flags; long f_spare[4]; }; /* * Definitions for the flag in f_flag. * * Generally these flags are equivalent to the MS_ flags used in the mount * ABI. The exception is ST_VALID which has the same value as MS_REMOUNT * which doesn't make any sense for statfs. */ #define ST_RDONLY 0x0001 /* mount read-only */ #define ST_NOSUID 0x0002 /* ignore suid and sgid bits */ #define ST_NODEV 0x0004 /* disallow access to device special files */ #define ST_NOEXEC 0x0008 /* disallow program execution */ #define ST_SYNCHRONOUS 0x0010 /* writes are synced at once */ #define ST_VALID 0x0020 /* f_flags support is implemented */ #define ST_MANDLOCK 0x0040 /* allow mandatory locks on an FS */ /* 0x0080 used for ST_WRITE in glibc */ /* 0x0100 used for ST_APPEND in glibc */ /* 0x0200 used for ST_IMMUTABLE in glibc */ #define ST_NOATIME 0x0400 /* do not update access times */ #define ST_NODIRATIME 0x0800 /* do not update directory access times */ #define ST_RELATIME 0x1000 /* update atime relative to mtime/ctime */ #define ST_NOSYMFOLLOW 0x2000 /* do not follow symlinks */ struct dentry; extern int vfs_get_fsid(struct dentry *dentry, __kernel_fsid_t *fsid); static inline __kernel_fsid_t u64_to_fsid(u64 v) { return (__kernel_fsid_t){.val = {(u32)v, (u32)(v>>32)}}; } /* Fold 16 bytes uuid to 64 bit fsid */ static inline __kernel_fsid_t uuid_to_fsid(__u8 *uuid) { return u64_to_fsid(le64_to_cpup((void *)uuid) ^ le64_to_cpup((void *)(uuid + sizeof(u64)))); } #endif |
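As a hedged illustration of how a filesystem consumes this header (the foo_* filesystem, its magic number and its counters are invented for the example), a ->statfs() implementation typically fills struct kstatfs and derives f_fsid from the 16-byte volume UUID with uuid_to_fsid():

/* Hedged sketch of a filesystem ->statfs() callback using uuid_to_fsid(). */
#include <linux/fs.h>
#include <linux/statfs.h>

#define FOO_SUPER_MAGIC 0x464f4f00	/* hypothetical magic number */

static int foo_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct super_block *sb = dentry->d_sb;

	buf->f_type    = FOO_SUPER_MAGIC;
	buf->f_bsize   = sb->s_blocksize;
	buf->f_namelen = 255;

	/* hypothetical counters; a real fs reads them from its superblock */
	buf->f_blocks  = 1024;
	buf->f_bfree   = 512;
	buf->f_bavail  = 512;
	buf->f_files   = 128;
	buf->f_ffree   = 64;

	/* Fold the 16-byte volume UUID into the 64-bit f_fsid. */
	buf->f_fsid = uuid_to_fsid(sb->s_uuid.b);
	return 0;
}

The callback is hooked up via the filesystem's struct super_operations (.statfs = foo_statfs); user space then sees these values through statfs(2).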
// SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES */ #include "iommufd_private.h" struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, size_t size, enum iommufd_object_type type) { struct iommufd_object *obj; int rc; obj = kzalloc(size, GFP_KERNEL_ACCOUNT); if (!obj) return ERR_PTR(-ENOMEM); obj->type = type; /* Starts out bias'd by 1 until it is removed from the xarray */ refcount_set(&obj->shortterm_users, 1); refcount_set(&obj->users, 1); /* * Reserve an ID in the xarray but do not publish the pointer yet since * the caller hasn't initialized it yet. Once the pointer is published * in the xarray and visible to other threads we can't reliably destroy * it anymore, so the caller must complete all errorable operations * before calling iommufd_object_finalize(). */ rc = xa_alloc(&ictx->objects, &obj->id, XA_ZERO_ENTRY, xa_limit_31b, GFP_KERNEL_ACCOUNT); if (rc) goto out_free; return obj; out_free: kfree(obj); return ERR_PTR(rc); } EXPORT_SYMBOL_NS_GPL(_iommufd_object_alloc, "IOMMUFD"); /* Caller should xa_lock(&viommu->vdevs) to protect the return value */ struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id) { struct iommufd_vdevice *vdev; lockdep_assert_held(&viommu->vdevs.xa_lock); vdev = xa_load(&viommu->vdevs, vdev_id); return vdev ? vdev->dev : NULL; } EXPORT_SYMBOL_NS_GPL(iommufd_viommu_find_dev, "IOMMUFD"); /* Return -ENOENT if device is not associated to the vIOMMU */ int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu, struct device *dev, unsigned long *vdev_id) { struct iommufd_vdevice *vdev; unsigned long index; int rc = -ENOENT; if (WARN_ON_ONCE(!vdev_id)) return -EINVAL; xa_lock(&viommu->vdevs); xa_for_each(&viommu->vdevs, index, vdev) { if (vdev->dev == dev) { *vdev_id = vdev->id; rc = 0; break; } } xa_unlock(&viommu->vdevs); return rc; } EXPORT_SYMBOL_NS_GPL(iommufd_viommu_get_vdev_id, "IOMMUFD"); /* * Typically called in driver's threaded IRQ handler.
* The @type and @event_data must be defined in include/uapi/linux/iommufd.h */ int iommufd_viommu_report_event(struct iommufd_viommu *viommu, enum iommu_veventq_type type, void *event_data, size_t data_len) { struct iommufd_veventq *veventq; struct iommufd_vevent *vevent; int rc = 0; if (WARN_ON_ONCE(!data_len || !event_data)) return -EINVAL; down_read(&viommu->veventqs_rwsem); veventq = iommufd_viommu_find_veventq(viommu, type); if (!veventq) { rc = -EOPNOTSUPP; goto out_unlock_veventqs; } spin_lock(&veventq->common.lock); if (veventq->num_events == veventq->depth) { vevent = &veventq->lost_events_header; goto out_set_header; } vevent = kzalloc(struct_size(vevent, event_data, data_len), GFP_ATOMIC); if (!vevent) { rc = -ENOMEM; vevent = &veventq->lost_events_header; goto out_set_header; } memcpy(vevent->event_data, event_data, data_len); vevent->data_len = data_len; veventq->num_events++; out_set_header: iommufd_vevent_handler(veventq, vevent); spin_unlock(&veventq->common.lock); out_unlock_veventqs: up_read(&viommu->veventqs_rwsem); return rc; } EXPORT_SYMBOL_NS_GPL(iommufd_viommu_report_event, "IOMMUFD"); #ifdef CONFIG_IRQ_MSI_IOMMU /* * Get a iommufd_sw_msi_map for the msi physical address requested by the irq * layer. The mapping to IOVA is global to the iommufd file descriptor, every * domain that is attached to a device using the same MSI parameters will use * the same IOVA. */ static struct iommufd_sw_msi_map * iommufd_sw_msi_get_map(struct iommufd_ctx *ictx, phys_addr_t msi_addr, phys_addr_t sw_msi_start) { struct iommufd_sw_msi_map *cur; unsigned int max_pgoff = 0; lockdep_assert_held(&ictx->sw_msi_lock); list_for_each_entry(cur, &ictx->sw_msi_list, sw_msi_item) { if (cur->sw_msi_start != sw_msi_start) continue; max_pgoff = max(max_pgoff, cur->pgoff + 1); if (cur->msi_addr == msi_addr) return cur; } if (ictx->sw_msi_id >= BITS_PER_BYTE * sizeof_field(struct iommufd_sw_msi_maps, bitmap)) return ERR_PTR(-EOVERFLOW); cur = kzalloc(sizeof(*cur), GFP_KERNEL); if (!cur) return ERR_PTR(-ENOMEM); cur->sw_msi_start = sw_msi_start; cur->msi_addr = msi_addr; cur->pgoff = max_pgoff; cur->id = ictx->sw_msi_id++; list_add_tail(&cur->sw_msi_item, &ictx->sw_msi_list); return cur; } int iommufd_sw_msi_install(struct iommufd_ctx *ictx, struct iommufd_hwpt_paging *hwpt_paging, struct iommufd_sw_msi_map *msi_map) { unsigned long iova; lockdep_assert_held(&ictx->sw_msi_lock); iova = msi_map->sw_msi_start + msi_map->pgoff * PAGE_SIZE; if (!test_bit(msi_map->id, hwpt_paging->present_sw_msi.bitmap)) { int rc; rc = iommu_map(hwpt_paging->common.domain, iova, msi_map->msi_addr, PAGE_SIZE, IOMMU_WRITE | IOMMU_READ | IOMMU_MMIO, GFP_KERNEL_ACCOUNT); if (rc) return rc; __set_bit(msi_map->id, hwpt_paging->present_sw_msi.bitmap); } return 0; } EXPORT_SYMBOL_NS_GPL(iommufd_sw_msi_install, "IOMMUFD_INTERNAL"); /* * Called by the irq code if the platform translates the MSI address through the * IOMMU. msi_addr is the physical address of the MSI page. iommufd will * allocate a fd global iova for the physical page that is the same on all * domains and devices. 
*/ int iommufd_sw_msi(struct iommu_domain *domain, struct msi_desc *desc, phys_addr_t msi_addr) { struct device *dev = msi_desc_to_dev(desc); struct iommufd_hwpt_paging *hwpt_paging; struct iommu_attach_handle *raw_handle; struct iommufd_attach_handle *handle; struct iommufd_sw_msi_map *msi_map; struct iommufd_ctx *ictx; unsigned long iova; int rc; /* * It is safe to call iommu_attach_handle_get() here because the iommu * core code invokes this under the group mutex which also prevents any * change of the attach handle for the duration of this function. */ iommu_group_mutex_assert(dev); raw_handle = iommu_attach_handle_get(dev->iommu_group, IOMMU_NO_PASID, 0); if (IS_ERR(raw_handle)) return 0; hwpt_paging = find_hwpt_paging(domain->iommufd_hwpt); handle = to_iommufd_handle(raw_handle); /* No IOMMU_RESV_SW_MSI means no change to the msi_msg */ if (handle->idev->igroup->sw_msi_start == PHYS_ADDR_MAX) return 0; ictx = handle->idev->ictx; guard(mutex)(&ictx->sw_msi_lock); /* * The input msi_addr is the exact byte offset of the MSI doorbell, we * assume the caller has checked that it is contained with a MMIO region * that is secure to map at PAGE_SIZE. */ msi_map = iommufd_sw_msi_get_map(handle->idev->ictx, msi_addr & PAGE_MASK, handle->idev->igroup->sw_msi_start); if (IS_ERR(msi_map)) return PTR_ERR(msi_map); rc = iommufd_sw_msi_install(ictx, hwpt_paging, msi_map); if (rc) return rc; __set_bit(msi_map->id, handle->idev->igroup->required_sw_msi.bitmap); iova = msi_map->sw_msi_start + msi_map->pgoff * PAGE_SIZE; msi_desc_set_iommu_msi_iova(desc, iova, PAGE_SHIFT); return 0; } EXPORT_SYMBOL_NS_GPL(iommufd_sw_msi, "IOMMUFD"); #endif MODULE_DESCRIPTION("iommufd code shared with builtin modules"); MODULE_IMPORT_NS("IOMMUFD_INTERNAL"); MODULE_LICENSE("GPL"); |
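As a hedged sketch of how a driver might use the vIOMMU helpers exported above (the foo_* names and the event layout are invented; only iommufd_viommu_get_vdev_id() and iommufd_viommu_report_event() come from the code above), a threaded IRQ handler could translate the faulting physical device to its virtual device ID and queue an event toward user space:

/* Hedged sketch of forwarding a hardware event through a vEVENTQ. */
#include <linux/device.h>
#include <linux/iommufd.h>

/* Hypothetical event record; a real driver would use one of the
 * struct iommu_vevent_* layouts from include/uapi/linux/iommufd.h. */
struct foo_vevent {
	__u64 addr;
	__u32 vsid;
	__u32 flags;
};

static void foo_forward_event(struct iommufd_viommu *viommu,
			      enum iommu_veventq_type vtype,
			      struct device *dev, u64 addr, u32 flags)
{
	struct foo_vevent ev = {};
	unsigned long vdev_id;

	/* Translate the physical device to its virtual device ID, if any. */
	if (iommufd_viommu_get_vdev_id(viommu, dev, &vdev_id))
		return;		/* device is not attached to this vIOMMU */

	ev.addr = addr;
	ev.vsid = (u32)vdev_id;
	ev.flags = flags;

	/*
	 * Queue the event. If no vEVENTQ of @vtype exists the helper returns
	 * -EOPNOTSUPP, and if the queue is full the overflow is accounted via
	 * the lost_events_header seen in iommufd_viommu_report_event() above.
	 */
	iommufd_viommu_report_event(viommu, vtype, &ev, sizeof(ev));
}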
/* * Copyright (c) 2007-2011 Atheros Communications Inc. * Copyright (c) 2011-2012 Qualcomm Atheros, Inc. * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/ #include <linux/module.h> #include <linux/usb.h> #include "debug.h" #include "core.h" /* constants */ #define TX_URB_COUNT 32 #define RX_URB_COUNT 32 #define ATH6KL_USB_RX_BUFFER_SIZE 4096 /* tx/rx pipes for usb */ enum ATH6KL_USB_PIPE_ID { ATH6KL_USB_PIPE_TX_CTRL = 0, ATH6KL_USB_PIPE_TX_DATA_LP, ATH6KL_USB_PIPE_TX_DATA_MP, ATH6KL_USB_PIPE_TX_DATA_HP, ATH6KL_USB_PIPE_RX_CTRL, ATH6KL_USB_PIPE_RX_DATA, ATH6KL_USB_PIPE_RX_DATA2, ATH6KL_USB_PIPE_RX_INT, ATH6KL_USB_PIPE_MAX }; #define ATH6KL_USB_PIPE_INVALID ATH6KL_USB_PIPE_MAX struct ath6kl_usb_pipe { struct list_head urb_list_head; struct usb_anchor urb_submitted; u32 urb_alloc; u32 urb_cnt; u32 urb_cnt_thresh; unsigned int usb_pipe_handle; u32 flags; u8 ep_address; u8 logical_pipe_num; struct ath6kl_usb *ar_usb; u16 max_packet_size; struct work_struct io_complete_work; struct sk_buff_head io_comp_queue; struct usb_endpoint_descriptor *ep_desc; }; #define ATH6KL_USB_PIPE_FLAG_TX (1 << 0) /* usb device object */ struct ath6kl_usb { /* protects pipe->urb_list_head and pipe->urb_cnt */ spinlock_t cs_lock; struct usb_device *udev; struct usb_interface *interface; struct ath6kl_usb_pipe pipes[ATH6KL_USB_PIPE_MAX]; u8 *diag_cmd_buffer; u8 *diag_resp_buffer; struct ath6kl *ar; struct workqueue_struct *wq; }; /* usb urb object */ struct ath6kl_urb_context { struct list_head link; struct ath6kl_usb_pipe *pipe; struct sk_buff *skb; struct ath6kl *ar; }; /* USB endpoint definitions */ #define ATH6KL_USB_EP_ADDR_APP_CTRL_IN 0x81 #define ATH6KL_USB_EP_ADDR_APP_DATA_IN 0x82 #define ATH6KL_USB_EP_ADDR_APP_DATA2_IN 0x83 #define ATH6KL_USB_EP_ADDR_APP_INT_IN 0x84 #define ATH6KL_USB_EP_ADDR_APP_CTRL_OUT 0x01 #define ATH6KL_USB_EP_ADDR_APP_DATA_LP_OUT 0x02 #define ATH6KL_USB_EP_ADDR_APP_DATA_MP_OUT 0x03 #define ATH6KL_USB_EP_ADDR_APP_DATA_HP_OUT 0x04 /* diagnostic command definitions */ #define ATH6KL_USB_CONTROL_REQ_SEND_BMI_CMD 1 #define ATH6KL_USB_CONTROL_REQ_RECV_BMI_RESP 2 #define ATH6KL_USB_CONTROL_REQ_DIAG_CMD 3 #define ATH6KL_USB_CONTROL_REQ_DIAG_RESP 4 #define ATH6KL_USB_CTRL_DIAG_CC_READ 0 #define ATH6KL_USB_CTRL_DIAG_CC_WRITE 1 struct ath6kl_usb_ctrl_diag_cmd_write { __le32 cmd; __le32 address; __le32 value; __le32 _pad[1]; } __packed; struct ath6kl_usb_ctrl_diag_cmd_read { __le32 cmd; __le32 address; } __packed; struct ath6kl_usb_ctrl_diag_resp_read { __le32 value; } __packed; /* function declarations */ static void ath6kl_usb_recv_complete(struct urb *urb); #define ATH6KL_USB_IS_BULK_EP(attr) (((attr) & 3) == 0x02) #define ATH6KL_USB_IS_INT_EP(attr) (((attr) & 3) == 0x03) #define ATH6KL_USB_IS_ISOC_EP(attr) (((attr) & 3) == 0x01) #define ATH6KL_USB_IS_DIR_IN(addr) ((addr) & 0x80) /* pipe/urb operations */ static struct ath6kl_urb_context * ath6kl_usb_alloc_urb_from_pipe(struct ath6kl_usb_pipe *pipe) { struct ath6kl_urb_context *urb_context = NULL; unsigned long flags; /* bail if this pipe is not initialized */ if (!pipe->ar_usb) return NULL; spin_lock_irqsave(&pipe->ar_usb->cs_lock, flags); if (!list_empty(&pipe->urb_list_head)) { urb_context = list_first_entry(&pipe->urb_list_head, struct ath6kl_urb_context, link); list_del(&urb_context->link); pipe->urb_cnt--; } spin_unlock_irqrestore(&pipe->ar_usb->cs_lock, flags); return urb_context; } static void ath6kl_usb_free_urb_to_pipe(struct ath6kl_usb_pipe *pipe, struct ath6kl_urb_context *urb_context) { unsigned long flags; /* bail if this pipe is not initialized */ if (!pipe->ar_usb) return; spin_lock_irqsave(&pipe->ar_usb->cs_lock, flags); pipe->urb_cnt++; list_add(&urb_context->link,
&pipe->urb_list_head); spin_unlock_irqrestore(&pipe->ar_usb->cs_lock, flags); } static void ath6kl_usb_cleanup_recv_urb(struct ath6kl_urb_context *urb_context) { dev_kfree_skb(urb_context->skb); urb_context->skb = NULL; ath6kl_usb_free_urb_to_pipe(urb_context->pipe, urb_context); } static inline struct ath6kl_usb *ath6kl_usb_priv(struct ath6kl *ar) { return ar->hif_priv; } /* pipe resource allocation/cleanup */ static int ath6kl_usb_alloc_pipe_resources(struct ath6kl_usb_pipe *pipe, int urb_cnt) { struct ath6kl_urb_context *urb_context; int status = 0, i; INIT_LIST_HEAD(&pipe->urb_list_head); init_usb_anchor(&pipe->urb_submitted); for (i = 0; i < urb_cnt; i++) { urb_context = kzalloc(sizeof(struct ath6kl_urb_context), GFP_KERNEL); if (urb_context == NULL) { status = -ENOMEM; goto fail_alloc_pipe_resources; } urb_context->pipe = pipe; /* * we only allocate the urb contexts here; the actual URB * is allocated from the kernel as needed to do a transaction */ pipe->urb_alloc++; ath6kl_usb_free_urb_to_pipe(pipe, urb_context); } ath6kl_dbg(ATH6KL_DBG_USB, "ath6kl usb: alloc resources lpipe:%d hpipe:0x%X urbs:%d\n", pipe->logical_pipe_num, pipe->usb_pipe_handle, pipe->urb_alloc); fail_alloc_pipe_resources: return status; } static void ath6kl_usb_free_pipe_resources(struct ath6kl_usb_pipe *pipe) { struct ath6kl_urb_context *urb_context; if (pipe->ar_usb == NULL) { /* nothing allocated for this pipe */ return; } ath6kl_dbg(ATH6KL_DBG_USB, "ath6kl usb: free resources lpipe:%d " "hpipe:0x%X urbs:%d avail:%d\n", pipe->logical_pipe_num, pipe->usb_pipe_handle, pipe->urb_alloc, pipe->urb_cnt); if (pipe->urb_alloc != pipe->urb_cnt) { ath6kl_dbg(ATH6KL_DBG_USB, "ath6kl usb: urb leak! lpipe:%d " "hpipe:0x%X urbs:%d avail:%d\n", pipe->logical_pipe_num, pipe->usb_pipe_handle, pipe->urb_alloc, pipe->urb_cnt); } while (true) { urb_context = ath6kl_usb_alloc_urb_from_pipe(pipe); if (urb_context == NULL) break; kfree(urb_context); } } static void ath6kl_usb_cleanup_pipe_resources(struct ath6kl_usb *ar_usb) { int i; for (i = 0; i < ATH6KL_USB_PIPE_MAX; i++) ath6kl_usb_free_pipe_resources(&ar_usb->pipes[i]); } static u8 ath6kl_usb_get_logical_pipe_num(struct ath6kl_usb *ar_usb, u8 ep_address, int *urb_count) { u8 pipe_num = ATH6KL_USB_PIPE_INVALID; switch (ep_address) { case ATH6KL_USB_EP_ADDR_APP_CTRL_IN: pipe_num = ATH6KL_USB_PIPE_RX_CTRL; *urb_count = RX_URB_COUNT; break; case ATH6KL_USB_EP_ADDR_APP_DATA_IN: pipe_num = ATH6KL_USB_PIPE_RX_DATA; *urb_count = RX_URB_COUNT; break; case ATH6KL_USB_EP_ADDR_APP_INT_IN: pipe_num = ATH6KL_USB_PIPE_RX_INT; *urb_count = RX_URB_COUNT; break; case ATH6KL_USB_EP_ADDR_APP_DATA2_IN: pipe_num = ATH6KL_USB_PIPE_RX_DATA2; *urb_count = RX_URB_COUNT; break; case ATH6KL_USB_EP_ADDR_APP_CTRL_OUT: pipe_num = ATH6KL_USB_PIPE_TX_CTRL; *urb_count = TX_URB_COUNT; break; case ATH6KL_USB_EP_ADDR_APP_DATA_LP_OUT: pipe_num = ATH6KL_USB_PIPE_TX_DATA_LP; *urb_count = TX_URB_COUNT; break; case ATH6KL_USB_EP_ADDR_APP_DATA_MP_OUT: pipe_num = ATH6KL_USB_PIPE_TX_DATA_MP; *urb_count = TX_URB_COUNT; break; case ATH6KL_USB_EP_ADDR_APP_DATA_HP_OUT: pipe_num = ATH6KL_USB_PIPE_TX_DATA_HP; *urb_count = TX_URB_COUNT; break; default: /* note: there may be endpoints not currently used */ break; } return pipe_num; } static int ath6kl_usb_setup_pipe_resources(struct ath6kl_usb *ar_usb) { struct usb_interface *interface = ar_usb->interface; struct usb_host_interface *iface_desc = interface->cur_altsetting; struct usb_endpoint_descriptor *endpoint; struct ath6kl_usb_pipe *pipe; int i, urbcount, status = 0; u8
pipe_num; ath6kl_dbg(ATH6KL_DBG_USB, "setting up USB Pipes using interface\n"); /* walk descriptors and setup pipes */ for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) { endpoint = &iface_desc->endpoint[i].desc; if (ATH6KL_USB_IS_BULK_EP(endpoint->bmAttributes)) { ath6kl_dbg(ATH6KL_DBG_USB, "%s Bulk Ep:0x%2.2X maxpktsz:%d\n", ATH6KL_USB_IS_DIR_IN (endpoint->bEndpointAddress) ? "RX" : "TX", endpoint->bEndpointAddress, le16_to_cpu(endpoint->wMaxPacketSize)); } else if (ATH6KL_USB_IS_INT_EP(endpoint->bmAttributes)) { ath6kl_dbg(ATH6KL_DBG_USB, "%s Int Ep:0x%2.2X maxpktsz:%d interval:%d\n", ATH6KL_USB_IS_DIR_IN (endpoint->bEndpointAddress) ? "RX" : "TX", endpoint->bEndpointAddress, le16_to_cpu(endpoint->wMaxPacketSize), endpoint->bInterval); } else if (ATH6KL_USB_IS_ISOC_EP(endpoint->bmAttributes)) { /* TODO for ISO */ ath6kl_dbg(ATH6KL_DBG_USB, "%s ISOC Ep:0x%2.2X maxpktsz:%d interval:%d\n", ATH6KL_USB_IS_DIR_IN (endpoint->bEndpointAddress) ? "RX" : "TX", endpoint->bEndpointAddress, le16_to_cpu(endpoint->wMaxPacketSize), endpoint->bInterval); } /* Ignore broken descriptors. */ if (usb_endpoint_maxp(endpoint) == 0) continue; urbcount = 0; pipe_num = ath6kl_usb_get_logical_pipe_num(ar_usb, endpoint->bEndpointAddress, &urbcount); if (pipe_num == ATH6KL_USB_PIPE_INVALID) continue; pipe = &ar_usb->pipes[pipe_num]; if (pipe->ar_usb != NULL) { /* hmmm..pipe was already setup */ continue; } pipe->ar_usb = ar_usb; pipe->logical_pipe_num = pipe_num; pipe->ep_address = endpoint->bEndpointAddress; pipe->max_packet_size = le16_to_cpu(endpoint->wMaxPacketSize); if (ATH6KL_USB_IS_BULK_EP(endpoint->bmAttributes)) { if (ATH6KL_USB_IS_DIR_IN(pipe->ep_address)) { pipe->usb_pipe_handle = usb_rcvbulkpipe(ar_usb->udev, pipe->ep_address); } else { pipe->usb_pipe_handle = usb_sndbulkpipe(ar_usb->udev, pipe->ep_address); } } else if (ATH6KL_USB_IS_INT_EP(endpoint->bmAttributes)) { if (ATH6KL_USB_IS_DIR_IN(pipe->ep_address)) { pipe->usb_pipe_handle = usb_rcvintpipe(ar_usb->udev, pipe->ep_address); } else { pipe->usb_pipe_handle = usb_sndintpipe(ar_usb->udev, pipe->ep_address); } } else if (ATH6KL_USB_IS_ISOC_EP(endpoint->bmAttributes)) { /* TODO for ISO */ if (ATH6KL_USB_IS_DIR_IN(pipe->ep_address)) { pipe->usb_pipe_handle = usb_rcvisocpipe(ar_usb->udev, pipe->ep_address); } else { pipe->usb_pipe_handle = usb_sndisocpipe(ar_usb->udev, pipe->ep_address); } } pipe->ep_desc = endpoint; if (!ATH6KL_USB_IS_DIR_IN(pipe->ep_address)) pipe->flags |= ATH6KL_USB_PIPE_FLAG_TX; status = ath6kl_usb_alloc_pipe_resources(pipe, urbcount); if (status != 0) break; } return status; } /* pipe operations */ static void ath6kl_usb_post_recv_transfers(struct ath6kl_usb_pipe *recv_pipe, int buffer_length) { struct ath6kl_urb_context *urb_context; struct urb *urb; int usb_status; while (true) { urb_context = ath6kl_usb_alloc_urb_from_pipe(recv_pipe); if (urb_context == NULL) break; urb_context->skb = dev_alloc_skb(buffer_length); if (urb_context->skb == NULL) goto err_cleanup_urb; urb = usb_alloc_urb(0, GFP_ATOMIC); if (urb == NULL) goto err_cleanup_urb; usb_fill_bulk_urb(urb, recv_pipe->ar_usb->udev, recv_pipe->usb_pipe_handle, urb_context->skb->data, buffer_length, ath6kl_usb_recv_complete, urb_context); ath6kl_dbg(ATH6KL_DBG_USB_BULK, "ath6kl usb: bulk recv submit:%d, 0x%X (ep:0x%2.2X), %d bytes buf:0x%p\n", recv_pipe->logical_pipe_num, recv_pipe->usb_pipe_handle, recv_pipe->ep_address, buffer_length, urb_context->skb); usb_anchor_urb(urb, &recv_pipe->urb_submitted); usb_status = usb_submit_urb(urb, GFP_ATOMIC); if (usb_status) { 
ath6kl_dbg(ATH6KL_DBG_USB_BULK, "ath6kl usb : usb bulk recv failed %d\n", usb_status); usb_unanchor_urb(urb); usb_free_urb(urb); goto err_cleanup_urb; } usb_free_urb(urb); } return; err_cleanup_urb: ath6kl_usb_cleanup_recv_urb(urb_context); return; } static void ath6kl_usb_flush_all(struct ath6kl_usb *ar_usb) { int i; for (i = 0; i < ATH6KL_USB_PIPE_MAX; i++) { if (ar_usb->pipes[i].ar_usb != NULL) usb_kill_anchored_urbs(&ar_usb->pipes[i].urb_submitted); } /* * Flushing any pending I/O may schedule work this call will block * until all scheduled work runs to completion. */ flush_workqueue(ar_usb->wq); } static void ath6kl_usb_start_recv_pipes(struct ath6kl_usb *ar_usb) { /* * note: control pipe is no longer used * ar_usb->pipes[ATH6KL_USB_PIPE_RX_CTRL].urb_cnt_thresh = * ar_usb->pipes[ATH6KL_USB_PIPE_RX_CTRL].urb_alloc/2; * ath6kl_usb_post_recv_transfers(&ar_usb-> * pipes[ATH6KL_USB_PIPE_RX_CTRL], * ATH6KL_USB_RX_BUFFER_SIZE); */ ar_usb->pipes[ATH6KL_USB_PIPE_RX_DATA].urb_cnt_thresh = 1; ath6kl_usb_post_recv_transfers(&ar_usb->pipes[ATH6KL_USB_PIPE_RX_DATA], ATH6KL_USB_RX_BUFFER_SIZE); } /* hif usb rx/tx completion functions */ static void ath6kl_usb_recv_complete(struct urb *urb) { struct ath6kl_urb_context *urb_context = urb->context; struct ath6kl_usb_pipe *pipe = urb_context->pipe; struct sk_buff *skb = NULL; int status = 0; ath6kl_dbg(ATH6KL_DBG_USB_BULK, "%s: recv pipe: %d, stat:%d, len:%d urb:0x%p\n", __func__, pipe->logical_pipe_num, urb->status, urb->actual_length, urb); if (urb->status != 0) { status = -EIO; switch (urb->status) { case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: /* * no need to spew these errors when device * removed or urb killed due to driver shutdown */ status = -ECANCELED; break; default: ath6kl_dbg(ATH6KL_DBG_USB_BULK, "%s recv pipe: %d (ep:0x%2.2X), failed:%d\n", __func__, pipe->logical_pipe_num, pipe->ep_address, urb->status); break; } goto cleanup_recv_urb; } if (urb->actual_length == 0) goto cleanup_recv_urb; skb = urb_context->skb; /* we are going to pass it up */ urb_context->skb = NULL; skb_put(skb, urb->actual_length); /* note: queue implements a lock */ skb_queue_tail(&pipe->io_comp_queue, skb); queue_work(pipe->ar_usb->wq, &pipe->io_complete_work); cleanup_recv_urb: ath6kl_usb_cleanup_recv_urb(urb_context); if (status == 0 && pipe->urb_cnt >= pipe->urb_cnt_thresh) { /* our free urbs are piling up, post more transfers */ ath6kl_usb_post_recv_transfers(pipe, ATH6KL_USB_RX_BUFFER_SIZE); } } static void ath6kl_usb_usb_transmit_complete(struct urb *urb) { struct ath6kl_urb_context *urb_context = urb->context; struct ath6kl_usb_pipe *pipe = urb_context->pipe; struct sk_buff *skb; ath6kl_dbg(ATH6KL_DBG_USB_BULK, "%s: pipe: %d, stat:%d, len:%d\n", __func__, pipe->logical_pipe_num, urb->status, urb->actual_length); if (urb->status != 0) { ath6kl_dbg(ATH6KL_DBG_USB_BULK, "%s: pipe: %d, failed:%d\n", __func__, pipe->logical_pipe_num, urb->status); } skb = urb_context->skb; urb_context->skb = NULL; ath6kl_usb_free_urb_to_pipe(urb_context->pipe, urb_context); /* note: queue implements a lock */ skb_queue_tail(&pipe->io_comp_queue, skb); queue_work(pipe->ar_usb->wq, &pipe->io_complete_work); } static void ath6kl_usb_io_comp_work(struct work_struct *work) { struct ath6kl_usb_pipe *pipe = container_of(work, struct ath6kl_usb_pipe, io_complete_work); struct ath6kl_usb *ar_usb; struct sk_buff *skb; ar_usb = pipe->ar_usb; while ((skb = skb_dequeue(&pipe->io_comp_queue))) { if (pipe->flags & ATH6KL_USB_PIPE_FLAG_TX) { ath6kl_dbg(ATH6KL_DBG_USB_BULK, "ath6kl usb xmit 
callback buf:0x%p\n", skb); ath6kl_core_tx_complete(ar_usb->ar, skb); } else { ath6kl_dbg(ATH6KL_DBG_USB_BULK, "ath6kl usb recv callback buf:0x%p\n", skb); ath6kl_core_rx_complete(ar_usb->ar, skb, pipe->logical_pipe_num); } } } #define ATH6KL_USB_MAX_DIAG_CMD (sizeof(struct ath6kl_usb_ctrl_diag_cmd_write)) #define ATH6KL_USB_MAX_DIAG_RESP (sizeof(struct ath6kl_usb_ctrl_diag_resp_read)) static void ath6kl_usb_destroy(struct ath6kl_usb *ar_usb) { ath6kl_usb_flush_all(ar_usb); ath6kl_usb_cleanup_pipe_resources(ar_usb); usb_set_intfdata(ar_usb->interface, NULL); kfree(ar_usb->diag_cmd_buffer); kfree(ar_usb->diag_resp_buffer); destroy_workqueue(ar_usb->wq); kfree(ar_usb); } static struct ath6kl_usb *ath6kl_usb_create(struct usb_interface *interface) { struct usb_device *dev = interface_to_usbdev(interface); struct ath6kl_usb *ar_usb; struct ath6kl_usb_pipe *pipe; int status = 0; int i; /* ath6kl_usb_destroy() needs ar_usb != NULL && ar_usb->wq != NULL. */ ar_usb = kzalloc(sizeof(struct ath6kl_usb), GFP_KERNEL); if (ar_usb == NULL) return NULL; ar_usb->wq = alloc_workqueue("ath6kl_wq", 0, 0); if (!ar_usb->wq) { kfree(ar_usb); return NULL; } usb_set_intfdata(interface, ar_usb); spin_lock_init(&(ar_usb->cs_lock)); ar_usb->udev = dev; ar_usb->interface = interface; for (i = 0; i < ATH6KL_USB_PIPE_MAX; i++) { pipe = &ar_usb->pipes[i]; INIT_WORK(&pipe->io_complete_work, ath6kl_usb_io_comp_work); skb_queue_head_init(&pipe->io_comp_queue); } ar_usb->diag_cmd_buffer = kzalloc(ATH6KL_USB_MAX_DIAG_CMD, GFP_KERNEL); if (ar_usb->diag_cmd_buffer == NULL) { status = -ENOMEM; goto fail_ath6kl_usb_create; } ar_usb->diag_resp_buffer = kzalloc(ATH6KL_USB_MAX_DIAG_RESP, GFP_KERNEL); if (ar_usb->diag_resp_buffer == NULL) { status = -ENOMEM; goto fail_ath6kl_usb_create; } status = ath6kl_usb_setup_pipe_resources(ar_usb); fail_ath6kl_usb_create: if (status != 0) { ath6kl_usb_destroy(ar_usb); ar_usb = NULL; } return ar_usb; } static void ath6kl_usb_device_detached(struct usb_interface *interface) { struct ath6kl_usb *ar_usb; ar_usb = usb_get_intfdata(interface); if (ar_usb == NULL) return; ath6kl_stop_txrx(ar_usb->ar); /* Delay to wait for the target to reboot */ mdelay(20); ath6kl_core_cleanup(ar_usb->ar); ath6kl_usb_destroy(ar_usb); } /* exported hif usb APIs for htc pipe */ static void hif_start(struct ath6kl *ar) { struct ath6kl_usb *device = ath6kl_usb_priv(ar); int i; ath6kl_usb_start_recv_pipes(device); /* set the TX resource avail threshold for each TX pipe */ for (i = ATH6KL_USB_PIPE_TX_CTRL; i <= ATH6KL_USB_PIPE_TX_DATA_HP; i++) { device->pipes[i].urb_cnt_thresh = device->pipes[i].urb_alloc / 2; } } static int ath6kl_usb_send(struct ath6kl *ar, u8 PipeID, struct sk_buff *hdr_skb, struct sk_buff *skb) { struct ath6kl_usb *device = ath6kl_usb_priv(ar); struct ath6kl_usb_pipe *pipe = &device->pipes[PipeID]; struct ath6kl_urb_context *urb_context; int usb_status, status = 0; struct urb *urb; u8 *data; u32 len; ath6kl_dbg(ATH6KL_DBG_USB_BULK, "+%s pipe : %d, buf:0x%p\n", __func__, PipeID, skb); urb_context = ath6kl_usb_alloc_urb_from_pipe(pipe); if (urb_context == NULL) { /* * TODO: it is possible to run out of urbs if * 2 endpoints map to the same pipe ID */ ath6kl_dbg(ATH6KL_DBG_USB_BULK, "%s pipe:%d no urbs left. 
URB Cnt : %d\n", __func__, PipeID, pipe->urb_cnt); status = -ENOMEM; goto fail_hif_send; } urb_context->skb = skb; data = skb->data; len = skb->len; urb = usb_alloc_urb(0, GFP_ATOMIC); if (urb == NULL) { status = -ENOMEM; ath6kl_usb_free_urb_to_pipe(urb_context->pipe, urb_context); goto fail_hif_send; } usb_fill_bulk_urb(urb, device->udev, pipe->usb_pipe_handle, data, len, ath6kl_usb_usb_transmit_complete, urb_context); if ((len % pipe->max_packet_size) == 0) { /* hit a max packet boundary on this pipe */ urb->transfer_flags |= URB_ZERO_PACKET; } ath6kl_dbg(ATH6KL_DBG_USB_BULK, "athusb bulk send submit:%d, 0x%X (ep:0x%2.2X), %d bytes\n", pipe->logical_pipe_num, pipe->usb_pipe_handle, pipe->ep_address, len); usb_anchor_urb(urb, &pipe->urb_submitted); usb_status = usb_submit_urb(urb, GFP_ATOMIC); if (usb_status) { ath6kl_dbg(ATH6KL_DBG_USB_BULK, "ath6kl usb : usb bulk transmit failed %d\n", usb_status); usb_unanchor_urb(urb |